akshaybaweja · April 5, 2021 19:01 · Apr 5, 2021 · Apr 5, 2021 · Apr 5, 2021 · Apr 5, 2021
diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb
diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb
diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb
@@ -1293,7 +1293,7 @@
       "source": [
         "def WC(values):\n",
         "    wc = WordCloud(width = 1920, height = 1080, background_color ='black', stopwords = stopwords, min_font_size = 5,colormap='autumn', mask=transformed_india_mask).generate(values)\n",
-        "    plt.figure(figsize=(20,11))\n",
+        "    plt.figure()\n",
         "    plt.title(\"TranslatedIngredients\")\n",
         "    plt.imshow(wc)\n",
         "    plt.axis('off')\n",

diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb
@@ -1319,330 +1319,6 @@
           }
         }
       ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "p6Jif7cLmwkk"
-      },
-      "source": [
-        "# Machine Learning"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "BN-8F28Au5Wu"
-      },
-      "source": [
-        "#data = df.loc[:, 'TranslatedInstructions']\n",
-        "data = df.loc[:, 'TranslatedIngredients']"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "iGIlyTL7vsyf"
-      },
-      "source": [
-        "## Training"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "k7xSyzx6zvMg"
-      },
-      "source": [
-        "from gensim.utils import simple_preprocess\n",
-        "from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n",
-        "import re\n",
-        "\n",
-        "tagged_data = []\n",
-        "for i, line in enumerate(data):\n",
-        "    tokens = simple_preprocess(line)\n",
-        "    tagged_data.append(TaggedDocument(tokens, [i]))"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "b-ZzI72szwT9"
-      },
-      "source": [
-        "# Generally it is better to use less epochs for larger datasets\n",
-        "# This is a small dataset so 30 works well. See article for recommendations\n",
-        "\n",
-        "model = Doc2Vec(vector_size=50, min_count=2, epochs=30)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "mjFLXEhmz0bs"
-      },
-      "source": [
-        "model.build_vocab(tagged_data)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "W8Wz0Ipbz27D",
-        "outputId": "a52a64e3-ab2f-4ebd-9701-7c02b69b1abb"
-      },
-      "source": [
-        "%time model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "CPU times: user 38 s, sys: 5.24 s, total: 43.2 s\n",
-            "Wall time: 27 s\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "EVSY_dkjz54s"
-      },
-      "source": [
-        "model.save('/content/drive/MyDrive/Colab Notebooks/indianFood.model')"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "QWkH3-Mb0ADh"
-      },
-      "source": [
-        "model = Doc2Vec.load(\"/content/drive/MyDrive/Colab Notebooks/indianFood.model\")"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "U46Ot24y0At5"
-      },
-      "source": [
-        "features_extracted = []\n",
-        "for i, review in enumerate(data):\n",
-        "    features_extracted.append(model.docvecs[i])"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "5tRA4nyS0E5t"
-      },
-      "source": [
-        "from sklearn.preprocessing import StandardScaler\n",
-        "\n",
-        "ss = StandardScaler()\n",
-        "scaled = ss.fit_transform(features_extracted)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "2OVY2JPM0Has"
-      },
-      "source": [
-        "from sklearn.cluster import KMeans\n",
-        "\n",
-        "kmeans = KMeans(n_clusters=20)\n",
-        "clusters = kmeans.fit_predict(scaled)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "1xVjcMFA0bFt"
-      },
-      "source": [
-        "from sklearn.neighbors import NearestNeighbors\n",
-        "\n",
-        "neighbors = NearestNeighbors(n_neighbors=5, metric='cosine').fit(scaled)\n",
-        "_, closest = neighbors.kneighbors(kmeans.cluster_centers_)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "TOfLk79E57lu"
-      },
-      "source": [
-        "## UMAP"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "cK0UmLea59Eu"
-      },
-      "source": [
-        "import umap"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "FqN9OKjf5-uP",
-        "outputId": "b4cb0c7c-c66d-4e12-e36e-b8cbb809fb1e"
-      },
-      "source": [
-        "%time embedding = umap.UMAP().fit_transform(scaled)"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "CPU times: user 42.4 s, sys: 477 ms, total: 42.9 s\n",
-            "Wall time: 28.3 s\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "uuzQCKlv6Ad5"
-      },
-      "source": [
-        "from sklearn.preprocessing import MinMaxScaler\n",
-        "scaler = MinMaxScaler()\n",
-        "scaler.fit(embedding)\n",
-        "embedding_scaled = scaler.transform(embedding)"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "background_save": true
-        },
-        "id": "MA-oB3tJ6Iop"
-      },
-      "source": [
-        "# Now we put all our data into one list\n",
-        "lookup = []\n",
-        "\n",
-        "for index, umap_pos in enumerate(embedding_scaled):\n",
-        "    lookup.append({\n",
-        "        'cluster_num': float(clusters[index]),\n",
-        "        'umap_pos': umap_pos.tolist(),\n",
-        "        'name': df.iloc[index]['TranslatedRecipeName'],\n",
-        "        'ingredients': df.iloc[index]['TranslatedIngredients'],\n",
-        "        'preptime': int(df.iloc[index]['PrepTimeInMins']),\n",
-        "        'cooktime': int(df.iloc[index]['CookTimeInMins']),\n",
-        "        'totaltime': int(df.iloc[index]['TotalTimeInMins']),\n",
-        "        'servings': int(df.iloc[index]['Servings']),\n",
-        "        'cuisine': df.iloc[index]['Cuisine'],\n",
-        "        'course': df.iloc[index]['Course'],\n",
-        "        'diet': df.iloc[index]['Diet'],\n",
-        "        'instructions': df.iloc[index]['TranslatedInstructions'],\n",
-        "        'image': df.iloc[index]['image'],\n",
-        "        'url': df.iloc[index]['URL']\n",
-        "    })"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "background_save": true
-        },
-        "id": "bstVdpbJ6M5B",
-        "outputId": "761473e4-743f-4b29-d21a-ca10e9630310"
-      },
-      "source": [
-        "# This is what one entry in the list looks like\n",
-        "lookup[1]"
-      ],
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "{'cluster_num': 12.0,\n",
-              " 'cooktime': 10,\n",
-              " 'course': 'Main Course',\n",
-              " 'cuisine': 'South Indian Recipes',\n",
-              " 'diet': 'Vegetarian',\n",
-              " 'image': 'https://www.archanaskitchen.com/images/archanaskitchen/1-Author/b.yojana-gmail.com/Spicy_Thakkali_Rice_Tomato_Pulihora-1_edited.jpg',\n",
-              " 'ingredients': '2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teaspoons BC Belle Bhat powder, salt - as per taste, 1 teaspoon chana dal, 1/2 teaspoon cumin seeds, 1 teaspoon white urad dal, 1/2 Teaspoon mustard, 1 green chilli, 1 dry red chilli, 2 teaspoon cashew - or peanuts, 1-1 / 2 tablespoon oil - 1/2 teaspoon asafoetida ',\n",
-              " 'instructions': 'To make tomato puliogere, first cut the tomatoes. Now put in a mixer grinder and puree it. Now heat oil in a pan. After the oil is hot, add chana dal, urad dal, cashew nuts and let it cook for 10 to 20 seconds. After 10 to 20 seconds, add cumin seeds, mustard seeds, green chillies, dry red chillies and curry leaves. After 30 seconds, add tomato puree to it and mix. Add BC Belle Bhat powder, salt and mix it. Allow to cook for 7 to 8 minutes and then turn off the gas. Take it out in a bowl, add cooked rice and mix it. Serve hot. Serve tomato puliogre with tomato cucumber raita and papad for dinner. ',\n",
-              " 'name': 'Spicy Tomato Rice (Recipe) ',\n",
-              " 'preptime': 5,\n",
-              " 'servings': 3,\n",
-              " 'totaltime': 15,\n",
-              " 'umap_pos': [0.20701216161251068, 0.297481507062912],\n",
-              " 'url': 'http://www.archanaskitchen.com/spicy-tomato-rice-recipe-in-hindi'}"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          },
-          "execution_count": 0
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab": {
-          "background_save": true
-        },
-        "id": "JHUsWA8vuFGs"
-      },
-      "source": [
-        "import json\n",
-        "\n",
-        "with open('/content/drive/MyDrive/Colab Notebooks/Recipe Classification/data.json', 'w') as outfile:\n",
-        "    json.dump(lookup, outfile)"
-      ],
-      "execution_count": null,
-      "outputs": []
     }
   ]
 }
diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb
@@ -6,14 +6,25 @@
       "name": "recipe_analysis.ipynb",
       "provenance": [],
       "collapsed_sections": [],
-      "toc_visible": true
+      "toc_visible": true,
+      "include_colab_link": true
     },
     "kernelspec": {
       "display_name": "Python 3",
       "name": "python3"
     }
   },
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/gist/akshaybaweja/5401170e8bf816f64ad9cfeceead1eb9/recipe_analysis.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {

diff --git a/recipe_analysis.ipynb b/recipe_analysis.ipynb