{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"collapsed_sections":["fQcMREQvThwU","ugf5R7Ihi2eU","fmm6lJZH27-5","8SMtZaf6EkMD","uYO9OW7sXZMF"],"mount_file_id":"1nkLBMUOcoheh7EH5xe3uev4aZqAvPdVG","authorship_tag":"ABX9TyNIByCmZPZzgkENHepyEbKv"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Initial instructions"],"metadata":{"id":"fQcMREQvThwU"}},{"cell_type":"code","source":["! pip install kaggle"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"opfjSdEKbA6q","executionInfo":{"status":"ok","timestamp":1688566779143,"user_tz":-210,"elapsed":4727,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"9a722f41-7e7f-48e5-b73a-a6d3766296e4"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.13)\n","Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n","Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2023.5.7)\n","Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.27.1)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.65.0)\n","Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.1)\n","Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.26.16)\n","Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.4)\n"]}]},{"cell_type":"code","source":["from google.colab import files"],"metadata":{"id":"9gjOv_WUJBzz","executionInfo":{"status":"ok","timestamp":1688566817902,"user_tz":-210,"elapsed":467,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["files.upload()\n","! mkdir ~/.kaggle\n","! cp kaggle.json ~/.kaggle/\n","! chmod 600 ~/.kaggle/kaggle.json\n","! kaggle datasets download -d rounakbanik/the-movies-dataset"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":401},"id":"cQoNEnDDJOvf","executionInfo":{"status":"error","timestamp":1688566832628,"user_tz":-210,"elapsed":12771,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"26b37601-be8f-4ba9-a01c-1e21bba10e66"},"execution_count":3,"outputs":[{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n"," \n"," \n"," Upload widget is only available when the cell has been executed in the\n"," current browser session. Please rerun this cell to enable.\n"," \n"," "]},"metadata":{}},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfiles\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' mkdir ~/.kaggle'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' cp kaggle.json ~/.kaggle/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' chmod 600 ~/.kaggle/kaggle.json'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' kaggle datasets download -d rounakbanik/the-movies-dataset'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36mupload\u001b[0;34m()\u001b[0m\n\u001b[1;32m 67\u001b[0m \"\"\"\n\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m \u001b[0muploaded_files\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_upload_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmultiple\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 70\u001b[0m \u001b[0;31m# Mapping from original filename to filename as saved locally.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mlocal_filenames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36m_upload_files\u001b[0;34m(multiple)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0;31m# First result is always an indication that the file picker has completed.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m result = _output.eval_js(\n\u001b[0m\u001b[1;32m 154\u001b[0m 'google.colab._files._uploadFiles(\"{input_id}\", \"{output_id}\")'.format(\n\u001b[1;32m 155\u001b[0m \u001b[0minput_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/output/_js.py\u001b[0m in \u001b[0;36meval_js\u001b[0;34m(script, ignore_result, timeout_sec)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mignore_result\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_message\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_next_input_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_NOT_READY\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.025\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 97\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m if (\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}]},{"cell_type":"code","source":["import os\n","if not os.path.isdir('/content/data/cleaned') :\n"," os.mkdir('/content/data/cleaned')"],"metadata":{"id":"HaEx9oF3Li7f"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["! unzip '/content/the-movies-dataset.zip' -d '/content/data/raw'"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pLeGJcfvLwwJ","executionInfo":{"status":"ok","timestamp":1688546148833,"user_tz":-210,"elapsed":10746,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"debf1aaf-4932-47f9-b076-49457210b967"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Archive: /content/the-movies-dataset.zip\n"," inflating: /content/data/raw/credits.csv \n"," inflating: /content/data/raw/keywords.csv \n"," inflating: /content/data/raw/links.csv \n"," inflating: /content/data/raw/links_small.csv \n"," inflating: /content/data/raw/movies_metadata.csv \n"," inflating: /content/data/raw/ratings.csv \n"," inflating: /content/data/raw/ratings_small.csv \n"]}]},{"cell_type":"markdown","source":["# Needed Imports"],"metadata":{"id":"a0M_2XYkbY_O"}},{"cell_type":"code","source":["import numpy as np\n","import pandas as pd\n","import matplotlib.pyplot as plt\n","import re"],"metadata":{"id":"z2eLwbMCbbxB","executionInfo":{"status":"ok","timestamp":1688584736915,"user_tz":-210,"elapsed":1969,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":["# Load datasets"],"metadata":{"id":"ugf5R7Ihi2eU"}},{"cell_type":"code","source":["metadata = pd.read_csv('/content/drive/MyDrive/Rec/data/cleaned/clean_metadata.csv')\n","credits = pd.read_csv('/content/drive/MyDrive/Rec/data/cleaned/clean_credits.csv')\n","keywords = pd.read_csv('/content/drive/MyDrive/Rec/data/cleaned/clean_keywords.csv')\n","links = pd.read_csv('/content/drive/MyDrive/Rec/data/raw/links.csv')\n","links = links[links['tmdbId'].notnull()]['tmdbId'].astype('int')"],"metadata":{"id":"pW6Sw6dCi4aX","executionInfo":{"status":"ok","timestamp":1688574375594,"user_tz":-210,"elapsed":3626,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":28,"outputs":[]},{"cell_type":"code","source":["print('shape: {}'.format(metadata.shape))\n","print('columns: \\n {}'.format(metadata.columns))\n","metadata.head(3)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":723},"id":"N_nTAGZUjZTc","executionInfo":{"status":"ok","timestamp":1688571998020,"user_tz":-210,"elapsed":809,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"3d810160-7989-4473-f969-bddaeea13eb7"},"execution_count":22,"outputs":[{"output_type":"stream","name":"stdout","text":["shape: (45447, 30)\n","columns: \n"," Index(['adult', 'budget', 'homepage', 'id', 'imdb_id', 'original_language',\n"," 'original_title', 'overview', 'popularity', 'poster_path',\n"," 'production_companies', 'release_date', 'revenue', 'runtime', 'status',\n"," 'tagline', 'title', 'video', 'vote_average', 'vote_count',\n"," 'name_belongs_to_collection', 'id_belongs_to_collection',\n"," 'poster_path_belongs_to_collection',\n"," 'backdrop_path_belongs_to_collection', 'name_genres', 'id_genres',\n"," 'name_production_countries', 'iso_3166_1_production_companies',\n"," 'name_production_companies', 'id_production_companies'],\n"," dtype='object')\n"]},{"output_type":"execute_result","data":{"text/plain":[" adult budget homepage id imdb_id \\\n","0 False 30000000 http://toystory.disney.com/toy-story 862 tt0114709 \n","1 False 65000000 NaN 8844 tt0113497 \n","2 False 0 NaN 15602 tt0113228 \n","\n"," original_language original_title \\\n","0 en Toy Story \n","1 en Jumanji \n","2 en Grumpier Old Men \n","\n"," overview popularity \\\n","0 Led by Woody, Andy's toys live happily in his ... 21.946943 \n","1 When siblings Judy and Peter discover an encha... 17.015539 \n","2 A family wedding reignites the ancient feud be... 11.712900 \n","\n"," poster_path ... name_belongs_to_collection \\\n","0 /rhIRbceoE9lR4veEXuwCC2wARtG.jpg ... Toy Story Collection \n","1 /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg ... NaN \n","2 /6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg ... Grumpy Old Men Collection \n","\n"," id_belongs_to_collection poster_path_belongs_to_collection \\\n","0 10194.0 /7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg \n","1 NaN NaN \n","2 119050.0 /nLvUdqgPgm3F85NMCii9gVFUcet.jpg \n","\n"," backdrop_path_belongs_to_collection name_genres \\\n","0 /9FBwqcd9IRruEDUrTdcaafOMKUq.jpg Animation, Comedy, Family \n","1 NaN Adventure, Fantasy, Family \n","2 /hypTnLot2z8wpFS7qwsQHW1uV8u.jpg Romance, Comedy \n","\n"," id_genres name_production_countries iso_3166_1_production_companies \\\n","0 16, 35, 10751 United States of America US \n","1 12, 14, 10751 United States of America US \n","2 10749, 35 United States of America US \n","\n"," name_production_companies id_production_companies \n","0 Pixar Animation Studios 3 \n","1 TriStar Pictures, Teitler Film, Interscope Com... 559, 2550, 10201 \n","2 Warner Bros., Lancaster Gate 6194, 19464 \n","\n","[3 rows x 30 columns]"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
adultbudgethomepageidimdb_idoriginal_languageoriginal_titleoverviewpopularityposter_path...name_belongs_to_collectionid_belongs_to_collectionposter_path_belongs_to_collectionbackdrop_path_belongs_to_collectionname_genresid_genresname_production_countriesiso_3166_1_production_companiesname_production_companiesid_production_companies
0False30000000http://toystory.disney.com/toy-story862tt0114709enToy StoryLed by Woody, Andy's toys live happily in his ...21.946943/rhIRbceoE9lR4veEXuwCC2wARtG.jpg...Toy Story Collection10194.0/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg/9FBwqcd9IRruEDUrTdcaafOMKUq.jpgAnimation, Comedy, Family16, 35, 10751United States of AmericaUSPixar Animation Studios3
1False65000000NaN8844tt0113497enJumanjiWhen siblings Judy and Peter discover an encha...17.015539/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg...NaNNaNNaNNaNAdventure, Fantasy, Family12, 14, 10751United States of AmericaUSTriStar Pictures, Teitler Film, Interscope Com...559, 2550, 10201
2False0NaN15602tt0113228enGrumpier Old MenA family wedding reignites the ancient feud be...11.712900/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg...Grumpy Old Men Collection119050.0/nLvUdqgPgm3F85NMCii9gVFUcet.jpg/hypTnLot2z8wpFS7qwsQHW1uV8u.jpgRomance, Comedy10749, 35United States of AmericaUSWarner Bros., Lancaster Gate6194, 19464
\n","

3 rows × 30 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":22}]},{"cell_type":"code","source":["a = metadata['original_language'].value_counts()\n","a[a>10]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QPmkUByIlo1G","executionInfo":{"status":"ok","timestamp":1688572010282,"user_tz":-210,"elapsed":438,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"de2a56b6-96a2-461c-f4bb-56bd552c714e"},"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["en 32260\n","fr 2437\n","it 1529\n","ja 1349\n","de 1079\n","es 994\n","ru 826\n","hi 508\n","ko 444\n","zh 409\n","sv 384\n","pt 316\n","cn 313\n","fi 295\n","nl 248\n","da 224\n","pl 219\n","tr 150\n","cs 130\n","el 113\n","no 106\n","fa 101\n","hu 100\n","ta 78\n","th 75\n","he 67\n","sr 63\n","ro 57\n","te 45\n","ar 39\n","ml 36\n","xx 33\n","hr 29\n","bn 29\n","mr 25\n","et 24\n","is 24\n","tl 23\n","id 20\n","lv 18\n","ka 18\n","sl 17\n","uk 16\n","bs 14\n","ca 12\n","Name: original_language, dtype: int64"]},"metadata":{},"execution_count":23}]},{"cell_type":"markdown","source":["# Constructing the dataset used for item based recommendation"],"metadata":{"id":"fmm6lJZH27-5"}},{"cell_type":"code","source":["keywords['id'] = keywords['id'].astype('int')\n","credits['id'] = credits['id'].astype('int')"],"metadata":{"id":"qHk24Ai_l_tH","executionInfo":{"status":"ok","timestamp":1688574139845,"user_tz":-210,"elapsed":589,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":24,"outputs":[]},{"cell_type":"code","source":["metadata = metadata.merge(credits, on='id')\n","metadata = metadata.merge(keywords, on='id')"],"metadata":{"id":"2MNqsMX13JZJ","executionInfo":{"status":"ok","timestamp":1688574651033,"user_tz":-210,"elapsed":503,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":32,"outputs":[]},{"cell_type":"markdown","source":["## Use only the available movies in TMDB"],"metadata":{"id":"32SR5rG64Vxy"}},{"cell_type":"code","source":["rec_data = metadata[metadata['id'].isin(links)].copy()\n","rec_data.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GLHc1UW-3NDu","executionInfo":{"status":"ok","timestamp":1688574670536,"user_tz":-210,"elapsed":593,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"f66c62c1-7700-4a92-e249-56e145731564"},"execution_count":33,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(45459, 45)"]},"metadata":{},"execution_count":33}]},{"cell_type":"code","source":["rec_data = rec_data.drop_duplicates(subset='id')"],"metadata":{"id":"hoaS3X9ma9F-","executionInfo":{"status":"ok","timestamp":1688583554915,"user_tz":-210,"elapsed":579,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":140,"outputs":[]},{"cell_type":"markdown","source":["## Adding 'director' column"],"metadata":{"id":"Ax8jTRG05rWa"}},{"cell_type":"code","source":["def is_float(string):\n"," try:\n"," float(string)\n"," return True\n"," except ValueError:\n"," return False"],"metadata":{"id":"JorU4WBB40kq","executionInfo":{"status":"ok","timestamp":1688574823282,"user_tz":-210,"elapsed":505,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":34,"outputs":[]},{"cell_type":"code","source":["def directors_names(job_crew, index):\n"," if not isinstance(job_crew, float):\n"," jobs = job_crew.split(', ')\n"," director_indices = [i for i, job in enumerate(jobs) if job == 'Director']\n"," if director_indices:\n"," names = rec_data.loc[index, 'name_crew']\n"," if not isinstance(names, float):\n"," names = names.split(', ')\n"," director_names = [names[i] for i in director_indices]\n"," return ', '.join(director_names)\n"," return np.nan"],"metadata":{"id":"4Wv7jSsX5wS7","executionInfo":{"status":"ok","timestamp":1688575844013,"user_tz":-210,"elapsed":467,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":37,"outputs":[]},{"cell_type":"code","source":["for i in rec_data.index:\n"," rec_data.loc[i, 'director'] = directors_names(rec_data.loc[i, 'job_crew'], i)\n","rec_data['director']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RJZBCGZS62pU","executionInfo":{"status":"ok","timestamp":1688577018900,"user_tz":-210,"elapsed":7640,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"9e4659dd-06ad-4334-9192-53b2f2f33640"},"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 John Lasseter\n","1 Joe Johnston\n","2 Howard Deutch\n","3 Forest Whitaker\n","4 Charles Shyer\n"," ... \n","45454 Hamid Nematollah\n","45455 Lav Diaz\n","45456 Mark L. Lester\n","45457 Yakov Protazanov\n","45458 Daisy Asquith\n","Name: director, Length: 45459, dtype: object"]},"metadata":{},"execution_count":38}]},{"cell_type":"markdown","source":["# Generating the final dataset"],"metadata":{"id":"8SMtZaf6EkMD"}},{"cell_type":"code","source":["rec_data = rec_data [['id', 'original_language', 'overview','tagline', 'title', 'name_genres', 'name_cast', 'name_keywords', 'director']]"],"metadata":{"id":"DhM20iQwHV_1","executionInfo":{"status":"ok","timestamp":1688579331516,"user_tz":-210,"elapsed":613,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":84,"outputs":[]},{"cell_type":"code","source":["rec_data = rec_data.replace(np.nan, '')"],"metadata":{"id":"yawZztUkFTD1","executionInfo":{"status":"ok","timestamp":1688580086363,"user_tz":-210,"elapsed":1800,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":87,"outputs":[]},{"cell_type":"code","source":["rec_data = rec_data.replace('', 'Not mentioned')"],"metadata":{"id":"F_96ZhozO--3","executionInfo":{"status":"ok","timestamp":1688580411504,"user_tz":-210,"elapsed":498,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":95,"outputs":[]},{"cell_type":"code","source":["rec_data['description'] = 'Title: ' + rec_data['title'] + '. Director: ' + rec_data['director'] + '. Genres: ' + rec_data['name_genres'] + '. Overview: ' + rec_data['overview'] + ' Keywords: ' + rec_data['name_keywords'] + '. Language: ' + rec_data['original_language'] + '.'"],"metadata":{"id":"fywEpVC9N1ae","executionInfo":{"status":"ok","timestamp":1688582041083,"user_tz":-210,"elapsed":448,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":115,"outputs":[]},{"cell_type":"code","source":["def clean_text (text) :\n"," try:\n"," # Remove new line and tabs\n"," clean = text.replace(\"\\n\", \" \")\n"," clean = clean.replace(\"\\t\", \" \")\n"," clean = clean.replace(\"\\r\", \" \")\n"," clean = clean.replace(\"Â\\xa0\", \"\") # non-breaking space\n","\n"," # Remove all punctuation and special characters\n"," # clean = re.sub(\n"," # r\"([^\\s\\w]|_)+\", \"\", clean\n"," # ) # noqa W695 invalid escape sequence '\\s'\n","\n"," # If you want to keep some punctuation, see below commented out example\n"," clean = re.sub(r'([^,.:\\s\\w\\-]|_)+','', clean)\n","\n"," # Skip further processing if the text will be used in BERT tokenization\n","\n"," except Exception:\n"," print(\"Cannot clean non-existent text\")\n"," clean = \"\"\n","\n"," return clean"],"metadata":{"id":"EWUD2uTEGdTa","executionInfo":{"status":"ok","timestamp":1688582398315,"user_tz":-210,"elapsed":472,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":128,"outputs":[]},{"cell_type":"code","source":["rec_data['description'] = rec_data['description'].apply(clean_text)"],"metadata":{"id":"yQPx5nkFGlb-","executionInfo":{"status":"ok","timestamp":1688582648519,"user_tz":-210,"elapsed":2256,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":130,"outputs":[]},{"cell_type":"code","source":["final_rec_data = rec_data[['id', 'title', 'description']]\n","final_rec_data = final_rec_data.drop_duplicates(subset='id')"],"metadata":{"id":"4YjLmb1TXtvr","executionInfo":{"status":"ok","timestamp":1688584254391,"user_tz":-210,"elapsed":988,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":141,"outputs":[]},{"cell_type":"code","source":["final_rec_data.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vEbcByGnZWLY","executionInfo":{"status":"ok","timestamp":1688584258284,"user_tz":-210,"elapsed":931,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"20c3aea6-aa57-4e5f-c2e6-94e884b98d26"},"execution_count":142,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(45429, 3)"]},"metadata":{},"execution_count":142}]},{"cell_type":"markdown","source":["## Saving the dataset"],"metadata":{"id":"uYO9OW7sXZMF"}},{"cell_type":"code","source":["final_rec_data.to_csv('/content/drive/MyDrive/Rec/data/cleaned/descriptions.csv', index=False)"],"metadata":{"id":"SVR8F9QaZZXL","executionInfo":{"status":"ok","timestamp":1688584265792,"user_tz":-210,"elapsed":1909,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":143,"outputs":[]},{"cell_type":"markdown","source":["# REC"],"metadata":{"id":"ty8JI6oFhQ_d"}},{"cell_type":"code","source":["d = pd.read_csv('/content/drive/MyDrive/Rec/data/cleaned/descriptions.csv')"],"metadata":{"id":"gAthfffufZXF","executionInfo":{"status":"ok","timestamp":1688584746369,"user_tz":-210,"elapsed":1187,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","from sklearn.metrics.pairwise import linear_kernel, cosine_similarity\n","\n","tfidf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0.0, stop_words='english')\n","tfidf_matrix = tfidf.fit_transform(d['description'])\n","# cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)\n","# cosine_sim = cosine_sim.astype(np.float16)\n"],"metadata":{"id":"4nGOUldZY6X7","executionInfo":{"status":"ok","timestamp":1688584778153,"user_tz":-210,"elapsed":17021,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["def recommender (title, num_recommends):\n"," idx = d[d['title'] == title].index[0]\n"," cosine_sim = cosine_similarity(tfidf_matrix[int(idx)], tfidf_matrix)\n"," similarity_scores = list(enumerate(cosine_sim[0]))\n"," similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)\n"," similarity_scores = similarity_scores[1: num_recommends + 1]\n"," movie_indices = [i[0] for i in similarity_scores]\n"," return d.iloc[movie_indices]['title']"],"metadata":{"id":"M40vz12Oajc3","executionInfo":{"status":"ok","timestamp":1688585022061,"user_tz":-210,"elapsed":848,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":23,"outputs":[]},{"cell_type":"markdown","source":[],"metadata":{"id":"zpcUEJrDhZLY"}},{"cell_type":"code","source":["! pip install scikit-surprise"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nx9G3cXEkbCZ","executionInfo":{"status":"ok","timestamp":1688598659233,"user_tz":-210,"elapsed":51093,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"3433f6f6-58f4-4625-8ad7-3b565166eee0"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting scikit-surprise\n"," Downloading scikit-surprise-1.1.3.tar.gz (771 kB)\n","\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/772.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m772.0/772.0 kB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: joblib>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-surprise) (1.2.0)\n","Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-surprise) (1.22.4)\n","Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-surprise) (1.10.1)\n","Building wheels for collected packages: scikit-surprise\n"," Building wheel for scikit-surprise (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3096320 sha256=ab360f2850ab501540eeccaf1058521f2c63a69cb989d9308e7f3d63bc789795\n"," Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445\n","Successfully built scikit-surprise\n","Installing collected packages: scikit-surprise\n","Successfully installed scikit-surprise-1.1.3\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","from surprise import Dataset, SVD ,Reader\n","from sklearn.metrics.pairwise import linear_kernel, cosine_similarity\n","from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n","from surprise.model_selection import cross_validate , KFold\n","from surprise import model_selection"],"metadata":{"id":"9IJpu0c3f7ub","executionInfo":{"status":"ok","timestamp":1688598692049,"user_tz":-210,"elapsed":1775,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["from surprise import Dataset, Reader, SVD\n","from surprise.model_selection import train_test_split\n","from surprise.accuracy import rmse\n","\n","# load data from a CSV file\n","data = pd.read_csv('/content/drive/MyDrive/Rec/data/raw/ratings_small.csv')\n","\n","# define the Reader object\n","reader = Reader(rating_scale=(1, 5))\n","\n","# load the data into the Dataset object\n","dataset = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)\n","\n","# split the data into training and testing sets\n","trainset, testset = train_test_split(dataset, test_size=0.2)\n","\n","# define the SVD algorithm\n","algo = SVD(n_factors=100, n_epochs=20)\n","\n","# train the algorithm on the training set\n","algo.fit(trainset)\n","\n","# make predictions on the testing set\n","predictions = algo.test(testset)\n","\n","# evaluate the performance of the algorithm\n","rmse_score = rmse(predictions)\n","print('RMSE:', rmse_score)\n","\n","# make recommendations for a given user\n","user_id = 24256\n","items_to_recommend = []\n","for item_id in data['movieId'].unique():\n"," predicted_rating = algo.predict(user_id, item_id).est\n"," if predicted_rating >= 4.8:\n"," items_to_recommend.append(item_id)\n","print('Items to recommend:', items_to_recommend)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"s5T_lyVHoDOg","executionInfo":{"status":"ok","timestamp":1688599195314,"user_tz":-210,"elapsed":1948,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"845856e2-33ef-4459-9bb1-e81dca281acf"},"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":["RMSE: 0.8969\n","RMSE: 0.8968864510559503\n","Items to recommend: []\n"]}]},{"cell_type":"code","source":["links = links.rename(columns={'imdbId' : 'imdb_id'})\n","links['movieId']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"DYxbl8NX79Dr","executionInfo":{"status":"ok","timestamp":1688610513785,"user_tz":-210,"elapsed":661,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"afb9d051-07a2-441a-dd84-04f8c1a56530"},"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" movieId imdb_id tmdbId\n","0 1 114709 862.0\n","1 2 113497 8844.0\n","2 3 113228 15602.0\n","3 4 114885 31357.0\n","4 5 113041 11862.0"],"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
movieIdimdb_idtmdbId
01114709862.0
121134978844.0
2311322815602.0
3411488531357.0
4511304111862.0
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":34}]},{"cell_type":"code","source":["links['imdb_id']=links['imdb_id'].astype(int)"],"metadata":{"id":"Oyogeoo--vRr","executionInfo":{"status":"ok","timestamp":1688609761885,"user_tz":-210,"elapsed":662,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}}},"execution_count":27,"outputs":[]},{"cell_type":"code","source":["2 in cr['id']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Sil04xEt_INh","executionInfo":{"status":"ok","timestamp":1688610594282,"user_tz":-210,"elapsed":719,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"87b039fd-7f7f-4a01-e6d7-75d5234e2e15"},"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","source":["cr = cr.merge(links, on='imdb_id')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":311},"id":"R7IM3WBY8Fbh","executionInfo":{"status":"error","timestamp":1688609767176,"user_tz":-210,"elapsed":715,"user":{"displayName":"Amir Hossein Karami","userId":"12632705231641967217"}},"outputId":"1eb8c903-36cf-4c15-9cb8-c4b3229b7edd"},"execution_count":28,"outputs":[{"output_type":"error","ename":"ValueError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlinks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'imdb_id'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 10091\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmerge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10092\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m> 10093\u001b[0;31m return merge(\n\u001b[0m\u001b[1;32m 10094\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10095\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0mvalidate\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m ) -> DataFrame:\n\u001b[0;32m--> 110\u001b[0;31m op = _MergeOperation(\n\u001b[0m\u001b[1;32m 111\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, indicator, validate)\u001b[0m\n\u001b[1;32m 705\u001b[0m \u001b[0;31m# validate the merge keys dtypes. We may need to coerce\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0;31m# to avoid incompatible dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_coerce_merge_keys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;31m# If argument passed to validate,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m_maybe_coerce_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1338\u001b[0m \u001b[0minferred_right\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0minferred_left\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstring_types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1339\u001b[0m ):\n\u001b[0;32m-> 1340\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1342\u001b[0m \u001b[0;31m# datetimelikes must match exactly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mValueError\u001b[0m: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat"]}]},{"cell_type":"code","source":[],"metadata":{"id":"tkpi2hXk-gXH"},"execution_count":null,"outputs":[]}]}