{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [], "machine_shape": "hm", "mount_file_id": "1aBrZOQRBhTOgg2wvc0sh1d79m9abNU-O", "authorship_tag": "ABX9TyOdcckjc7kMuJJm+A64/dzt", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# idp_LiteratureResearch_Tool: Colab example run\n", "\n", "Mounts Google Drive, changes into the project directory stored there, installs the packages listed in `requirements.txt`, and runs `example_run.py`.\n", "\n", "Run the cells top to bottom on a fresh runtime; the `%cd` cell assumes the project lives at `MyDrive/git/idp_LiteratureResearch_Tool/`." ], "metadata": { "id": "NDK6pgcVQ6RI" } }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L76IjCQkviFl", "outputId": "eebb493e-ff37-4336-9a03-8b39307627fd" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "%cd /content/drive/MyDrive/git/idp_LiteratureResearch_Tool/\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PnedAltsxot6", "outputId": "0de30b5e-0ce2-4adf-aff0-7e952e5087c3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/drive/MyDrive/git/idp_LiteratureResearch_Tool\n" ] } ] }, { "cell_type": "code", "source": [ "!ls" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CPRrgG9Fx06U", "outputId": "62224f1a-a049-4c40-89a5-4f4a1b888842" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "example_run.py\tliterature README.md requirements.txt\n", "examples\tlrt\t reports setup.py\n" ] } ] }, { "cell_type": "code", "source": [ "%pip install -r requirements.txt" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "w2ruvvI-yLeD", "outputId": "58b61e2e-42a0-462b-8745-934b14aee1fd" }, 
"execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting evaluate==0.2.2\n", " Downloading evaluate-0.2.2-py3-none-any.whl (69 kB)\n", "\u001b[K |████████████████████████████████| 69 kB 4.9 MB/s \n", "\u001b[?25hCollecting kmeans_pytorch==0.3\n", " Downloading kmeans_pytorch-0.3-py3-none-any.whl (4.4 kB)\n", "Requirement already satisfied: numpy==1.21.6 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 3)) (1.21.6)\n", "Requirement already satisfied: scikit_learn==1.0.2 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 4)) (1.0.2)\n", "Collecting sentence_transformers==2.2.2\n", " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n", "\u001b[K |████████████████████████████████| 85 kB 4.9 MB/s \n", "\u001b[?25hCollecting setuptools==63.4.1\n", " Downloading setuptools-63.4.1-py3-none-any.whl (1.2 MB)\n", "\u001b[K |████████████████████████████████| 1.2 MB 47.7 MB/s \n", "\u001b[?25hRequirement already satisfied: torch==1.12.1 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 7)) (1.12.1+cu113)\n", "Requirement already satisfied: yellowbrick==1.5 in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 8)) (1.5)\n", "Collecting transformers==4.22.1\n", " Downloading transformers-4.22.1-py3-none-any.whl (4.9 MB)\n", "\u001b[K |████████████████████████████████| 4.9 MB 56.6 MB/s \n", "\u001b[?25hCollecting textdistance==4.5.0\n", " Downloading textdistance-4.5.0-py3-none-any.whl (31 kB)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (2.23.0)\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (2022.8.2)\n", "Collecting 
multiprocess\n", " Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)\n", "\u001b[K |████████████████████████████████| 115 kB 65.0 MB/s \n", "\u001b[?25hRequirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (4.64.1)\n", "Collecting responses<0.19\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Collecting huggingface-hub>=0.7.0\n", " Downloading huggingface_hub-0.10.0-py3-none-any.whl (163 kB)\n", "\u001b[K |████████████████████████████████| 163 kB 60.3 MB/s \n", "\u001b[?25hRequirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (0.3.5.1)\n", "Collecting datasets>=2.0.0\n", " Downloading datasets-2.5.1-py3-none-any.whl (431 kB)\n", "\u001b[K |████████████████████████████████| 431 kB 51.2 MB/s \n", "\u001b[?25hCollecting xxhash\n", " Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", "\u001b[K |████████████████████████████████| 212 kB 52.0 MB/s \n", "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (4.12.0)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (1.3.5)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from evaluate==0.2.2->-r requirements.txt (line 1)) (21.3)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit_learn==1.0.2->-r requirements.txt (line 4)) (3.1.0)\n", "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit_learn==1.0.2->-r requirements.txt (line 4)) (1.1.0)\n", "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from scikit_learn==1.0.2->-r 
requirements.txt (line 4)) (1.7.3)\n", "Requirement already satisfied: torchvision in /usr/local/lib/python3.7/dist-packages (from sentence_transformers==2.2.2->-r requirements.txt (line 5)) (0.13.1+cu113)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from sentence_transformers==2.2.2->-r requirements.txt (line 5)) (3.7)\n", "Collecting sentencepiece\n", " Downloading sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[K |████████████████████████████████| 1.3 MB 53.7 MB/s \n", "\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.12.1->-r requirements.txt (line 7)) (4.1.1)\n", "Requirement already satisfied: matplotlib!=3.0.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from yellowbrick==1.5->-r requirements.txt (line 8)) (3.2.2)\n", "Requirement already satisfied: cycler>=0.10.0 in /usr/local/lib/python3.7/dist-packages (from yellowbrick==1.5->-r requirements.txt (line 8)) (0.11.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.22.1->-r requirements.txt (line 9)) (3.8.0)\n", "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", "\u001b[K |████████████████████████████████| 6.6 MB 40.9 MB/s \n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.22.1->-r requirements.txt (line 9)) (2022.6.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers==4.22.1->-r requirements.txt (line 9)) (6.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (3.8.1)\n", "Requirement already satisfied: pyarrow>=6.0.0 in 
/usr/local/lib/python3.7/dist-packages (from datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (6.0.1)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (1.3.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (6.0.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (22.1.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (1.8.1)\n", "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (0.13.0)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (2.1.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (1.2.0)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.2.2->-r requirements.txt (line 1)) (4.0.2)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick==1.5->-r requirements.txt (line 8)) (1.4.4)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick==1.5->-r requirements.txt (line 8)) (3.0.9)\n", 
"Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick==1.5->-r requirements.txt (line 8)) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib!=3.0.0,>=2.0.2->yellowbrick==1.5->-r requirements.txt (line 8)) (1.15.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->evaluate==0.2.2->-r requirements.txt (line 1)) (2022.6.15)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->evaluate==0.2.2->-r requirements.txt (line 1)) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->evaluate==0.2.2->-r requirements.txt (line 1)) (1.24.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->evaluate==0.2.2->-r requirements.txt (line 1)) (2.10)\n", "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n", " Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n", "\u001b[K |████████████████████████████████| 127 kB 53.0 MB/s \n", "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->evaluate==0.2.2->-r requirements.txt (line 1)) (3.8.1)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from nltk->sentence_transformers==2.2.2->-r requirements.txt (line 5)) (7.1.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->evaluate==0.2.2->-r requirements.txt (line 1)) (2022.2.1)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.7/dist-packages (from torchvision->sentence_transformers==2.2.2->-r requirements.txt (line 5)) (7.1.2)\n", "Building 
wheels for collected packages: sentence-transformers\n", " Building wheel for sentence-transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for sentence-transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125938 sha256=422c6b8ba07037cbc0021b7dd77779f2f4cabd92e9a6edd18099753cd88d92d1\n", " Stored in directory: /root/.cache/pip/wheels/bf/06/fb/d59c1e5bd1dac7f6cf61ec0036cc3a10ab8fecaa6b2c3d3ee9\n", "Successfully built sentence-transformers\n", "Installing collected packages: urllib3, xxhash, tokenizers, responses, multiprocess, huggingface-hub, transformers, sentencepiece, datasets, textdistance, setuptools, sentence-transformers, kmeans-pytorch, evaluate\n", " Attempting uninstall: urllib3\n", " Found existing installation: urllib3 1.24.3\n", " Uninstalling urllib3-1.24.3:\n", " Successfully uninstalled urllib3-1.24.3\n", " Attempting uninstall: setuptools\n", " Found existing installation: setuptools 57.4.0\n", " Uninstalling setuptools-57.4.0:\n", " Successfully uninstalled setuptools-57.4.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "ipython 7.9.0 requires jedi>=0.10, which is not installed.\n", "numba 0.56.2 requires setuptools<60, but you have setuptools 63.4.1 which is incompatible.\u001b[0m\n", "Successfully installed datasets-2.5.1 evaluate-0.2.2 huggingface-hub-0.10.0 kmeans-pytorch-0.3 multiprocess-0.70.13 responses-0.18.0 sentence-transformers-2.2.2 sentencepiece-0.1.97 setuptools-63.4.1 textdistance-4.5.0 tokenizers-0.12.1 transformers-4.22.1 urllib3-1.25.11 xxhash-3.0.0\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "pkg_resources" ] } } }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "!python example_run.py" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "r5s28dVs4vmi", "outputId": "17395da1-2d67-48ad-a4f4-e885dfedee77" }, "execution_count": null, "outputs": [ { "metadata": { "tags": null }, "name": "stdout", "output_type": "stream", "text": [ "The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. 
You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n", "Moving 0 files to the new cache system\n", "0it [00:00, ?it/s]\n", "Downloading: 100% 1.18k/1.18k [00:00<00:00, 1.23MB/s]\n", "Downloading: 100% 190/190 [00:00<00:00, 183kB/s]\n", "Downloading: 100% 10.6k/10.6k [00:00<00:00, 5.27MB/s]\n", "Downloading: 100% 612/612 [00:00<00:00, 537kB/s]\n", "Downloading: 100% 116/116 [00:00<00:00, 108kB/s]\n", "Downloading: 100% 39.3k/39.3k [00:00<00:00, 628kB/s]\n", "Downloading: 100% 90.9M/90.9M [00:01<00:00, 47.6MB/s]\n", "Downloading: 100% 53.0/53.0 [00:00<00:00, 52.2kB/s]\n", "Downloading: 100% 112/112 [00:00<00:00, 93.7kB/s]\n", "Downloading: 100% 466k/466k [00:00<00:00, 1.49MB/s]\n", "Downloading: 100% 350/350 [00:00<00:00, 299kB/s]\n", "Downloading: 100% 13.2k/13.2k [00:00<00:00, 8.80MB/s]\n", "Downloading: 100% 232k/232k [00:00<00:00, 1.24MB/s]\n", "Downloading: 100% 349/349 [00:00<00:00, 293kB/s]\n", "Downloading: 100% 1.92k/1.92k [00:00<00:00, 1.72MB/s]\n", "Downloading: 100% 792k/792k [00:00<00:00, 12.8MB/s]\n", "Downloading: 100% 2.42M/2.42M [00:00<00:00, 5.44MB/s]\n", "Downloading: 100% 1.79k/1.79k [00:00<00:00, 1.58MB/s]\n", "Downloading: 100% 1.38k/1.38k [00:00<00:00, 1.00MB/s]\n", "Downloading: 100% 892M/892M [00:17<00:00, 51.9MB/s]\n", ">>> pipeline starts...\n", ">>> start generating word embeddings...\n", ">>> successfully generated word embeddings...\n", ">>> start clustering...\n", ">>> The best K is 2.\n", ">>> finished clustering...\n", ">>> start keywords extraction\n", ">>> finished keywords extraction\n", ">>> pipeline finished!\n", "\n", "['machine translation/similar language translation/news translation', 'natural language processing/nlp/natural language inference', 'model pretraining/pretraining/pre-training', 'wmt 2020', 'model architecture']\n", "['deep learning/bayesian deep learning/machine learning', 'scene reconstruction/face recognition', 'convolutional networks', 'ilsvr', 
'classification']\n" ] } ] } ] }