{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ifq5xlTrsc7B", "outputId": "48823469-9e7f-47e2-979c-02fff9911e79" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting datasets==1.17.0\n", " Downloading datasets-1.17.0-py3-none-any.whl (306 kB)\n", "\u001b[K |████████████████████████████████| 306 kB 5.1 MB/s \n", "\u001b[?25hCollecting transformers==4.15.0\n", " Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)\n", "\u001b[K |████████████████████████████████| 3.4 MB 20.1 MB/s \n", "\u001b[?25hCollecting tokenizers\n", " Downloading tokenizers-0.11.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)\n", "\u001b[K |████████████████████████████████| 6.8 MB 40.5 MB/s \n", "\u001b[?25hCollecting xxhash\n", " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", "\u001b[K |████████████████████████████████| 243 kB 54.9 MB/s \n", "\u001b[?25hRequirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (4.62.3)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (0.3.4)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (1.3.5)\n", "Collecting huggingface-hub<1.0.0,>=0.1.0\n", " Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n", "\u001b[K |████████████████████████████████| 67 kB 4.7 MB/s \n", "\u001b[?25hCollecting aiohttp\n", " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", "\u001b[K |████████████████████████████████| 1.1 MB 51.2 MB/s \n", "\u001b[?25hCollecting fsspec[http]>=2021.05.0\n", " Downloading fsspec-2022.1.0-py3-none-any.whl (133 kB)\n", "\u001b[K |████████████████████████████████| 133 kB 52.3 MB/s \n", "\u001b[?25hRequirement already satisfied: 
pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (6.0.1)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (2.23.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (21.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (1.19.5)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (4.10.1)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==1.17.0) (0.70.12.2)\n", "Collecting tokenizers\n", " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", "\u001b[K |████████████████████████████████| 3.3 MB 17.8 MB/s \n", "\u001b[?25hCollecting sacremoses\n", " Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)\n", "\u001b[K |████████████████████████████████| 895 kB 48.8 MB/s \n", "\u001b[?25hCollecting pyyaml>=5.1\n", " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", "\u001b[K |████████████████████████████████| 596 kB 44.0 MB/s \n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.15.0) (2019.12.20)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.15.0) (3.4.2)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.17.0) (3.10.0.2)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==1.17.0) (3.0.7)\n", "Requirement already satisfied: 
idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.17.0) (2.10)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.17.0) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.17.0) (2021.10.8)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.17.0) (3.0.4)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n", "\u001b[K |████████████████████████████████| 94 kB 3.2 MB/s \n", "\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.17.0) (2.0.11)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.17.0) (21.4.0)\n", "Collecting asynctest==0.13.0\n", " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", "Collecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", "Collecting async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", "\u001b[K |████████████████████████████████| 144 kB 53.4 MB/s \n", "\u001b[?25hCollecting yarl<2.0,>=1.0\n", " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", "\u001b[K |████████████████████████████████| 271 kB 51.7 MB/s \n", "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets==1.17.0) 
(3.7.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.17.0) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.17.0) (2018.9)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==1.17.0) (1.15.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.15.0) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.15.0) (1.1.0)\n", "Installing collected packages: multidict, frozenlist, yarl, asynctest, async-timeout, aiosignal, pyyaml, fsspec, aiohttp, xxhash, tokenizers, sacremoses, huggingface-hub, transformers, datasets\n", " Attempting uninstall: pyyaml\n", " Found existing installation: PyYAML 3.13\n", " Uninstalling PyYAML-3.13:\n", " Successfully uninstalled PyYAML-3.13\n", "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-1.17.0 frozenlist-1.3.0 fsspec-2022.1.0 huggingface-hub-0.4.0 multidict-6.0.2 pyyaml-6.0 sacremoses-0.0.47 tokenizers-0.10.3 transformers-4.15.0 xxhash-2.0.2 yarl-1.7.2\n" ] } ], "source": [ "! pip install datasets==1.17.0 transformers==4.15.0 tokenizers" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ODMsWgERsuxG", "outputId": "87058980-d31d-4e05-c72e-d22aea47fb54" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[31mWARNING! `transformers-cli login` is deprecated and will be removed in v5. 
Please use `huggingface-cli login` instead.\u001b[0m\n", "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " \n", "Username: Vasanth\n", "Password: \n", "ERROR:root:HfApi.login: This method is deprecated in favor of `set_access_token`.\n", "Login successful\n", "Your token: [REDACTED] \n", "\n", "Your token has been saved to /root/.huggingface/token\n" ] } ], "source": [ "! transformers-cli login" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q3UPBkputdWH", "outputId": "a8686c20-df9b-48d6-d6c6-24ac07223a6a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... 
Done\n", "The following packages were automatically installed and are no longer required:\n", " cuda-command-line-tools-10-0 cuda-command-line-tools-10-1\n", " cuda-command-line-tools-11-0 cuda-compiler-10-0 cuda-compiler-10-1\n", " cuda-compiler-11-0 cuda-cuobjdump-10-0 cuda-cuobjdump-10-1\n", " cuda-cuobjdump-11-0 cuda-cupti-10-0 cuda-cupti-10-1 cuda-cupti-11-0\n", " cuda-cupti-dev-11-0 cuda-documentation-10-0 cuda-documentation-10-1\n", " cuda-documentation-11-0 cuda-documentation-11-1 cuda-gdb-10-0 cuda-gdb-10-1\n", " cuda-gdb-11-0 cuda-gpu-library-advisor-10-0 cuda-gpu-library-advisor-10-1\n", " cuda-libraries-10-0 cuda-libraries-10-1 cuda-libraries-11-0\n", " cuda-memcheck-10-0 cuda-memcheck-10-1 cuda-memcheck-11-0 cuda-nsight-10-0\n", " cuda-nsight-10-1 cuda-nsight-11-0 cuda-nsight-11-1 cuda-nsight-compute-10-0\n", " cuda-nsight-compute-10-1 cuda-nsight-compute-11-0 cuda-nsight-compute-11-1\n", " cuda-nsight-systems-10-1 cuda-nsight-systems-11-0 cuda-nsight-systems-11-1\n", " cuda-nvcc-10-0 cuda-nvcc-10-1 cuda-nvcc-11-0 cuda-nvdisasm-10-0\n", " cuda-nvdisasm-10-1 cuda-nvdisasm-11-0 cuda-nvml-dev-10-0 cuda-nvml-dev-10-1\n", " cuda-nvml-dev-11-0 cuda-nvprof-10-0 cuda-nvprof-10-1 cuda-nvprof-11-0\n", " cuda-nvprune-10-0 cuda-nvprune-10-1 cuda-nvprune-11-0 cuda-nvtx-10-0\n", " cuda-nvtx-10-1 cuda-nvtx-11-0 cuda-nvvp-10-0 cuda-nvvp-10-1 cuda-nvvp-11-0\n", " cuda-nvvp-11-1 cuda-samples-10-0 cuda-samples-10-1 cuda-samples-11-0\n", " cuda-samples-11-1 cuda-sanitizer-11-0 cuda-sanitizer-api-10-1\n", " cuda-toolkit-10-0 cuda-toolkit-10-1 cuda-toolkit-11-0 cuda-toolkit-11-1\n", " cuda-tools-10-0 cuda-tools-10-1 cuda-tools-11-0 cuda-tools-11-1\n", " cuda-visual-tools-10-0 cuda-visual-tools-10-1 cuda-visual-tools-11-0\n", " cuda-visual-tools-11-1 default-jre dkms freeglut3 freeglut3-dev\n", " keyboard-configuration libargon2-0 libcap2 libcryptsetup12\n", " libdevmapper1.02.1 libfontenc1 libidn11 libip4tc0 libjansson4\n", " libnvidia-cfg1-510 libnvidia-common-460 
libnvidia-common-510\n", " libnvidia-extra-510 libnvidia-fbc1-510 libnvidia-gl-510 libpam-systemd\n", " libpolkit-agent-1-0 libpolkit-backend-1-0 libpolkit-gobject-1-0 libxfont2\n", " libxi-dev libxkbfile1 libxmu-dev libxmu-headers libxnvctrl0 libxtst6\n", " nsight-compute-2020.2.1 nsight-compute-2022.1.0 nsight-systems-2020.3.2\n", " nsight-systems-2020.3.4 nsight-systems-2021.5.2 nvidia-dkms-510\n", " nvidia-kernel-common-510 nvidia-kernel-source-510 nvidia-modprobe\n", " nvidia-settings openjdk-11-jre policykit-1 policykit-1-gnome python3-xkit\n", " screen-resolution-extra systemd systemd-sysv udev x11-xkb-utils\n", " xserver-common xserver-xorg-core-hwe-18.04 xserver-xorg-video-nvidia-510\n", "Use 'sudo apt autoremove' to remove them.\n", "The following NEW packages will be installed:\n", " git-lfs\n", "0 upgraded, 1 newly installed, 0 to remove and 39 not upgraded.\n", "Need to get 2,129 kB of archives.\n", "After this operation, 7,662 kB of additional disk space will be used.\n", "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 git-lfs amd64 2.3.4-1 [2,129 kB]\n", "Fetched 2,129 kB in 1s (2,247 kB/s)\n", "debconf: unable to initialize frontend: Dialog\n", "debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)\n", "debconf: falling back to frontend: Readline\n", "debconf: unable to initialize frontend: Readline\n", "debconf: (This frontend requires a controlling tty.)\n", "debconf: falling back to frontend: Teletype\n", "dpkg-preconfigure: unable to re-open stdin: \n", "Selecting previously unselected package git-lfs.\n", "(Reading database ... 
155113 files and directories currently installed.)\n", "Preparing to unpack .../git-lfs_2.3.4-1_amd64.deb ...\n", "Unpacking git-lfs (2.3.4-1) ...\n", "Setting up git-lfs (2.3.4-1) ...\n", "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n" ] } ], "source": [ "!sudo apt-get install git-lfs" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 333, "referenced_widgets": [ "3ebed9ccd2c146c6ba9488127e6ee8a6", "c7bb0364afef434c96c8bdfbf3d9e723", "c6a31cfc68f043ab9d28a6d7392ea33e", "5801e46d0d48481c8e514c2c140a9e8a", "e0ec8eb26e904a5998b074a349772647", "0ca086fa0a7845dd99a0ff45397c432e", "0c2a3926fd3e4c2886cb3b8c85bd9d17", "2ef25b76c6294972a086aa6e7e9e489b", "cf5df892e3924d6b84f961e1ed92daa4", "be3a3be2e2834515bb38c17f5afae165", "126efe970315474eb54d365c5a4fdc11", "f125a7ec77e94965a259da747c5e977a", "8229aea668854eaf9253ef7d37b3c581", "8657099513a0477cb2f9a4085676384f", "282782442cd04f1e81db3c7bcaaa5053", "fcb64948987d4384a60ed6fec1d9907d", "5a515732569d405688f42531a82bd3cc", "bf0deaa3717c4dbc9070005f43905d5d", "13306c0145e841878a890cb187d8f6bc", "4069f772bfe14beb82723ceb9f68a2bc", "e6f8b69b6f884f90a18def2d48ba0dd4", "c5fa32ca4f43447b94ec10e3c9749992", "d71e5c4ed9fb42e5a6f9ac3eccdd54c3", "7f1c4627865f4e599de8ea2334d82ae3", "5a9d9c357b9f41298e76e02b96dc5ff2", "6d0cc97c93ad43b8b442540b9aafc93d", "951853a7ea9d45da823d77d26eb87bd1", "aa62ff2a8172405b9245c500c3f6cf39", "294f15b90016443c84a137d6bd863924", "3067b686fb9245e0b5e41a029109a998", "50674d7279e748b895414859de14e8bb", "dce930c31d1e4240a9e6183b5d2c924f", "79437dae9c214df1adebe1626251ee8e", "446f2185880544f2a78595139dad32e1", "c2805d4002884bf8ae1cbf2e361f6f01", "ac1acf89e8fe471e835f166e7117ca9e", "96b651538db14ebcb14c4c4977d8c543", "be789eae91c74cfe8f64056ba0a5ba96", "6c02dc0af3794da8a31d9041d3befeda", "b9316e36473a4fdab62e41e495e01931", "bc410fcbe11d446aa510d78100b1f3c8", "3110b59b9351417da2b0e724e78ab5f3", "19c8911cb3154cd5b20eebe75ddbc689", 
"ff01c87dd90c4658b33c29cfcd07925d", "58c77a53ceda434895df23b763f68199", "fc94d6f1e8654303ad6dce9f25daa1fd", "62f0938505354841ac2019d1f3dd95a2", "0c3bfe23ae884f4ba8f8ce65a0dcaa36", "16136e65fba04c7b924436ae946d90aa", "e01debb97b294e639bf633f84543f344", "024871a4ae304c03bc05e747e79b0ca2", "2a70f9a71fc341f685c048bc05615a83", "4f116ef426514c1ba1188b265db8dfc5", "b2cd5fd9b56247ac9210d64fd2a8fdb8", "cc0d5fee4a5644cc8b99db723e153ae0", "44a4d4e42bf445be85c69a4bdb62b3d9", "c3c2f922863e44ff943c8c05635f7599", "52df163f84284d4c9ac596a6dcb0fa2f", "903db59a7618480f9d4916d71dcbaf59", "50f81e09c64d4d2ab53eeb6a415c35d8", "0735546f3dda4a58a927d2fab38f514f", "974fccbd40834d6cbd39735e00b03289", "d44a9821ec7d40b7948d6607dd201884", "a0c38704a572478588ebdf3735dde651", "002666d96d8d4ce1b680eef5b3bc5869", "78809091fe924536b0633dc3051ff56b", "3b44fdcb0d2943f09fda4f13ded2f5f0", "36eeaeb35ef74798b8d7bda7c5e979b7", "d5d670adc61c485196f512e93aeaba40", "cf5d15cd75484c78923fcf105ac03c52", "a74d849fb4d44ddfa86fffb99003dad3", "e176cb8b749c40c48332afe51cd54299", "0e7f2f9eeff0450299197c40eaef5051", "29478c59878b46e79f9882de3a3cace4", "12c61b097a004560a5dd030a4104f427", "c7833d59d9bc49d281e8d6a5b69ba323", "e16bfe268db146749efb06cdc7a569c8", "69ea2300784a43b092eedd42a62db57c", "add25ee6ae48488dbad29ca2ca92ab7e", "662f131df312495f920fd953b6e30858", "f4a35b6b8ef8482f82d58cea30dcd6f7", "49e3f8c1535e42a9a6fffa4bb536d903", "a678557b4f4b4376a7cf7690f277cc5f", "c83617250a974affb1100f80ecd09bba", "30823c0ab1c04e4ca2069ace6abdd9c0", "e4323324090f4f19833bdf3feb5b91ee", "5311220d3c0545a18d58dc76caa455d2", "f34b8dbbcc514b18990da0d4857e256d" ] }, "id": "Q7SX18zHsLz1", "outputId": "1d0cf4e4-b33d-4157-efae-478e2ead2341" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3ebed9ccd2c146c6ba9488127e6ee8a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/1.87k [00:00= mapping[0] and char_start <= mapping[1]:\n", " token_start = question_len + i\n", " 
if char_end >= mapping[0] and char_end <= mapping[1]:\n", " token_end = question_len + i + 1\n", " return {'start_positions': token_start, 'end_positions': token_end}\n", " if i == len(context_mappings) - 1:\n", " # this means the answer tokens are out of range, eg have been truncated\n", " # and therefore there is no answer\n", " token_start, token_end = 0, 0\n", " return {'start_positions': token_start, 'end_positions': token_end}" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "6057cd879aec4905919c0a6e0306b1f1", "ec58f280e1b9414a9265791ec8f5b37e", "28f2ded5887c4e7993b12540660aa31c", "61d4664e38cd48af9744dc4ad2408fc2", "6fb7e8b206c546649fd1236ebbd49521", "efb3d53bc4614ceea4fabab0502f316f", "d38311b80b2c477b980ec4b16343bbcc", "9af6e6f204b849ef8fe5f717c8d7f438", "7d23dfc9860e48e3a2e20d4c7d9e6ef3", "06d527b80eeb479e8aa7e05b367b3679", "8dfea51f1c8b4eca825cd7ec8bf7e961" ] }, "id": "yCpYQzafuHDk", "outputId": "bb5d7cd8-e2b4-4fef-9a9c-512c70932d95" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6057cd879aec4905919c0a6e0306b1f1", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/130319 [00:00