diff --git "a/finetune.ipynb" "b/finetune.ipynb" new file mode 100644--- /dev/null +++ "b/finetune.ipynb" @@ -0,0 +1,1017 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: huggingface_hub in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.23.3)\n", + "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (3.14.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\n", + "Requirement already satisfied: packaging>=20.9 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n", + "Requirement already satisfied: requests in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.12.0rc1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2.2.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.6.2)\n", + "Requirement already satisfied: datasets in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.19.2)\n", + "Requirement already satisfied: peft in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.11.1)\n", + "Requirement already satisfied: transformers[torch] in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (4.41.2)\n", + "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.14.0)\n", + "Requirement already satisfied: numpy>=1.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (1.25.0)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (16.1.0)\n", + "Requirement already satisfied: pyarrow-hotfix in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.6)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.3.8)\n", + "Requirement already satisfied: pandas in 
/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (4.66.4)\n", + "Requirement already satisfied: xxhash in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n", + "Requirement already satisfied: aiohttp in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.9.5)\n", + "Requirement already satisfied: huggingface-hub>=0.21.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.23.3)\n", + "Requirement already satisfied: packaging in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: psutil in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (5.9.0)\n", + "Requirement already satisfied: torch>=1.13.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (1.13.1)\n", + "Requirement already satisfied: accelerate>=0.21.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.30.1)\n", + "Requirement already satisfied: safetensors in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.4.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (2024.5.15)\n", + "Requirement already satisfied: tokenizers<0.20,>=0.19 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (0.19.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in 
/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.12.0rc1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2.2.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2024.6.2)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n", + "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (8.5.0.96)\n", + "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.10.3.66)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n", + "Requirement already satisfied: setuptools in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (69.5.1)\n", + "Requirement already satisfied: wheel in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (0.43.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", + "Requirement already satisfied: flash-attn in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.5.9.post1)\n", + "Requirement already satisfied: torch in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (1.13.1)\n", + "Requirement already satisfied: einops in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (0.8.0)\n", + "Requirement already satisfied: typing-extensions in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (4.12.0rc1)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.7.99)\n", + "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in 
+     ]
+    }
+   ],
+   "source": [
+    "!pip install huggingface_hub\n",
+    "!pip install -U datasets peft transformers[torch]\n",
+    "!pip install -q bitsandbytes trl accelerate\n",
+    "!pip install flash-attn --no-build-isolation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import re\n",
+    "from pprint import pprint\n",
+    "\n",
+    "import pandas as pd\n",
+    "import torch\n",
+    "from datasets import Dataset, load_dataset\n",
+    "from huggingface_hub import notebook_login\n",
+    "from peft import LoraConfig, PeftModel\n",
+    "from transformers import (\n",
+    "    AutoModelForCausalLM,\n",
+    "    AutoTokenizer,\n",
+    "    BitsAndBytesConfig,\n",
+    "    EarlyStoppingCallback,\n",
+    "    TrainingArguments,\n",
+    ")\n",
+    "from trl import SFTTrainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "torch.cuda.set_per_process_memory_fraction(0.8)"
+   ]
+  },
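+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell that builds `dataset` is not included in this excerpt. `SFTTrainer` further down reads a `\"text\"` column (`dataset_text_field=\"text\"`), so a minimal sketch could look like the cell below; the record contents are placeholders, not the notebook's actual data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical sketch -- the real data-loading cell is not shown in this excerpt.\n",
+    "records = [\n",
+    "    {\"text\": \"### Instruction: ...\\n### Response: ...\"},  # placeholder row\n",
+    "]\n",
+    "dataset = Dataset.from_list(records)  # `Dataset` comes from the imports above"
+   ]
+  },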
\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "OUTPUT_DIR = \"experiments\"\n", + " \n", + "%load_ext tensorboard\n", + "%tensorboard --logdir experiments/runs" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "from peft import get_peft_model" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "model = get_peft_model(model, peft_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848\n" + ] + } + ], + "source": [ + "model.print_trainable_parameters()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import DataCollatorForLanguageModeling\n", + "from trl import SFTTrainer, SFTConfig" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"NCCL_P2P_DISABLE\"] = \"1\"\n", + "os.environ[\"NCCL_IB_DISABLE\"] = \"1\"" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/training_args.py:1474: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "training_arguments = SFTConfig(\n", + " per_device_train_batch_size=1,\n", + " gradient_accumulation_steps=8,\n", + " optim=\"paged_adamw_32bit\",\n", + " logging_steps=1,\n", + " learning_rate=1e-4,\n", + " fp16=True,\n", + " max_grad_norm=0.3,\n", + " num_train_epochs=3,\n", + " evaluation_strategy=\"steps\",\n", + " eval_steps=0.01,\n", + " warmup_ratio=0.05,\n", + " save_strategy=\"epoch\",\n", + " group_by_length=True,\n", + " output_dir=OUTPUT_DIR,\n", + " report_to=\"tensorboard\",\n", + " save_safetensors=True,\n", + " lr_scheduler_type=\"cosine\",\n", + " seed=42,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "splitted_dataset = dataset.train_test_split(test_size=0.1, seed=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'EarlyStoppingCallback' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[57], line 10\u001b[0m\n\u001b[1;32m 1\u001b[0m trainer \u001b[38;5;241m=\u001b[39m SFTTrainer(\n\u001b[1;32m 2\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 3\u001b[0m train_dataset\u001b[38;5;241m=\u001b[39msplitted_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 4\u001b[0m eval_dataset\u001b[38;5;241m=\u001b[39msplitted_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 5\u001b[0m peft_config\u001b[38;5;241m=\u001b[39mpeft_config,\n\u001b[1;32m 6\u001b[0m 
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from peft import get_peft_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = get_peft_model(model, peft_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.print_trainable_parameters()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import DataCollatorForLanguageModeling\n",
+    "from trl import SFTTrainer, SFTConfig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Work around NCCL hangs on this machine by disabling P2P and InfiniBand transports.\n",
+    "os.environ[\"NCCL_P2P_DISABLE\"] = \"1\"\n",
+    "os.environ[\"NCCL_IB_DISABLE\"] = \"1\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_arguments = SFTConfig(\n",
+    "    per_device_train_batch_size=1,\n",
+    "    gradient_accumulation_steps=8,\n",
+    "    optim=\"paged_adamw_32bit\",\n",
+    "    logging_steps=1,\n",
+    "    learning_rate=1e-4,\n",
+    "    fp16=True,\n",
+    "    max_grad_norm=0.3,\n",
+    "    num_train_epochs=3,\n",
+    "    # early stopping needs step-aligned eval/save and best-model tracking\n",
+    "    eval_strategy=\"steps\",\n",
+    "    eval_steps=0.01,\n",
+    "    save_strategy=\"steps\",\n",
+    "    save_steps=0.01,\n",
+    "    load_best_model_at_end=True,\n",
+    "    metric_for_best_model=\"eval_loss\",\n",
+    "    warmup_ratio=0.05,\n",
+    "    group_by_length=True,\n",
+    "    output_dir=OUTPUT_DIR,\n",
+    "    report_to=\"tensorboard\",\n",
+    "    save_safetensors=True,\n",
+    "    lr_scheduler_type=\"cosine\",\n",
+    "    seed=42,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "splitted_dataset = dataset.train_test_split(test_size=0.1, seed=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainer = SFTTrainer(\n",
+    "    model=model,\n",
+    "    train_dataset=splitted_dataset['train'],\n",
+    "    eval_dataset=splitted_dataset['test'],\n",
+    "    peft_config=peft_config,\n",
+    "    dataset_text_field=\"text\",\n",
+    "    max_seq_length=512,\n",
+    "    tokenizer=tokenizer,\n",
+    "    args=training_arguments,\n",
+    "    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Make CUDA errors surface at the failing kernel instead of asynchronously later.\n",
+    "os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# torch.cuda.synchronize()\n",
+    "# torch.cuda.empty_cache()"
+   ]
+  },
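+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a sanity check on the schedule (a small sketch; the expected values are inferred from the 360-step progress bar below): one optimizer step consumes `per_device_train_batch_size * gradient_accumulation_steps = 8` examples, so 360 total steps over 3 epochs imply roughly 120 steps per epoch, i.e. about 960 training rows."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Recompute the expected step count from the actual split sizes.\n",
+    "effective_batch = (training_arguments.per_device_train_batch_size\n",
+    "                   * training_arguments.gradient_accumulation_steps)\n",
+    "steps_per_epoch = len(splitted_dataset['train']) // effective_batch\n",
+    "total_steps = steps_per_epoch * int(training_arguments.num_train_epochs)\n",
+    "print(effective_batch, steps_per_epoch, total_steps)"
+   ]
+  },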
\n", + " \n", + " \n", + " [205/360 15:44 < 12:01, 0.21 it/s, Epoch 1.70/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation Loss
41.9812001.774238
81.7535001.754978
121.7726001.726179
161.5575001.692598
201.7902001.662171
241.5989001.633827
281.5265001.615106
321.5606001.600001
361.6185001.588000
401.6466001.579654
441.6571001.567710
481.5879001.558632
521.3977001.550794
561.7046001.543783
601.4565001.538500
641.9016001.532189
681.7743001.528996
721.3900001.524170
761.5588001.519067
801.6274001.517108
841.5041001.512800
881.6682001.509464
921.5267001.505236
961.6184001.503344
1001.4519001.499353
1041.6449001.496035
1081.4690001.492282
1121.6146001.489366
1161.5917001.487346
1201.4875001.482805
1241.4160001.480361
1281.3136001.481161
1321.3344001.479421
1361.4718001.476773
1401.5405001.474109
1441.4527001.473360
1481.3230001.472112
1521.5276001.470621
1561.5351001.469403
1601.3560001.467490
1641.4927001.465348
1681.3716001.464317
1721.6287001.463003
1761.2421001.462533
1801.2844001.461138
1841.5630001.459591
1881.4210001.457585
1921.2082001.456179
1961.3508001.454647
2001.6026001.454009

\n", + "

\n", + " \n", + " \n", + " [ 2/14 00:00 < 00:08, 1.40 it/s]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[55], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:440\u001b[0m, in \u001b[0;36mSFTTrainer.train\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trl_activate_neftune(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 440\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:1885\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1883\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1885\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m 
\u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2291\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2288\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m steps_skipped) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m 2289\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 2291\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_norm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2292\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2293\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2721\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2719\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 2720\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_evaluate:\n\u001b[0;32m-> 2721\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2722\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2724\u001b[0m \u001b[38;5;66;03m# Run delayed LR scheduler now that metrics are populated\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3572\u001b[0m, in \u001b[0;36mTrainer.evaluate\u001b[0;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3569\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 3571\u001b[0m eval_loop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprediction_loop \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_legacy_prediction_loop \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation_loop\n\u001b[0;32m-> 3572\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43meval_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3573\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_dataloader\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3574\u001b[0m \u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mEvaluation\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3575\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# No point gathering the predictions if there are no metrics, otherwise we defer to\u001b[39;49;00m\n\u001b[1;32m 3576\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# self.args.prediction_loss_only\u001b[39;49;00m\n\u001b[1;32m 3577\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_metrics\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 3578\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3579\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3580\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3582\u001b[0m total_batch_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39meval_batch_size \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mworld_size\n\u001b[1;32m 3583\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetric_key_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_jit_compilation_time\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mmetrics:\n", + "File 
\u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3757\u001b[0m, in \u001b[0;36mTrainer.evaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3754\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m observed_batch_size\n\u001b[1;32m 3756\u001b[0m \u001b[38;5;66;03m# Prediction step\u001b[39;00m\n\u001b[0;32m-> 3757\u001b[0m loss, logits, labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprediction_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3758\u001b[0m main_input_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain_input_name\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3759\u001b[0m inputs_decode \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_input(inputs[main_input_name]) \u001b[38;5;28;01mif\u001b[39;00m args\u001b[38;5;241m.\u001b[39minclude_inputs_for_metrics \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3971\u001b[0m, in \u001b[0;36mTrainer.prediction_step\u001b[0;34m(self, model, inputs, prediction_loss_only, ignore_keys)\u001b[0m\n\u001b[1;32m 3969\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_labels \u001b[38;5;129;01mor\u001b[39;00m loss_without_labels:\n\u001b[1;32m 3970\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3971\u001b[0m loss, outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 3972\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean()\u001b[38;5;241m.\u001b[39mdetach()\n\u001b[1;32m 3974\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(outputs, \u001b[38;5;28mdict\u001b[39m):\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3264\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 3262\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3263\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3264\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3265\u001b[0m 
\u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 3266\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 3267\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32..forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator..decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32..forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator..decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/peft_model.py:1430\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m 1428\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1429\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1431\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1432\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1433\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1434\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1435\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1436\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1437\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1438\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1439\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1441\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m 1442\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1443\u001b[0m \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:167\u001b[0m, in \u001b[0;36madd_hook_to_module..new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_hf_hook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:380\u001b[0m, in \u001b[0;36mAlignDevicesHook.post_forward\u001b[0;34m(self, module, output)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtied_pointers_to_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mio_same_device \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_device \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 380\u001b[0m output 
\u001b[38;5;241m=\u001b[39m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_device\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:186\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 184\u001b[0m skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[0;32m--> 186\u001b[0m {\n\u001b[1;32m 187\u001b[0m k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m send_to_device(t, device, non_blocking\u001b[38;5;241m=\u001b[39mnon_blocking, skip_keys\u001b[38;5;241m=\u001b[39mskip_keys)\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 189\u001b[0m }\n\u001b[1;32m 190\u001b[0m )\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:187\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 184\u001b[0m skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[1;32m 186\u001b[0m {\n\u001b[0;32m--> 187\u001b[0m k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 189\u001b[0m }\n\u001b[1;32m 190\u001b[0m )\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n", + "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:158\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m 156\u001b[0m tensor \u001b[38;5;241m=\u001b[39m tensor\u001b[38;5;241m.\u001b[39mcpu()\n\u001b[1;32m 157\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtensor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m: \u001b[38;5;66;03m# .to() doesn't accept non_blocking as kwarg\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mto(device)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bc20cacf58bd4f21bea7fd858060b3cd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "training_args.bin: 0%| | 0.00/4.92k [00:00