diff --git "a/finetune.ipynb" "b/finetune.ipynb"
new file mode 100644
--- /dev/null
+++ "b/finetune.ipynb"
@@ -0,0 +1,1017 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: huggingface_hub in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.23.3)\n",
+ "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (3.14.0)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\n",
+ "Requirement already satisfied: packaging>=20.9 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (23.2)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n",
+ "Requirement already satisfied: requests in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2.32.3)\n",
+ "Requirement already satisfied: tqdm>=4.42.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.66.4)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.12.0rc1)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.7)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2.2.1)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.6.2)\n",
+ "Requirement already satisfied: datasets in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.19.2)\n",
+ "Requirement already satisfied: peft in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.11.1)\n",
+ "Requirement already satisfied: transformers[torch] in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (4.41.2)\n",
+ "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.14.0)\n",
+ "Requirement already satisfied: numpy>=1.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (1.25.0)\n",
+ "Requirement already satisfied: pyarrow>=12.0.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (16.1.0)\n",
+ "Requirement already satisfied: pyarrow-hotfix in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.6)\n",
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.3.8)\n",
+ "Requirement already satisfied: pandas in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.2.2)\n",
+ "Requirement already satisfied: requests>=2.32.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.32.3)\n",
+ "Requirement already satisfied: tqdm>=4.62.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (4.66.4)\n",
+ "Requirement already satisfied: xxhash in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.4.1)\n",
+ "Requirement already satisfied: multiprocess in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.70.16)\n",
+ "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n",
+ "Requirement already satisfied: aiohttp in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.9.5)\n",
+ "Requirement already satisfied: huggingface-hub>=0.21.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.23.3)\n",
+ "Requirement already satisfied: packaging in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (23.2)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (6.0.1)\n",
+ "Requirement already satisfied: psutil in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (5.9.0)\n",
+ "Requirement already satisfied: torch>=1.13.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (1.13.1)\n",
+ "Requirement already satisfied: accelerate>=0.21.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.30.1)\n",
+ "Requirement already satisfied: safetensors in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.4.3)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (2024.5.15)\n",
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (0.19.1)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.5)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.12.0rc1)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.7)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2.2.1)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2024.6.2)\n",
+ "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n",
+ "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (8.5.0.96)\n",
+ "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.10.3.66)\n",
+ "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n",
+ "Requirement already satisfied: setuptools in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (69.5.1)\n",
+ "Requirement already satisfied: wheel in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (0.43.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n",
+ "Requirement already satisfied: six>=1.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
+ "Requirement already satisfied: flash-attn in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.5.9.post1)\n",
+ "Requirement already satisfied: torch in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (1.13.1)\n",
+ "Requirement already satisfied: einops in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (0.8.0)\n",
+ "Requirement already satisfied: typing-extensions in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (4.12.0rc1)\n",
+ "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.7.99)\n",
+ "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (8.5.0.96)\n",
+ "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.10.3.66)\n",
+ "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.7.99)\n",
+ "Requirement already satisfied: setuptools in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch->flash-attn) (69.5.1)\n",
+ "Requirement already satisfied: wheel in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch->flash-attn) (0.43.0)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install huggingface_hub\n",
+ "!pip install -U datasets peft transformers[torch]\n",
+ "!pip install -q bitsandbytes trl accelerate\n",
+ "!pip install flash-attn --no-build-isolation"
+ ]
+ },
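+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The stack: `huggingface_hub` for authentication and pushing artifacts, `datasets` for data handling, `peft` for LoRA adapters, `trl` for `SFTTrainer`, `bitsandbytes` for the paged 32-bit AdamW optimizer, `accelerate` for device placement, and `flash-attn` for faster attention kernels."
+ ]
+ },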
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import re\n",
+ "from pprint import pprint\n",
+ " \n",
+ "import pandas as pd\n",
+ "import torch\n",
+ "from datasets import Dataset, load_dataset\n",
+ "from huggingface_hub import notebook_login\n",
+ "from peft import LoraConfig, PeftModel\n",
+ "from transformers import (\n",
+ " AutoModelForCausalLM,\n",
+ " AutoTokenizer,\n",
+ " BitsAndBytesConfig,\n",
+ " TrainingArguments,\n",
+ ")\n",
+ "from trl import SFTTrainer\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.cuda.set_per_process_memory_fraction(0.8) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "66d43524d1a04d309785e243f6f016a8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HTML(value='
\n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "OUTPUT_DIR = \"experiments\"\n",
+ " \n",
+ "%load_ext tensorboard\n",
+ "%tensorboard --logdir experiments/runs"
+ ]
+ },
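+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, wrap the base model with LoRA adapters. `model` and `peft_config` come from earlier cells; the trainable-parameter count printed below (6,815,744 on an 8B model) is consistent with rank-8 adapters on the four attention projections. For reference, a `LoraConfig` of that shape looks like the following (illustrative values, not the notebook's exact cell):\n",
+ "\n",
+ "```python\n",
+ "peft_config = LoraConfig(\n",
+ "    r=8,  # adapter rank: yields 6,815,744 trainable params on Llama-3-8B\n",
+ "    lora_alpha=16,  # assumed scaling factor\n",
+ "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"],\n",
+ "    lora_dropout=0.05,  # assumed\n",
+ "    bias=\"none\",\n",
+ "    task_type=\"CAUSAL_LM\",\n",
+ ")\n",
+ "```"
+ ]
+ },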
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from peft import get_peft_model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = get_peft_model(model, peft_config)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848\n"
+ ]
+ }
+ ],
+ "source": [
+ "model.print_trainable_parameters()"
+ ]
+ },
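+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Only the adapter weights are updated: 6,815,744 trainable out of 8,037,076,992 total parameters, i.e. 6.8e6 / 8.0e9 ≈ 0.0848% of the model."
+ ]
+ },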
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import DataCollatorForLanguageModeling\n",
+ "from trl import SFTTrainer, SFTConfig"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ[\"NCCL_P2P_DISABLE\"] = \"1\"\n",
+ "os.environ[\"NCCL_IB_DISABLE\"] = \"1\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/training_args.py:1474: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "training_arguments = SFTConfig(\n",
+ " per_device_train_batch_size=1,\n",
+ " gradient_accumulation_steps=8,\n",
+ " optim=\"paged_adamw_32bit\",\n",
+ " logging_steps=1,\n",
+ " learning_rate=1e-4,\n",
+ " fp16=True,\n",
+ " max_grad_norm=0.3,\n",
+ " num_train_epochs=3,\n",
+ " evaluation_strategy=\"steps\",\n",
+ " eval_steps=0.01,\n",
+ " warmup_ratio=0.05,\n",
+ " save_strategy=\"epoch\",\n",
+ " group_by_length=True,\n",
+ " output_dir=OUTPUT_DIR,\n",
+ " report_to=\"tensorboard\",\n",
+ " save_safetensors=True,\n",
+ " lr_scheduler_type=\"cosine\",\n",
+ " seed=42,\n",
+ ")"
+ ]
+ },
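+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The effective batch size is per_device_train_batch_size × gradient_accumulation_steps = 1 × 8 = 8 sequences per optimizer step. A fractional `eval_steps` is interpreted as a ratio of total training steps: 0.01 × 360 steps ≈ every 4 steps, which matches the evaluation cadence in the training log below."
+ ]
+ },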
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "splitted_dataset = dataset.train_test_split(test_size=0.1, seed=42)"
+ ]
+ },
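+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Hold out 10% of the data for evaluation, with a fixed seed so the split is reproducible. This is consistent with the progress bars below: 360 optimizer steps over 3 epochs at an effective batch of 8 imply roughly 960 training examples, and the 14 evaluation batches cover the ~10% held-out split."
+ ]
+ },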
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'EarlyStoppingCallback' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[57], line 10\u001b[0m\n\u001b[1;32m 1\u001b[0m trainer \u001b[38;5;241m=\u001b[39m SFTTrainer(\n\u001b[1;32m 2\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 3\u001b[0m train_dataset\u001b[38;5;241m=\u001b[39msplitted_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 4\u001b[0m eval_dataset\u001b[38;5;241m=\u001b[39msplitted_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 5\u001b[0m peft_config\u001b[38;5;241m=\u001b[39mpeft_config,\n\u001b[1;32m 6\u001b[0m dataset_text_field\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 7\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m512\u001b[39m,\n\u001b[1;32m 8\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[1;32m 9\u001b[0m args\u001b[38;5;241m=\u001b[39mtraining_arguments,\n\u001b[0;32m---> 10\u001b[0m callbacks\u001b[38;5;241m=\u001b[39m[\u001b[43mEarlyStoppingCallback\u001b[49m(early_stopping_patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m)]\n\u001b[1;32m 11\u001b[0m )\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'EarlyStoppingCallback' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "trainer = SFTTrainer(\n",
+ " model=model,\n",
+ " train_dataset=splitted_dataset['train'],\n",
+ " eval_dataset=splitted_dataset['test'],\n",
+ " peft_config=peft_config,\n",
+ " dataset_text_field=\"text\",\n",
+ " max_seq_length=512,\n",
+ " tokenizer=tokenizer,\n",
+ " args=training_arguments,\n",
+ " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# torch.cuda.synchronize()\n",
+ "# torch.cuda.empty_cache()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [205/360 15:44 < 12:01, 0.21 it/s, Epoch 1.70/3]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 1.981200 | \n",
+ " 1.774238 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1.753500 | \n",
+ " 1.754978 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 1.772600 | \n",
+ " 1.726179 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 1.557500 | \n",
+ " 1.692598 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 1.790200 | \n",
+ " 1.662171 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 1.598900 | \n",
+ " 1.633827 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 1.526500 | \n",
+ " 1.615106 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 1.560600 | \n",
+ " 1.600001 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 1.618500 | \n",
+ " 1.588000 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 1.646600 | \n",
+ " 1.579654 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 1.657100 | \n",
+ " 1.567710 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 1.587900 | \n",
+ " 1.558632 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " 1.397700 | \n",
+ " 1.550794 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " 1.704600 | \n",
+ " 1.543783 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " 1.456500 | \n",
+ " 1.538500 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " 1.901600 | \n",
+ " 1.532189 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " 1.774300 | \n",
+ " 1.528996 | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " 1.390000 | \n",
+ " 1.524170 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " 1.558800 | \n",
+ " 1.519067 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 1.627400 | \n",
+ " 1.517108 | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " 1.504100 | \n",
+ " 1.512800 | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " 1.668200 | \n",
+ " 1.509464 | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " 1.526700 | \n",
+ " 1.505236 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " 1.618400 | \n",
+ " 1.503344 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 1.451900 | \n",
+ " 1.499353 | \n",
+ "
\n",
+ " \n",
+ " 104 | \n",
+ " 1.644900 | \n",
+ " 1.496035 | \n",
+ "
\n",
+ " \n",
+ " 108 | \n",
+ " 1.469000 | \n",
+ " 1.492282 | \n",
+ "
\n",
+ " \n",
+ " 112 | \n",
+ " 1.614600 | \n",
+ " 1.489366 | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " 1.591700 | \n",
+ " 1.487346 | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " 1.487500 | \n",
+ " 1.482805 | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " 1.416000 | \n",
+ " 1.480361 | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " 1.313600 | \n",
+ " 1.481161 | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " 1.334400 | \n",
+ " 1.479421 | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " 1.471800 | \n",
+ " 1.476773 | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " 1.540500 | \n",
+ " 1.474109 | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " 1.452700 | \n",
+ " 1.473360 | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " 1.323000 | \n",
+ " 1.472112 | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " 1.527600 | \n",
+ " 1.470621 | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " 1.535100 | \n",
+ " 1.469403 | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " 1.356000 | \n",
+ " 1.467490 | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " 1.492700 | \n",
+ " 1.465348 | \n",
+ "
\n",
+ " \n",
+ " 168 | \n",
+ " 1.371600 | \n",
+ " 1.464317 | \n",
+ "
\n",
+ " \n",
+ " 172 | \n",
+ " 1.628700 | \n",
+ " 1.463003 | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " 1.242100 | \n",
+ " 1.462533 | \n",
+ "
\n",
+ " \n",
+ " 180 | \n",
+ " 1.284400 | \n",
+ " 1.461138 | \n",
+ "
\n",
+ " \n",
+ " 184 | \n",
+ " 1.563000 | \n",
+ " 1.459591 | \n",
+ "
\n",
+ " \n",
+ " 188 | \n",
+ " 1.421000 | \n",
+ " 1.457585 | \n",
+ "
\n",
+ " \n",
+ " 192 | \n",
+ " 1.208200 | \n",
+ " 1.456179 | \n",
+ "
\n",
+ " \n",
+ " 196 | \n",
+ " 1.350800 | \n",
+ " 1.454647 | \n",
+ "
\n",
+ " \n",
+ " 200 | \n",
+ " 1.602600 | \n",
+ " 1.454009 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " [ 2/14 00:00 < 00:08, 1.40 it/s]\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[55], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:440\u001b[0m, in \u001b[0;36mSFTTrainer.train\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trl_activate_neftune(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 440\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:1885\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1883\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1885\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1889\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1890\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2291\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2288\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m steps_skipped) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m 2289\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 2291\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_norm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2292\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2293\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2721\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2719\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 2720\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_evaluate:\n\u001b[0;32m-> 2721\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2722\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2724\u001b[0m \u001b[38;5;66;03m# Run delayed LR scheduler now that metrics are populated\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3572\u001b[0m, in \u001b[0;36mTrainer.evaluate\u001b[0;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3569\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 3571\u001b[0m eval_loop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprediction_loop \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_legacy_prediction_loop \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation_loop\n\u001b[0;32m-> 3572\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43meval_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3573\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_dataloader\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3574\u001b[0m \u001b[43m \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mEvaluation\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3575\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# No point gathering the predictions if there are no metrics, otherwise we defer to\u001b[39;49;00m\n\u001b[1;32m 3576\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# self.args.prediction_loss_only\u001b[39;49;00m\n\u001b[1;32m 3577\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_metrics\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 3578\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3579\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3580\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3582\u001b[0m total_batch_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39meval_batch_size \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mworld_size\n\u001b[1;32m 3583\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetric_key_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_jit_compilation_time\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mmetrics:\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3757\u001b[0m, in \u001b[0;36mTrainer.evaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 3754\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m observed_batch_size\n\u001b[1;32m 3756\u001b[0m \u001b[38;5;66;03m# Prediction step\u001b[39;00m\n\u001b[0;32m-> 3757\u001b[0m loss, logits, labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprediction_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3758\u001b[0m main_input_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain_input_name\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3759\u001b[0m inputs_decode \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_input(inputs[main_input_name]) \u001b[38;5;28;01mif\u001b[39;00m args\u001b[38;5;241m.\u001b[39minclude_inputs_for_metrics \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3971\u001b[0m, in \u001b[0;36mTrainer.prediction_step\u001b[0;34m(self, model, inputs, prediction_loss_only, ignore_keys)\u001b[0m\n\u001b[1;32m 3969\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_labels \u001b[38;5;129;01mor\u001b[39;00m loss_without_labels:\n\u001b[1;32m 3970\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3971\u001b[0m loss, outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 3972\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean()\u001b[38;5;241m.\u001b[39mdetach()\n\u001b[1;32m 3974\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(outputs, \u001b[38;5;28mdict\u001b[39m):\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3264\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 3262\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3263\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3264\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3265\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 3266\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 3267\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32..forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator..decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32..forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator..decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/peft_model.py:1430\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m 1428\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 1429\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1431\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1432\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1433\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1434\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1435\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1436\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1437\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1438\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1439\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1441\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m 1442\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1443\u001b[0m \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:167\u001b[0m, in \u001b[0;36madd_hook_to_module..new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 166\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_hf_hook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:380\u001b[0m, in \u001b[0;36mAlignDevicesHook.post_forward\u001b[0;34m(self, module, output)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtied_pointers_to_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mio_same_device \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_device \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 380\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_device\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:186\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 184\u001b[0m skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[0;32m--> 186\u001b[0m {\n\u001b[1;32m 187\u001b[0m k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m send_to_device(t, device, non_blocking\u001b[38;5;241m=\u001b[39mnon_blocking, skip_keys\u001b[38;5;241m=\u001b[39mskip_keys)\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 189\u001b[0m }\n\u001b[1;32m 190\u001b[0m )\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:187\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 184\u001b[0m skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[1;32m 186\u001b[0m {\n\u001b[0;32m--> 187\u001b[0m k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 189\u001b[0m }\n\u001b[1;32m 190\u001b[0m )\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n",
+ "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:158\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m 156\u001b[0m tensor \u001b[38;5;241m=\u001b[39m tensor\u001b[38;5;241m.\u001b[39mcpu()\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtensor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m: \u001b[38;5;66;03m# .to() doesn't accept non_blocking as kwarg\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mto(device)\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "trainer.train()"
+ ]
+ },
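+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Training was interrupted manually (the KeyboardInterrupt above) at step 205 of 360, i.e. epoch 1.70 of 3, with the validation loss still decreasing; the adapter pushed below therefore reflects this partial run."
+ ]
+ },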
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "bc20cacf58bd4f21bea7fd858060b3cd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "training_args.bin: 0%| | 0.00/4.92k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a84f635f17304651adffa1f7a00225ab",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "adapter_model.safetensors: 0%| | 0.00/27.3M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6ee95ca486d94f6182b6da552794e8b1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "events.out.tfevents.1718234993.jupiter.364556.2: 0%| | 0.00/61.6k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4205c431d3634c8a84fe19f0fd686e82",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "events.out.tfevents.1718234875.jupiter.364556.1: 0%| | 0.00/12.2k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4dfa92c14f4c42c786bb313e57617d63",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Upload 4 LFS files: 0%| | 0/4 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "CommitInfo(commit_url='https://huggingface.co/shin00001/experiments/commit/61a909cf186f09d62099fa9b62a70519fd2be619', commit_message='AI-4-Health/HPP-FINETUNED-Meta-Llama-3-8B-Instruct', commit_description='', oid='61a909cf186f09d62099fa9b62a70519fd2be619', pr_url=None, pr_revision=None, pr_num=None)"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.push_to_hub(\"AI-4-Health/HPP-FINETUNED-Meta-Llama-3-8B-Instruct\")"
+ ]
+ },
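+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To reuse the adapter later, load the base model and attach the pushed adapter weights. A minimal sketch, assuming the base checkpoint is `meta-llama/Meta-Llama-3-8B-Instruct` and the adapter repo is `shin00001/experiments` (both taken from the commit info above; adjust if your repo ids differ):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: load the fine-tuned adapter for inference (repo ids assumed from the commit above).\n",
+ "import torch\n",
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+ "from peft import PeftModel\n",
+ "\n",
+ "base_model = AutoModelForCausalLM.from_pretrained(\n",
+ "    \"meta-llama/Meta-Llama-3-8B-Instruct\",  # assumed base checkpoint\n",
+ "    torch_dtype=torch.float16,\n",
+ "    device_map=\"auto\",\n",
+ ")\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B-Instruct\")\n",
+ "model = PeftModel.from_pretrained(base_model, \"shin00001/experiments\")  # adapter repo\n",
+ "model = model.merge_and_unload()  # optional: fold the LoRA weights into the base model\n",
+ "model.eval()"
+ ]
+ },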
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}