ryefoxlime committed
Commit 3d5069a
Parent(s): d1734fa

tidied up the code

Files changed (1):
  1. Gemma2_2B/inference.ipynb (+59, -203)
Gemma2_2B/inference.ipynb CHANGED
@@ -2,7 +2,7 @@
   "cells": [
   {
   "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -17,92 +17,13 @@
   },
   {
   "cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
   "metadata": {},
   "outputs": [
   {
   "data": {
   "application/vnd.jupyter.widget-view+json": {
- "model_id": "d00ec085003e409d906784abc1f89dc1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "config.json: 0%| | 0.00/838 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "f:\\TADBot\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in F:\\TADBot\\Gemma2_2B\\.cache\\models--google--gemma-2-2b-it. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
- "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
- " warnings.warn(message)\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "bdee67c51d7547a48e45f17db7fb3734",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "model.safetensors.index.json: 0%| | 0.00/24.2k [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ad86eff32cc1447486e69c5f5f90e4a4",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "78cab016a2d54731a94ef45e85d65ddd",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "model-00001-of-00002.safetensors: 0%| | 0.00/4.99G [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "52b50ff81d0d481ab475878606935162",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "model-00002-of-00002.safetensors: 0%| | 0.00/241M [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "7dfed61b7e0a4338aee7ad14df4d85ca",
+ "model_id": "6124a76f904b49be930009acef84305b",
   "version_major": 2,
   "version_minor": 0
   },
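The stderr block removed above is the huggingface_hub symlink warning on Windows; the warning text itself names the off-switch. A minimal sketch, assuming the variable is set before huggingface_hub or transformers is imported (not part of the commit):

    import os

    # Variable name taken verbatim from the warning above; must be set
    # before huggingface_hub is imported for it to take effect.
    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"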
@@ -112,88 +33,18 @@
   },
   "metadata": {},
   "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9ac1e6a0b72a44d3a8a648bce2138c3d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "generation_config.json: 0%| | 0.00/187 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f0129c204a454f22968aebe59b75ea1a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/47.0k [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ca55b303b11347cbbf5970327d2d8a82",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "tokenizer.model: 0%| | 0.00/4.24M [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "33601521ca8544e7a98c88506257dd20",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/17.5M [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f353232bbf6b4da3ac62e02fa7f58990",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/636 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
   }
   ],
   "source": [
   "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
   "model_name = \"google/gemma-2-2b-it\"\n",
- "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=\"auto\", cache_dir=\".cache/\")\n",
+ "model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=\".cache/\")\n",
   "tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=\".cache/\")"
   ]
   },
   {
   "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 3,
   "metadata": {},
   "outputs": [
   {
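The substantive change in this hunk: device_map="auto" is dropped, so the model now loads entirely on CPU instead of being dispatched to the GPU by accelerate. For contrast, a sketch of the two loading modes, using the same identifiers as the diff (this code is not part of the commit):

    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_name = "google/gemma-2-2b-it"

    # As committed: no device_map, so the weights stay on CPU.
    model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=".cache/")

    # Pre-commit behaviour: let accelerate place layers on GPU when available.
    # model = AutoModelForCausalLM.from_pretrained(
    #     model_name, device_map="auto", cache_dir=".cache/"
    # )

    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=".cache/")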
@@ -237,44 +88,27 @@
   },
   {
   "cell_type": "code",
- "execution_count": 9,
+ "execution_count": 4,
   "metadata": {},
   "outputs": [
   {
   "name": "stdout",
   "output_type": "stream",
   "text": [
- "<bos>What should I do on a trip to Europe?\n",
- "\n",
- "That's a great question! To give you the best advice, I need a little more information. Tell me about:\n",
- "\n",
- "**1. Your Interests:** \n",
- " * What kind of things do you enjoy doing? (History, art, food, nightlife, nature, adventure, relaxation, etc.)\n",
- " * Are there any specific places or activities you've always wanted to experience?\n",
- "\n",
- "**2. Your Travel Style:**\n",
- " * Do you prefer to travel on your own, with a partner, or with a group?\n",
- " * Do you like to plan everything in advance or be more spontaneous?\n",
- " * What's your budget like?\n",
- "\n",
- "**3. Your Trip Details:**\n",
- " * How long will you be traveling for?\n",
- " * What time of year are you planning to go?\n",
- " * Do you have any specific destinations in mind?\n",
+ "<bos>I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\n",
   "\n",
- "Once I have this information, I can give you personalized recommendations for your European adventure! \n",
- "<end_of_turn>\n",
- "CPU times: total: 7.23 s\n",
- "Wall time: 7.56 s\n"
+ "It's wonderful that you're recognizing the need for support and seeking help. You absolutely do not have too many issues to address in counseling. In fact, it's\n",
+ "CPU times: total: 28.8 s\n",
+ "Wall time: 20.3 s\n"
   ]
   }
   ],
   "source": [
   "%%time\n",
- "input_text = \"What should I do on a trip to Europe?\"\n",
+ "input_text = \"I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\"\n",
   "\n",
- "input_ids = tokenizer(input_text, return_tensors=\"pt\").to(\"cuda\")\n",
- "outputs = model.generate(**input_ids, max_length=2048)\n",
+ "input_ids = tokenizer(input_text, return_tensors=\"pt\")\n",
+ "outputs = model.generate(**input_ids, max_length=128)\n",
   "print(tokenizer.decode(outputs[0]))"
   ]
   },
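With the model now on CPU, the hard-coded .to("cuda") is dropped and max_length falls from 2048 to 128, which is why the new reply is cut off mid-sentence ("In fact, it's"). A device-agnostic variant that works under either placement; a sketch reusing tokenizer, model, and input_text from the cells above, assuming the model is not sharded across devices:

    # Follow the model wherever it was loaded instead of hard-coding a device.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=128)
    print(tokenizer.decode(outputs[0]))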
@@ -287,7 +121,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -303,13 +137,13 @@
   },
   {
   "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 6,
   "metadata": {},
   "outputs": [
   {
   "data": {
   "application/vnd.jupyter.widget-view+json": {
- "model_id": "21f72716997c42cfa2244677b36b85f8",
+ "model_id": "7e7639d5cbc748f189f84f0287700585",
   "version_major": 2,
   "version_minor": 0
   },
@@ -323,12 +157,12 @@
   {
   "data": {
   "text/plain": [
- "('gemma2-TADBot\\\\tokenizer_config.json',\n",
- " 'gemma2-TADBot\\\\special_tokens_map.json',\n",
- " 'gemma2-TADBot\\\\tokenizer.json')"
+ "('gemma-2-2b-it-therapist\\\\tokenizer_config.json',\n",
+ " 'gemma-2-2b-it-therapist\\\\special_tokens_map.json',\n",
+ " 'gemma-2-2b-it-therapist\\\\tokenizer.json')"
   ]
   },
- "execution_count": 2,
+ "execution_count": 6,
   "metadata": {},
   "output_type": "execute_result"
   }
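The renamed output files above come from the adapter-merge cell whose source follows in the next hunk. For orientation, a minimal sketch of that merge flow; new_model stands for the fine-tuned PEFT adapter path or repo id, which the diff never shows being defined, so its value here is a hypothetical placeholder:

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    base_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-2b-it", cache_dir=".cache/"
    )
    # Hypothetical placeholder; the notebook defines new_model elsewhere.
    new_model = "path/to/lora-adapter"
    model = PeftModel.from_pretrained(base_model, new_model, cache_dir=".cache/")
    model = model.merge_and_unload()  # fold adapter weights into the base model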
@@ -344,38 +178,55 @@
   ")\n",
   "model = PeftModel.from_pretrained(base_model, new_model, cache_dir = \".cache/\")\n",
   "model = model.merge_and_unload()\n",
- "model.save_pretrained(\"gemma2-TADBot\")\n",
   "\n",
   "# Reload tokenizer to save it\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir = \".cache/\")\n",
- "tokenizer.pad_token = tokenizer.eos_token\n",
- "tokenizer.padding_side = \"right\"\n",
- "tokenizer.save_pretrained(\"gemma2-TADBot\")"
+ "tokenizer = AutoTokenizer.from_pretrained(\n",
+ " model_name, trust_remote_code=True, cache_dir=\".cache/\"\n",
+ ")\n",
+ "tokenizer.save_pretrained(\"gemma-2-2b-it-therapist\")\n"
   ]
   },
   {
   "cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
   "metadata": {},
   "outputs": [
   {
- "name": "stdout",
+ "name": "stderr",
   "output_type": "stream",
   "text": [
- "<bos>I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\n",
- "\n",
- "### Response:\n",
- "I would say absolutely not!  It is never too many issues to address in counseling.  It is actually quite common for people to come into therapy with a lot of issues and it is often the case that the issues are interconnected.  For example, a person who has experienced trauma may have difficulty sleeping, have low self esteem, and have anxiety.  It is important to remember that counseling is a collaborative process and the therapist will work with you to help you address all of your issues.\n",
- "<eos>\n",
- "CPU times: total: 16 s\n",
- "Wall time: 17.3 s\n"
+ "f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:2097: UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cuda, whereas the model is on cpu. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cpu') before running `.generate()`.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "RuntimeError",
+ "evalue": "Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)",
+ "output_type": "error",
+ "traceback": [
+ "---------------------------------------------------------------------------",
+ "RuntimeError                              Traceback (most recent call last)",
+ "File <timed exec>:2",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\utils\\_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:2215, in GenerationMixin.generate(...)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:3206, in GenerationMixin._sample(...)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736, in Module._wrapped_call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747, in Module._call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\models\\gemma2\\modeling_gemma2.py:1049, in Gemma2ForCausalLM.forward(...)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736, in Module._wrapped_call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747, in Module._call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\models\\gemma2\\modeling_gemma2.py:783, in Gemma2Model.forward(...)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736, in Module._wrapped_call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747, in Module._call_impl(*args, **kwargs)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\sparse.py:190, in Embedding.forward(self, input)",
+ "File f:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\functional.py:2551, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)",
+ "RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)"
+ ]
+ }
   ]
   }
   ],
   "source": [
   "%%time\n",
- "input_text = \"I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\"\n",
- "input_ids = tokenizer(input_text, return_tensors=\"pt\").to(\"cuda\")\n",
+ "input_ids = tokenizer(input_text, return_tensors=\"pt\")\n",
   "outputs = model.generate(**input_ids, max_length=2048)\n",
   "print(tokenizer.decode(outputs[0]))"
   ]
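The newly recorded error output is a classic device mismatch: input_ids was built on CUDA in an earlier run while the merged model sits on CPU, so generate() fails in the embedding lookup. The UserWarning in the trace already names the fix: move the tensors to the model's device before generating. A sketch reusing tokenizer, model, and input_text from the cells above:

    # Build the inputs on the same device as the (merged) model.
    input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**input_ids, max_length=2048)
    print(tokenizer.decode(outputs[0]))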
@@ -385,7 +236,12 @@
   "execution_count": null,
   "metadata": {},
   "outputs": [],
- "source": []
+ "source": [
+ "model.save_pretrained(\"gemma2-TADBot\")\n",
+ "model.push_to_hub(\"gemma-2-2b-it-therapist\", use_auth_token=True, use_temp_dir=False)\n",
+ "tokenizer.save_pretrained(\"gemma-2-2b-it-therapist\")\n",
+ "tokenizer.push_to_hub(\"gemma-2-2b-it-therapist\", use_auth_token=True, use_temp_dir=False)"
+ ]
   }
   ],
   "metadata": {
 