ryefoxlime committed 3d5069a (1 parent: d1734fa)
tidied up the code

Gemma2_2B/inference.ipynb (+59 -203)
CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -17,92 +17,13 @@
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "config.json: 0%| | 0.00/838 [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "f:\\TADBot\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in F:\\TADBot\\Gemma2_2B\\.cache\\models--google--gemma-2-2b-it. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
-       "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
-       " warnings.warn(message)\n"
-      ]
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "bdee67c51d7547a48e45f17db7fb3734",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "model.safetensors.index.json: 0%| | 0.00/24.2k [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "ad86eff32cc1447486e69c5f5f90e4a4",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "78cab016a2d54731a94ef45e85d65ddd",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "model-00001-of-00002.safetensors: 0%| | 0.00/4.99G [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "52b50ff81d0d481ab475878606935162",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "model-00002-of-00002.safetensors: 0%| | 0.00/241M [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "7dfed61b7e0a4338aee7ad14df4d85ca",
+        "model_id": "6124a76f904b49be930009acef84305b",
         "version_major": 2,
         "version_minor": 0
        },
@@ -112,88 +33,18 @@
      },
      "metadata": {},
      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "9ac1e6a0b72a44d3a8a648bce2138c3d",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "generation_config.json: 0%| | 0.00/187 [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "f0129c204a454f22968aebe59b75ea1a",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "tokenizer_config.json: 0%| | 0.00/47.0k [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "ca55b303b11347cbbf5970327d2d8a82",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "tokenizer.model: 0%| | 0.00/4.24M [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "33601521ca8544e7a98c88506257dd20",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "tokenizer.json: 0%| | 0.00/17.5M [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
-     },
-     {
-      "data": {
-       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "f353232bbf6b4da3ac62e02fa7f58990",
-        "version_major": 2,
-        "version_minor": 0
-       },
-       "text/plain": [
-        "special_tokens_map.json: 0%| | 0.00/636 [00:00<?, ?B/s]"
-       ]
-      },
-      "metadata": {},
-      "output_type": "display_data"
     }
    ],
    "source": [
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "model_name = \"google/gemma-2-2b-it\"\n",
-     "model = AutoModelForCausalLM.from_pretrained(model_name,
+     "model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=\".cache/\")\n",
     "tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=\".cache/\")"
    ]
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
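The hunk above restores the truncated from_pretrained call: both the model and the tokenizer are now loaded with the same notebook-local cache_dir. A minimal standalone sketch of that step; the torch_dtype and device_map arguments are my additions, not part of this commit, and assume torch plus accelerate are installed:

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_name = "google/gemma-2-2b-it"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        cache_dir=".cache/",              # keep weights under the notebook directory, as the commit does
        torch_dtype=torch.bfloat16,       # assumption: halves memory on hardware that supports bf16
        device_map="auto",                # assumption: needs accelerate; puts the model on GPU if one exists
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=".cache/")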
@@ -237,44 +88,27 @@
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-       "<bos>
-       "\n",
-       "That's a great question! To give you the best advice, I need a little more information. Tell me about:\n",
-       "\n",
-       "**1. Your Interests:** \n",
-       " * What kind of things do you enjoy doing? (History, art, food, nightlife, nature, adventure, relaxation, etc.)\n",
-       " * Are there any specific places or activities you've always wanted to experience?\n",
-       "\n",
-       "**2. Your Travel Style:**\n",
-       " * Do you prefer to travel on your own, with a partner, or with a group?\n",
-       " * Do you like to plan everything in advance or be more spontaneous?\n",
-       " * What's your budget like?\n",
-       "\n",
-       "**3. Your Trip Details:**\n",
-       " * How long will you be traveling for?\n",
-       " * What time of year are you planning to go?\n",
-       " * Do you have any specific destinations in mind?\n",
+       "<bos>I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\n",
       "\n",
-       "
-       "
-       "
-       "Wall time: 7.56 s\n"
+       "It's wonderful that you're recognizing the need for support and seeking help. You absolutely do not have too many issues to address in counseling. In fact, it's\n",
+       "CPU times: total: 28.8 s\n",
+       "Wall time: 20.3 s\n"
      ]
     }
    ],
    "source": [
     "%%time\n",
-     "input_text = \"
+     "input_text = \"I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac. I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?\"\n",
     "\n",
-     "input_ids = tokenizer(input_text, return_tensors=\"pt\")
-     "outputs = model.generate(**input_ids, max_length=
+     "input_ids = tokenizer(input_text, return_tensors=\"pt\")\n",
+     "outputs = model.generate(**input_ids, max_length=128)\n",
     "print(tokenizer.decode(outputs[0]))"
    ]
   },
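The regenerated cell feeds the counseling question to the model as raw text and caps generation at max_length=128, which is why the stored answer breaks off mid-sentence ("In fact, it's"). Since gemma-2-2b-it is instruction-tuned, a chat-template prompt is the usual way to query it; a sketch, assuming the model, tokenizer, and input_text from the cells above:

    messages = [{"role": "user", "content": input_text}]
    # apply_chat_template wraps the question in Gemma's turn markers
    prompt_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    # max_new_tokens bounds the reply itself rather than prompt + reply together
    outputs = model.generate(prompt_ids, max_new_tokens=256)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))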
@@ -287,7 +121,7 @@
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -303,13 +137,13 @@
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-        "model_id": "
+        "model_id": "7e7639d5cbc748f189f84f0287700585",
        "version_major": 2,
        "version_minor": 0
       },
@@ -323,12 +157,12 @@
     {
      "data": {
       "text/plain": [
-        "('
-        " '
-        " '
+        "('gemma-2-2b-it-therapist\\\\tokenizer_config.json',\n",
+        " 'gemma-2-2b-it-therapist\\\\special_tokens_map.json',\n",
+        " 'gemma-2-2b-it-therapist\\\\tokenizer.json')"
       ]
      },
-     "execution_count":
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
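The execute_result above is the tuple of file paths returned by tokenizer.save_pretrained. The surrounding cell (its diff continues in the next hunk) merges a LoRA adapter into the base model with PEFT; a condensed sketch of that flow, where new_model stands for the adapter path defined earlier in the notebook:

    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    base_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-2b-it", cache_dir=".cache/"
    )
    # Attach the fine-tuned adapter, then bake its weights into the base model
    model = PeftModel.from_pretrained(base_model, new_model, cache_dir=".cache/")
    model = model.merge_and_unload()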
@@ -344,38 +178,55 @@
     ")\n",
     "model = PeftModel.from_pretrained(base_model, new_model, cache_dir = \".cache/\")\n",
     "model = model.merge_and_unload()\n",
-     "model.save_pretrained(\"gemma2-TADBot\")\n",
     "\n",
     "# Reload tokenizer to save it\n",
-     "tokenizer = AutoTokenizer.from_pretrained(
-     "
-     "
-     "tokenizer.save_pretrained(\"
+     "tokenizer = AutoTokenizer.from_pretrained(\n",
+     " model_name, trust_remote_code=True, cache_dir=\".cache/\"\n",
+     ")\n",
+     "tokenizer.save_pretrained(\"gemma-2-2b-it-therapist\")\n"
    ]
   },
   {
    "cell_type": "code",
-    "execution_count":
+    "execution_count": null,
    "metadata": {},
    "outputs": [
     {
-      "name": "
+      "name": "stderr",
      "output_type": "stream",
      "text": [
-       "
-       "\n"
-
-
-
-
-
+       "f:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:2097: UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cuda, whereas the model is on cpu. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cpu') before running `.generate()`.\n",
+       " warnings.warn(\n"
+      ]
+     },
+     {
+      "ename": "RuntimeError",
+      "evalue": "Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)",
+      "output_type": "error",
+      "traceback": [
+       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+       "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)",
+       "File \u001b[1;32m<timed exec>:2\u001b[0m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\utils\\_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[1;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:2215\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[1;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[0;32m 2207\u001b[0m input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[0;32m 2208\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[0;32m 2209\u001b[0m expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[0;32m 2210\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[0;32m 2211\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[0;32m 2212\u001b[0m )\n\u001b[0;32m 2214\u001b[0m \u001b[38;5;66;03m# 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[1;32m-> 2215\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2216\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2217\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2218\u001b[0m \u001b[43m \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2219\u001b[0m \u001b[43m \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2220\u001b[0m \u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2221\u001b[0m \u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2223\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2225\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[0;32m 2226\u001b[0m \u001b[38;5;66;03m# 11. prepare beam search scorer\u001b[39;00m\n\u001b[0;32m 2227\u001b[0m beam_scorer \u001b[38;5;241m=\u001b[39m BeamSearchScorer(\n\u001b[0;32m 2228\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[0;32m 2229\u001b[0m num_beams\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_beams,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2234\u001b[0m max_length\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mmax_length,\n\u001b[0;32m 2235\u001b[0m )\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:3206\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[1;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[0;32m 3203\u001b[0m model_inputs\u001b[38;5;241m.\u001b[39mupdate({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput_hidden_states\u001b[39m\u001b[38;5;124m\"\u001b[39m: output_hidden_states} \u001b[38;5;28;01mif\u001b[39;00m output_hidden_states \u001b[38;5;28;01melse\u001b[39;00m {})\n\u001b[0;32m 3205\u001b[0m \u001b[38;5;66;03m# forward pass to get next token\u001b[39;00m\n\u001b[1;32m-> 3206\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[0;32m 3208\u001b[0m \u001b[38;5;66;03m# synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping\u001b[39;00m\n\u001b[0;32m 3209\u001b[0m model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_model_kwargs_for_generation(\n\u001b[0;32m 3210\u001b[0m outputs,\n\u001b[0;32m 3211\u001b[0m model_kwargs,\n\u001b[0;32m 3212\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[0;32m 3213\u001b[0m )\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\models\\gemma2\\modeling_gemma2.py:1049\u001b[0m, in \u001b[0;36mGemma2ForCausalLM.forward\u001b[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, num_logits_to_keep, **loss_kwargs)\u001b[0m\n\u001b[0;32m 1047\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[0;32m 1048\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[1;32m-> 1049\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1052\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1053\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1054\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1055\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1056\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1057\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1058\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1059\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1060\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1062\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m 1063\u001b[0m \u001b[38;5;66;03m# Only compute necessary logits, and do not upcast them to float if we are not computing the loss\u001b[39;00m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\transformers\\models\\gemma2\\modeling_gemma2.py:783\u001b[0m, in \u001b[0;36mGemma2Model.forward\u001b[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[0;32m 780\u001b[0m use_cache \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 782\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inputs_embeds \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 783\u001b[0m inputs_embeds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_tokens\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 785\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache \u001b[38;5;129;01mand\u001b[39;00m past_key_values \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining:\n\u001b[0;32m 786\u001b[0m batch_size, seq_len, _ \u001b[38;5;241m=\u001b[39m inputs_embeds\u001b[38;5;241m.\u001b[39mshape\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\sparse.py:190\u001b[0m, in \u001b[0;36mEmbedding.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[1;32m--> 190\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding_idx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_norm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnorm_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscale_grad_by_freq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msparse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
+       "File \u001b[1;32mf:\\TADBot\\.venv\\Lib\\site-packages\\torch\\nn\\functional.py:2551\u001b[0m, in \u001b[0;36membedding\u001b[1;34m(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)\u001b[0m\n\u001b[0;32m 2545\u001b[0m \u001b[38;5;66;03m# Note [embedding_renorm set_grad_enabled]\u001b[39;00m\n\u001b[0;32m 2546\u001b[0m \u001b[38;5;66;03m# XXX: equivalent to\u001b[39;00m\n\u001b[0;32m 2547\u001b[0m \u001b[38;5;66;03m# with torch.no_grad():\u001b[39;00m\n\u001b[0;32m 2548\u001b[0m \u001b[38;5;66;03m# torch.embedding_renorm_\u001b[39;00m\n\u001b[0;32m 2549\u001b[0m \u001b[38;5;66;03m# remove once script supports set_grad_enabled\u001b[39;00m\n\u001b[0;32m 2550\u001b[0m _no_grad_embedding_renorm_(weight, \u001b[38;5;28minput\u001b[39m, max_norm, norm_type)\n\u001b[1;32m-> 2551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m(\u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpadding_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscale_grad_by_freq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msparse\u001b[49m\u001b[43m)\u001b[49m\n",
+       "\u001b[1;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)"
      ]
     }
    ],
    "source": [
     "%%time\n",
-     "
-     "input_ids = tokenizer(input_text, return_tensors=\"pt\").to(\"cuda\")\n",
+     "input_ids = tokenizer(input_text, return_tensors=\"pt\")\n",
     "outputs = model.generate(**input_ids, max_length=2048)\n",
     "print(tokenizer.decode(outputs[0]))"
    ]
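The newly committed error output records the classic mismatch: input_ids ended up on cuda while the merged model stayed on cpu, exactly as the UserWarning in the same output says. Keying the inputs off model.device sidesteps the problem no matter where the model landed; a sketch, assuming the model, tokenizer, and input_text from the cells above:

    # model.device reflects wherever the merged model actually lives (cpu or cuda)
    input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**input_ids, max_length=2048)
    print(tokenizer.decode(outputs[0]))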
@@ -385,7 +236,12 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-    "source": [
+    "source": [
+     "model.save_pretrained(\"gemma2-TADBot\")\n",
+     "model.push_to_hub(\"gemma-2-2b-it-therapist\", use_auth_token=True, use_temp_dir=False)\n",
+     "tokenizer.save_pretrained(\"gemma-2-2b-it-therapist\")\n",
+     "tokenizer.push_to_hub(\"gemma-2-2b-it-therapist\", use_auth_token=True, use_temp_dir=False)"
+    ]
   }
  ],
 "metadata": {
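The final cell now both saves the merged model locally and pushes model and tokenizer to the Hub. One caveat worth noting: recent transformers releases deprecate the use_auth_token argument in favor of token. A sketch of the equivalent calls under that assumption, relying on a token already stored via huggingface-cli login:

    # token=True reuses the locally stored Hub credential
    model.push_to_hub("gemma-2-2b-it-therapist", token=True, use_temp_dir=False)
    tokenizer.push_to_hub("gemma-2-2b-it-therapist", token=True, use_temp_dir=False)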
|