AlexZigma commited on
Commit
96e0f72
1 Parent(s): f4c9911

Training in progress, step 2000

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "/content/content/timesformer-bert-video-captioning/checkpoint-1700",
4
  "architectures": [
5
  "VisionEncoderDecoderModel"
6
  ],
 
1
  {
2
  "_commit_hash": null,
 
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
last-checkpoint/config.json ADDED
@@ -0,0 +1,973 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "bert-base-uncased",
8
+ "add_cross_attention": true,
9
+ "architectures": [
10
+ "BertForMaskedLM"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": null,
16
+ "chunk_size_feed_forward": 0,
17
+ "classifier_dropout": null,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "diversity_penalty": 0.0,
21
+ "do_sample": false,
22
+ "early_stopping": false,
23
+ "encoder_no_repeat_ngram_size": 0,
24
+ "eos_token_id": null,
25
+ "exponential_decay_length_penalty": null,
26
+ "finetuning_task": null,
27
+ "forced_bos_token_id": null,
28
+ "forced_eos_token_id": null,
29
+ "gradient_checkpointing": false,
30
+ "hidden_act": "gelu",
31
+ "hidden_dropout_prob": 0.1,
32
+ "hidden_size": 768,
33
+ "id2label": {
34
+ "0": "LABEL_0",
35
+ "1": "LABEL_1"
36
+ },
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": 3072,
39
+ "is_decoder": true,
40
+ "is_encoder_decoder": false,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1
44
+ },
45
+ "layer_norm_eps": 1e-12,
46
+ "length_penalty": 1.0,
47
+ "max_length": 20,
48
+ "max_position_embeddings": 512,
49
+ "min_length": 0,
50
+ "model_type": "bert",
51
+ "no_repeat_ngram_size": 0,
52
+ "num_attention_heads": 12,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_hidden_layers": 12,
56
+ "num_return_sequences": 1,
57
+ "output_attentions": false,
58
+ "output_hidden_states": false,
59
+ "output_scores": false,
60
+ "pad_token_id": 0,
61
+ "position_embedding_type": "absolute",
62
+ "prefix": null,
63
+ "problem_type": null,
64
+ "pruned_heads": {},
65
+ "remove_invalid_values": false,
66
+ "repetition_penalty": 1.0,
67
+ "return_dict": true,
68
+ "return_dict_in_generate": false,
69
+ "sep_token_id": null,
70
+ "suppress_tokens": null,
71
+ "task_specific_params": null,
72
+ "temperature": 1.0,
73
+ "tf_legacy_loss": false,
74
+ "tie_encoder_decoder": false,
75
+ "tie_word_embeddings": true,
76
+ "tokenizer_class": null,
77
+ "top_k": 50,
78
+ "top_p": 1.0,
79
+ "torch_dtype": null,
80
+ "torchscript": false,
81
+ "transformers_version": "4.30.2",
82
+ "type_vocab_size": 2,
83
+ "typical_p": 1.0,
84
+ "use_bfloat16": false,
85
+ "use_cache": true,
86
+ "vocab_size": 30522
87
+ },
88
+ "decoder_start_token_id": 101,
89
+ "encoder": {
90
+ "_name_or_path": "facebook/timesformer-base-finetuned-k400",
91
+ "add_cross_attention": false,
92
+ "architectures": [
93
+ "TimesformerForVideoClassification"
94
+ ],
95
+ "attention_probs_dropout_prob": 0.0,
96
+ "attention_type": "divided_space_time",
97
+ "bad_words_ids": null,
98
+ "begin_suppress_tokens": null,
99
+ "bos_token_id": null,
100
+ "chunk_size_feed_forward": 0,
101
+ "cross_attention_hidden_size": null,
102
+ "decoder_start_token_id": null,
103
+ "diversity_penalty": 0.0,
104
+ "do_sample": false,
105
+ "drop_path_rate": 0,
106
+ "early_stopping": false,
107
+ "encoder_no_repeat_ngram_size": 0,
108
+ "eos_token_id": null,
109
+ "exponential_decay_length_penalty": null,
110
+ "finetuning_task": null,
111
+ "forced_bos_token_id": null,
112
+ "forced_eos_token_id": null,
113
+ "hidden_act": "gelu",
114
+ "hidden_dropout_prob": 0.0,
115
+ "hidden_size": 768,
116
+ "id2label": {
117
+ "0": "abseiling",
118
+ "1": "air drumming",
119
+ "2": "answering questions",
120
+ "3": "applauding",
121
+ "4": "applying cream",
122
+ "5": "archery",
123
+ "6": "arm wrestling",
124
+ "7": "arranging flowers",
125
+ "8": "assembling computer",
126
+ "9": "auctioning",
127
+ "10": "baby waking up",
128
+ "11": "baking cookies",
129
+ "12": "balloon blowing",
130
+ "13": "bandaging",
131
+ "14": "barbequing",
132
+ "15": "bartending",
133
+ "16": "beatboxing",
134
+ "17": "bee keeping",
135
+ "18": "belly dancing",
136
+ "19": "bench pressing",
137
+ "20": "bending back",
138
+ "21": "bending metal",
139
+ "22": "biking through snow",
140
+ "23": "blasting sand",
141
+ "24": "blowing glass",
142
+ "25": "blowing leaves",
143
+ "26": "blowing nose",
144
+ "27": "blowing out candles",
145
+ "28": "bobsledding",
146
+ "29": "bookbinding",
147
+ "30": "bouncing on trampoline",
148
+ "31": "bowling",
149
+ "32": "braiding hair",
150
+ "33": "breading or breadcrumbing",
151
+ "34": "breakdancing",
152
+ "35": "brush painting",
153
+ "36": "brushing hair",
154
+ "37": "brushing teeth",
155
+ "38": "building cabinet",
156
+ "39": "building shed",
157
+ "40": "bungee jumping",
158
+ "41": "busking",
159
+ "42": "canoeing or kayaking",
160
+ "43": "capoeira",
161
+ "44": "carrying baby",
162
+ "45": "cartwheeling",
163
+ "46": "carving pumpkin",
164
+ "47": "catching fish",
165
+ "48": "catching or throwing baseball",
166
+ "49": "catching or throwing frisbee",
167
+ "50": "catching or throwing softball",
168
+ "51": "celebrating",
169
+ "52": "changing oil",
170
+ "53": "changing wheel",
171
+ "54": "checking tires",
172
+ "55": "cheerleading",
173
+ "56": "chopping wood",
174
+ "57": "clapping",
175
+ "58": "clay pottery making",
176
+ "59": "clean and jerk",
177
+ "60": "cleaning floor",
178
+ "61": "cleaning gutters",
179
+ "62": "cleaning pool",
180
+ "63": "cleaning shoes",
181
+ "64": "cleaning toilet",
182
+ "65": "cleaning windows",
183
+ "66": "climbing a rope",
184
+ "67": "climbing ladder",
185
+ "68": "climbing tree",
186
+ "69": "contact juggling",
187
+ "70": "cooking chicken",
188
+ "71": "cooking egg",
189
+ "72": "cooking on campfire",
190
+ "73": "cooking sausages",
191
+ "74": "counting money",
192
+ "75": "country line dancing",
193
+ "76": "cracking neck",
194
+ "77": "crawling baby",
195
+ "78": "crossing river",
196
+ "79": "crying",
197
+ "80": "curling hair",
198
+ "81": "cutting nails",
199
+ "82": "cutting pineapple",
200
+ "83": "cutting watermelon",
201
+ "84": "dancing ballet",
202
+ "85": "dancing charleston",
203
+ "86": "dancing gangnam style",
204
+ "87": "dancing macarena",
205
+ "88": "deadlifting",
206
+ "89": "decorating the christmas tree",
207
+ "90": "digging",
208
+ "91": "dining",
209
+ "92": "disc golfing",
210
+ "93": "diving cliff",
211
+ "94": "dodgeball",
212
+ "95": "doing aerobics",
213
+ "96": "doing laundry",
214
+ "97": "doing nails",
215
+ "98": "drawing",
216
+ "99": "dribbling basketball",
217
+ "100": "drinking",
218
+ "101": "drinking beer",
219
+ "102": "drinking shots",
220
+ "103": "driving car",
221
+ "104": "driving tractor",
222
+ "105": "drop kicking",
223
+ "106": "drumming fingers",
224
+ "107": "dunking basketball",
225
+ "108": "dying hair",
226
+ "109": "eating burger",
227
+ "110": "eating cake",
228
+ "111": "eating carrots",
229
+ "112": "eating chips",
230
+ "113": "eating doughnuts",
231
+ "114": "eating hotdog",
232
+ "115": "eating ice cream",
233
+ "116": "eating spaghetti",
234
+ "117": "eating watermelon",
235
+ "118": "egg hunting",
236
+ "119": "exercising arm",
237
+ "120": "exercising with an exercise ball",
238
+ "121": "extinguishing fire",
239
+ "122": "faceplanting",
240
+ "123": "feeding birds",
241
+ "124": "feeding fish",
242
+ "125": "feeding goats",
243
+ "126": "filling eyebrows",
244
+ "127": "finger snapping",
245
+ "128": "fixing hair",
246
+ "129": "flipping pancake",
247
+ "130": "flying kite",
248
+ "131": "folding clothes",
249
+ "132": "folding napkins",
250
+ "133": "folding paper",
251
+ "134": "front raises",
252
+ "135": "frying vegetables",
253
+ "136": "garbage collecting",
254
+ "137": "gargling",
255
+ "138": "getting a haircut",
256
+ "139": "getting a tattoo",
257
+ "140": "giving or receiving award",
258
+ "141": "golf chipping",
259
+ "142": "golf driving",
260
+ "143": "golf putting",
261
+ "144": "grinding meat",
262
+ "145": "grooming dog",
263
+ "146": "grooming horse",
264
+ "147": "gymnastics tumbling",
265
+ "148": "hammer throw",
266
+ "149": "headbanging",
267
+ "150": "headbutting",
268
+ "151": "high jump",
269
+ "152": "high kick",
270
+ "153": "hitting baseball",
271
+ "154": "hockey stop",
272
+ "155": "holding snake",
273
+ "156": "hopscotch",
274
+ "157": "hoverboarding",
275
+ "158": "hugging",
276
+ "159": "hula hooping",
277
+ "160": "hurdling",
278
+ "161": "hurling (sport)",
279
+ "162": "ice climbing",
280
+ "163": "ice fishing",
281
+ "164": "ice skating",
282
+ "165": "ironing",
283
+ "166": "javelin throw",
284
+ "167": "jetskiing",
285
+ "168": "jogging",
286
+ "169": "juggling balls",
287
+ "170": "juggling fire",
288
+ "171": "juggling soccer ball",
289
+ "172": "jumping into pool",
290
+ "173": "jumpstyle dancing",
291
+ "174": "kicking field goal",
292
+ "175": "kicking soccer ball",
293
+ "176": "kissing",
294
+ "177": "kitesurfing",
295
+ "178": "knitting",
296
+ "179": "krumping",
297
+ "180": "laughing",
298
+ "181": "laying bricks",
299
+ "182": "long jump",
300
+ "183": "lunge",
301
+ "184": "making a cake",
302
+ "185": "making a sandwich",
303
+ "186": "making bed",
304
+ "187": "making jewelry",
305
+ "188": "making pizza",
306
+ "189": "making snowman",
307
+ "190": "making sushi",
308
+ "191": "making tea",
309
+ "192": "marching",
310
+ "193": "massaging back",
311
+ "194": "massaging feet",
312
+ "195": "massaging legs",
313
+ "196": "massaging person's head",
314
+ "197": "milking cow",
315
+ "198": "mopping floor",
316
+ "199": "motorcycling",
317
+ "200": "moving furniture",
318
+ "201": "mowing lawn",
319
+ "202": "news anchoring",
320
+ "203": "opening bottle",
321
+ "204": "opening present",
322
+ "205": "paragliding",
323
+ "206": "parasailing",
324
+ "207": "parkour",
325
+ "208": "passing American football (in game)",
326
+ "209": "passing American football (not in game)",
327
+ "210": "peeling apples",
328
+ "211": "peeling potatoes",
329
+ "212": "petting animal (not cat)",
330
+ "213": "petting cat",
331
+ "214": "picking fruit",
332
+ "215": "planting trees",
333
+ "216": "plastering",
334
+ "217": "playing accordion",
335
+ "218": "playing badminton",
336
+ "219": "playing bagpipes",
337
+ "220": "playing basketball",
338
+ "221": "playing bass guitar",
339
+ "222": "playing cards",
340
+ "223": "playing cello",
341
+ "224": "playing chess",
342
+ "225": "playing clarinet",
343
+ "226": "playing controller",
344
+ "227": "playing cricket",
345
+ "228": "playing cymbals",
346
+ "229": "playing didgeridoo",
347
+ "230": "playing drums",
348
+ "231": "playing flute",
349
+ "232": "playing guitar",
350
+ "233": "playing harmonica",
351
+ "234": "playing harp",
352
+ "235": "playing ice hockey",
353
+ "236": "playing keyboard",
354
+ "237": "playing kickball",
355
+ "238": "playing monopoly",
356
+ "239": "playing organ",
357
+ "240": "playing paintball",
358
+ "241": "playing piano",
359
+ "242": "playing poker",
360
+ "243": "playing recorder",
361
+ "244": "playing saxophone",
362
+ "245": "playing squash or racquetball",
363
+ "246": "playing tennis",
364
+ "247": "playing trombone",
365
+ "248": "playing trumpet",
366
+ "249": "playing ukulele",
367
+ "250": "playing violin",
368
+ "251": "playing volleyball",
369
+ "252": "playing xylophone",
370
+ "253": "pole vault",
371
+ "254": "presenting weather forecast",
372
+ "255": "pull ups",
373
+ "256": "pumping fist",
374
+ "257": "pumping gas",
375
+ "258": "punching bag",
376
+ "259": "punching person (boxing)",
377
+ "260": "push up",
378
+ "261": "pushing car",
379
+ "262": "pushing cart",
380
+ "263": "pushing wheelchair",
381
+ "264": "reading book",
382
+ "265": "reading newspaper",
383
+ "266": "recording music",
384
+ "267": "riding a bike",
385
+ "268": "riding camel",
386
+ "269": "riding elephant",
387
+ "270": "riding mechanical bull",
388
+ "271": "riding mountain bike",
389
+ "272": "riding mule",
390
+ "273": "riding or walking with horse",
391
+ "274": "riding scooter",
392
+ "275": "riding unicycle",
393
+ "276": "ripping paper",
394
+ "277": "robot dancing",
395
+ "278": "rock climbing",
396
+ "279": "rock scissors paper",
397
+ "280": "roller skating",
398
+ "281": "running on treadmill",
399
+ "282": "sailing",
400
+ "283": "salsa dancing",
401
+ "284": "sanding floor",
402
+ "285": "scrambling eggs",
403
+ "286": "scuba diving",
404
+ "287": "setting table",
405
+ "288": "shaking hands",
406
+ "289": "shaking head",
407
+ "290": "sharpening knives",
408
+ "291": "sharpening pencil",
409
+ "292": "shaving head",
410
+ "293": "shaving legs",
411
+ "294": "shearing sheep",
412
+ "295": "shining shoes",
413
+ "296": "shooting basketball",
414
+ "297": "shooting goal (soccer)",
415
+ "298": "shot put",
416
+ "299": "shoveling snow",
417
+ "300": "shredding paper",
418
+ "301": "shuffling cards",
419
+ "302": "side kick",
420
+ "303": "sign language interpreting",
421
+ "304": "singing",
422
+ "305": "situp",
423
+ "306": "skateboarding",
424
+ "307": "ski jumping",
425
+ "308": "skiing (not slalom or crosscountry)",
426
+ "309": "skiing crosscountry",
427
+ "310": "skiing slalom",
428
+ "311": "skipping rope",
429
+ "312": "skydiving",
430
+ "313": "slacklining",
431
+ "314": "slapping",
432
+ "315": "sled dog racing",
433
+ "316": "smoking",
434
+ "317": "smoking hookah",
435
+ "318": "snatch weight lifting",
436
+ "319": "sneezing",
437
+ "320": "sniffing",
438
+ "321": "snorkeling",
439
+ "322": "snowboarding",
440
+ "323": "snowkiting",
441
+ "324": "snowmobiling",
442
+ "325": "somersaulting",
443
+ "326": "spinning poi",
444
+ "327": "spray painting",
445
+ "328": "spraying",
446
+ "329": "springboard diving",
447
+ "330": "squat",
448
+ "331": "sticking tongue out",
449
+ "332": "stomping grapes",
450
+ "333": "stretching arm",
451
+ "334": "stretching leg",
452
+ "335": "strumming guitar",
453
+ "336": "surfing crowd",
454
+ "337": "surfing water",
455
+ "338": "sweeping floor",
456
+ "339": "swimming backstroke",
457
+ "340": "swimming breast stroke",
458
+ "341": "swimming butterfly stroke",
459
+ "342": "swing dancing",
460
+ "343": "swinging legs",
461
+ "344": "swinging on something",
462
+ "345": "sword fighting",
463
+ "346": "tai chi",
464
+ "347": "taking a shower",
465
+ "348": "tango dancing",
466
+ "349": "tap dancing",
467
+ "350": "tapping guitar",
468
+ "351": "tapping pen",
469
+ "352": "tasting beer",
470
+ "353": "tasting food",
471
+ "354": "testifying",
472
+ "355": "texting",
473
+ "356": "throwing axe",
474
+ "357": "throwing ball",
475
+ "358": "throwing discus",
476
+ "359": "tickling",
477
+ "360": "tobogganing",
478
+ "361": "tossing coin",
479
+ "362": "tossing salad",
480
+ "363": "training dog",
481
+ "364": "trapezing",
482
+ "365": "trimming or shaving beard",
483
+ "366": "trimming trees",
484
+ "367": "triple jump",
485
+ "368": "tying bow tie",
486
+ "369": "tying knot (not on a tie)",
487
+ "370": "tying tie",
488
+ "371": "unboxing",
489
+ "372": "unloading truck",
490
+ "373": "using computer",
491
+ "374": "using remote controller (not gaming)",
492
+ "375": "using segway",
493
+ "376": "vault",
494
+ "377": "waiting in line",
495
+ "378": "walking the dog",
496
+ "379": "washing dishes",
497
+ "380": "washing feet",
498
+ "381": "washing hair",
499
+ "382": "washing hands",
500
+ "383": "water skiing",
501
+ "384": "water sliding",
502
+ "385": "watering plants",
503
+ "386": "waxing back",
504
+ "387": "waxing chest",
505
+ "388": "waxing eyebrows",
506
+ "389": "waxing legs",
507
+ "390": "weaving basket",
508
+ "391": "welding",
509
+ "392": "whistling",
510
+ "393": "windsurfing",
511
+ "394": "wrapping present",
512
+ "395": "wrestling",
513
+ "396": "writing",
514
+ "397": "yawning",
515
+ "398": "yoga",
516
+ "399": "zumba"
517
+ },
518
+ "image_size": 224,
519
+ "initializer_range": 0.02,
520
+ "intermediate_size": 3072,
521
+ "is_decoder": false,
522
+ "is_encoder_decoder": false,
523
+ "label2id": {
524
+ "abseiling": 0,
525
+ "air drumming": 1,
526
+ "answering questions": 2,
527
+ "applauding": 3,
528
+ "applying cream": 4,
529
+ "archery": 5,
530
+ "arm wrestling": 6,
531
+ "arranging flowers": 7,
532
+ "assembling computer": 8,
533
+ "auctioning": 9,
534
+ "baby waking up": 10,
535
+ "baking cookies": 11,
536
+ "balloon blowing": 12,
537
+ "bandaging": 13,
538
+ "barbequing": 14,
539
+ "bartending": 15,
540
+ "beatboxing": 16,
541
+ "bee keeping": 17,
542
+ "belly dancing": 18,
543
+ "bench pressing": 19,
544
+ "bending back": 20,
545
+ "bending metal": 21,
546
+ "biking through snow": 22,
547
+ "blasting sand": 23,
548
+ "blowing glass": 24,
549
+ "blowing leaves": 25,
550
+ "blowing nose": 26,
551
+ "blowing out candles": 27,
552
+ "bobsledding": 28,
553
+ "bookbinding": 29,
554
+ "bouncing on trampoline": 30,
555
+ "bowling": 31,
556
+ "braiding hair": 32,
557
+ "breading or breadcrumbing": 33,
558
+ "breakdancing": 34,
559
+ "brush painting": 35,
560
+ "brushing hair": 36,
561
+ "brushing teeth": 37,
562
+ "building cabinet": 38,
563
+ "building shed": 39,
564
+ "bungee jumping": 40,
565
+ "busking": 41,
566
+ "canoeing or kayaking": 42,
567
+ "capoeira": 43,
568
+ "carrying baby": 44,
569
+ "cartwheeling": 45,
570
+ "carving pumpkin": 46,
571
+ "catching fish": 47,
572
+ "catching or throwing baseball": 48,
573
+ "catching or throwing frisbee": 49,
574
+ "catching or throwing softball": 50,
575
+ "celebrating": 51,
576
+ "changing oil": 52,
577
+ "changing wheel": 53,
578
+ "checking tires": 54,
579
+ "cheerleading": 55,
580
+ "chopping wood": 56,
581
+ "clapping": 57,
582
+ "clay pottery making": 58,
583
+ "clean and jerk": 59,
584
+ "cleaning floor": 60,
585
+ "cleaning gutters": 61,
586
+ "cleaning pool": 62,
587
+ "cleaning shoes": 63,
588
+ "cleaning toilet": 64,
589
+ "cleaning windows": 65,
590
+ "climbing a rope": 66,
591
+ "climbing ladder": 67,
592
+ "climbing tree": 68,
593
+ "contact juggling": 69,
594
+ "cooking chicken": 70,
595
+ "cooking egg": 71,
596
+ "cooking on campfire": 72,
597
+ "cooking sausages": 73,
598
+ "counting money": 74,
599
+ "country line dancing": 75,
600
+ "cracking neck": 76,
601
+ "crawling baby": 77,
602
+ "crossing river": 78,
603
+ "crying": 79,
604
+ "curling hair": 80,
605
+ "cutting nails": 81,
606
+ "cutting pineapple": 82,
607
+ "cutting watermelon": 83,
608
+ "dancing ballet": 84,
609
+ "dancing charleston": 85,
610
+ "dancing gangnam style": 86,
611
+ "dancing macarena": 87,
612
+ "deadlifting": 88,
613
+ "decorating the christmas tree": 89,
614
+ "digging": 90,
615
+ "dining": 91,
616
+ "disc golfing": 92,
617
+ "diving cliff": 93,
618
+ "dodgeball": 94,
619
+ "doing aerobics": 95,
620
+ "doing laundry": 96,
621
+ "doing nails": 97,
622
+ "drawing": 98,
623
+ "dribbling basketball": 99,
624
+ "drinking": 100,
625
+ "drinking beer": 101,
626
+ "drinking shots": 102,
627
+ "driving car": 103,
628
+ "driving tractor": 104,
629
+ "drop kicking": 105,
630
+ "drumming fingers": 106,
631
+ "dunking basketball": 107,
632
+ "dying hair": 108,
633
+ "eating burger": 109,
634
+ "eating cake": 110,
635
+ "eating carrots": 111,
636
+ "eating chips": 112,
637
+ "eating doughnuts": 113,
638
+ "eating hotdog": 114,
639
+ "eating ice cream": 115,
640
+ "eating spaghetti": 116,
641
+ "eating watermelon": 117,
642
+ "egg hunting": 118,
643
+ "exercising arm": 119,
644
+ "exercising with an exercise ball": 120,
645
+ "extinguishing fire": 121,
646
+ "faceplanting": 122,
647
+ "feeding birds": 123,
648
+ "feeding fish": 124,
649
+ "feeding goats": 125,
650
+ "filling eyebrows": 126,
651
+ "finger snapping": 127,
652
+ "fixing hair": 128,
653
+ "flipping pancake": 129,
654
+ "flying kite": 130,
655
+ "folding clothes": 131,
656
+ "folding napkins": 132,
657
+ "folding paper": 133,
658
+ "front raises": 134,
659
+ "frying vegetables": 135,
660
+ "garbage collecting": 136,
661
+ "gargling": 137,
662
+ "getting a haircut": 138,
663
+ "getting a tattoo": 139,
664
+ "giving or receiving award": 140,
665
+ "golf chipping": 141,
666
+ "golf driving": 142,
667
+ "golf putting": 143,
668
+ "grinding meat": 144,
669
+ "grooming dog": 145,
670
+ "grooming horse": 146,
671
+ "gymnastics tumbling": 147,
672
+ "hammer throw": 148,
673
+ "headbanging": 149,
674
+ "headbutting": 150,
675
+ "high jump": 151,
676
+ "high kick": 152,
677
+ "hitting baseball": 153,
678
+ "hockey stop": 154,
679
+ "holding snake": 155,
680
+ "hopscotch": 156,
681
+ "hoverboarding": 157,
682
+ "hugging": 158,
683
+ "hula hooping": 159,
684
+ "hurdling": 160,
685
+ "hurling (sport)": 161,
686
+ "ice climbing": 162,
687
+ "ice fishing": 163,
688
+ "ice skating": 164,
689
+ "ironing": 165,
690
+ "javelin throw": 166,
691
+ "jetskiing": 167,
692
+ "jogging": 168,
693
+ "juggling balls": 169,
694
+ "juggling fire": 170,
695
+ "juggling soccer ball": 171,
696
+ "jumping into pool": 172,
697
+ "jumpstyle dancing": 173,
698
+ "kicking field goal": 174,
699
+ "kicking soccer ball": 175,
700
+ "kissing": 176,
701
+ "kitesurfing": 177,
702
+ "knitting": 178,
703
+ "krumping": 179,
704
+ "laughing": 180,
705
+ "laying bricks": 181,
706
+ "long jump": 182,
707
+ "lunge": 183,
708
+ "making a cake": 184,
709
+ "making a sandwich": 185,
710
+ "making bed": 186,
711
+ "making jewelry": 187,
712
+ "making pizza": 188,
713
+ "making snowman": 189,
714
+ "making sushi": 190,
715
+ "making tea": 191,
716
+ "marching": 192,
717
+ "massaging back": 193,
718
+ "massaging feet": 194,
719
+ "massaging legs": 195,
720
+ "massaging person's head": 196,
721
+ "milking cow": 197,
722
+ "mopping floor": 198,
723
+ "motorcycling": 199,
724
+ "moving furniture": 200,
725
+ "mowing lawn": 201,
726
+ "news anchoring": 202,
727
+ "opening bottle": 203,
728
+ "opening present": 204,
729
+ "paragliding": 205,
730
+ "parasailing": 206,
731
+ "parkour": 207,
732
+ "passing American football (in game)": 208,
733
+ "passing American football (not in game)": 209,
734
+ "peeling apples": 210,
735
+ "peeling potatoes": 211,
736
+ "petting animal (not cat)": 212,
737
+ "petting cat": 213,
738
+ "picking fruit": 214,
739
+ "planting trees": 215,
740
+ "plastering": 216,
741
+ "playing accordion": 217,
742
+ "playing badminton": 218,
743
+ "playing bagpipes": 219,
744
+ "playing basketball": 220,
745
+ "playing bass guitar": 221,
746
+ "playing cards": 222,
747
+ "playing cello": 223,
748
+ "playing chess": 224,
749
+ "playing clarinet": 225,
750
+ "playing controller": 226,
751
+ "playing cricket": 227,
752
+ "playing cymbals": 228,
753
+ "playing didgeridoo": 229,
754
+ "playing drums": 230,
755
+ "playing flute": 231,
756
+ "playing guitar": 232,
757
+ "playing harmonica": 233,
758
+ "playing harp": 234,
759
+ "playing ice hockey": 235,
760
+ "playing keyboard": 236,
761
+ "playing kickball": 237,
762
+ "playing monopoly": 238,
763
+ "playing organ": 239,
764
+ "playing paintball": 240,
765
+ "playing piano": 241,
766
+ "playing poker": 242,
767
+ "playing recorder": 243,
768
+ "playing saxophone": 244,
769
+ "playing squash or racquetball": 245,
770
+ "playing tennis": 246,
771
+ "playing trombone": 247,
772
+ "playing trumpet": 248,
773
+ "playing ukulele": 249,
774
+ "playing violin": 250,
775
+ "playing volleyball": 251,
776
+ "playing xylophone": 252,
777
+ "pole vault": 253,
778
+ "presenting weather forecast": 254,
779
+ "pull ups": 255,
780
+ "pumping fist": 256,
781
+ "pumping gas": 257,
782
+ "punching bag": 258,
783
+ "punching person (boxing)": 259,
784
+ "push up": 260,
785
+ "pushing car": 261,
786
+ "pushing cart": 262,
787
+ "pushing wheelchair": 263,
788
+ "reading book": 264,
789
+ "reading newspaper": 265,
790
+ "recording music": 266,
791
+ "riding a bike": 267,
792
+ "riding camel": 268,
793
+ "riding elephant": 269,
794
+ "riding mechanical bull": 270,
795
+ "riding mountain bike": 271,
796
+ "riding mule": 272,
797
+ "riding or walking with horse": 273,
798
+ "riding scooter": 274,
799
+ "riding unicycle": 275,
800
+ "ripping paper": 276,
801
+ "robot dancing": 277,
802
+ "rock climbing": 278,
803
+ "rock scissors paper": 279,
804
+ "roller skating": 280,
805
+ "running on treadmill": 281,
806
+ "sailing": 282,
807
+ "salsa dancing": 283,
808
+ "sanding floor": 284,
809
+ "scrambling eggs": 285,
810
+ "scuba diving": 286,
811
+ "setting table": 287,
812
+ "shaking hands": 288,
813
+ "shaking head": 289,
814
+ "sharpening knives": 290,
815
+ "sharpening pencil": 291,
816
+ "shaving head": 292,
817
+ "shaving legs": 293,
818
+ "shearing sheep": 294,
819
+ "shining shoes": 295,
820
+ "shooting basketball": 296,
821
+ "shooting goal (soccer)": 297,
822
+ "shot put": 298,
823
+ "shoveling snow": 299,
824
+ "shredding paper": 300,
825
+ "shuffling cards": 301,
826
+ "side kick": 302,
827
+ "sign language interpreting": 303,
828
+ "singing": 304,
829
+ "situp": 305,
830
+ "skateboarding": 306,
831
+ "ski jumping": 307,
832
+ "skiing (not slalom or crosscountry)": 308,
833
+ "skiing crosscountry": 309,
834
+ "skiing slalom": 310,
835
+ "skipping rope": 311,
836
+ "skydiving": 312,
837
+ "slacklining": 313,
838
+ "slapping": 314,
839
+ "sled dog racing": 315,
840
+ "smoking": 316,
841
+ "smoking hookah": 317,
842
+ "snatch weight lifting": 318,
843
+ "sneezing": 319,
844
+ "sniffing": 320,
845
+ "snorkeling": 321,
846
+ "snowboarding": 322,
847
+ "snowkiting": 323,
848
+ "snowmobiling": 324,
849
+ "somersaulting": 325,
850
+ "spinning poi": 326,
851
+ "spray painting": 327,
852
+ "spraying": 328,
853
+ "springboard diving": 329,
854
+ "squat": 330,
855
+ "sticking tongue out": 331,
856
+ "stomping grapes": 332,
857
+ "stretching arm": 333,
858
+ "stretching leg": 334,
859
+ "strumming guitar": 335,
860
+ "surfing crowd": 336,
861
+ "surfing water": 337,
862
+ "sweeping floor": 338,
863
+ "swimming backstroke": 339,
864
+ "swimming breast stroke": 340,
865
+ "swimming butterfly stroke": 341,
866
+ "swing dancing": 342,
867
+ "swinging legs": 343,
868
+ "swinging on something": 344,
869
+ "sword fighting": 345,
870
+ "tai chi": 346,
871
+ "taking a shower": 347,
872
+ "tango dancing": 348,
873
+ "tap dancing": 349,
874
+ "tapping guitar": 350,
875
+ "tapping pen": 351,
876
+ "tasting beer": 352,
877
+ "tasting food": 353,
878
+ "testifying": 354,
879
+ "texting": 355,
880
+ "throwing axe": 356,
881
+ "throwing ball": 357,
882
+ "throwing discus": 358,
883
+ "tickling": 359,
884
+ "tobogganing": 360,
885
+ "tossing coin": 361,
886
+ "tossing salad": 362,
887
+ "training dog": 363,
888
+ "trapezing": 364,
889
+ "trimming or shaving beard": 365,
890
+ "trimming trees": 366,
891
+ "triple jump": 367,
892
+ "tying bow tie": 368,
893
+ "tying knot (not on a tie)": 369,
894
+ "tying tie": 370,
895
+ "unboxing": 371,
896
+ "unloading truck": 372,
897
+ "using computer": 373,
898
+ "using remote controller (not gaming)": 374,
899
+ "using segway": 375,
900
+ "vault": 376,
901
+ "waiting in line": 377,
902
+ "walking the dog": 378,
903
+ "washing dishes": 379,
904
+ "washing feet": 380,
905
+ "washing hair": 381,
906
+ "washing hands": 382,
907
+ "water skiing": 383,
908
+ "water sliding": 384,
909
+ "watering plants": 385,
910
+ "waxing back": 386,
911
+ "waxing chest": 387,
912
+ "waxing eyebrows": 388,
913
+ "waxing legs": 389,
914
+ "weaving basket": 390,
915
+ "welding": 391,
916
+ "whistling": 392,
917
+ "windsurfing": 393,
918
+ "wrapping present": 394,
919
+ "wrestling": 395,
920
+ "writing": 396,
921
+ "yawning": 397,
922
+ "yoga": 398,
923
+ "zumba": 399
924
+ },
925
+ "layer_norm_eps": 1e-06,
926
+ "length_penalty": 1.0,
927
+ "max_length": 20,
928
+ "min_length": 0,
929
+ "model_type": "timesformer",
930
+ "no_repeat_ngram_size": 0,
931
+ "num_attention_heads": 12,
932
+ "num_beam_groups": 1,
933
+ "num_beams": 1,
934
+ "num_channels": 3,
935
+ "num_frames": 8,
936
+ "num_hidden_layers": 12,
937
+ "num_return_sequences": 1,
938
+ "output_attentions": false,
939
+ "output_hidden_states": false,
940
+ "output_scores": false,
941
+ "pad_token_id": null,
942
+ "patch_size": 16,
943
+ "prefix": null,
944
+ "problem_type": null,
945
+ "pruned_heads": {},
946
+ "qkv_bias": true,
947
+ "remove_invalid_values": false,
948
+ "repetition_penalty": 1.0,
949
+ "return_dict": true,
950
+ "return_dict_in_generate": false,
951
+ "sep_token_id": null,
952
+ "suppress_tokens": null,
953
+ "task_specific_params": null,
954
+ "temperature": 1.0,
955
+ "tf_legacy_loss": false,
956
+ "tie_encoder_decoder": false,
957
+ "tie_word_embeddings": true,
958
+ "tokenizer_class": null,
959
+ "top_k": 50,
960
+ "top_p": 1.0,
961
+ "torch_dtype": "float32",
962
+ "torchscript": false,
963
+ "transformers_version": "4.30.2",
964
+ "typical_p": 1.0,
965
+ "use_bfloat16": false
966
+ },
967
+ "is_encoder_decoder": true,
968
+ "model_type": "vision-encoder-decoder",
969
+ "pad_token_id": 0,
970
+ "tie_word_embeddings": false,
971
+ "torch_dtype": "float32",
972
+ "transformers_version": null
973
+ }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 101,
3
+ "pad_token_id": 0,
4
+ "transformers_version": "4.30.2"
5
+ }
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5f7ad6486722a0456ed45c07327b3a819e36f2fc7b1a300f1c65b8a95ebafc
3
+ size 2073453967
last-checkpoint/preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.45,
12
+ 0.45,
13
+ 0.45
14
+ ],
15
+ "image_processor_type": "VideoMAEImageProcessor",
16
+ "image_std": [
17
+ 0.225,
18
+ 0.225,
19
+ 0.225
20
+ ],
21
+ "resample": 2,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "shortest_edge": 224
25
+ }
26
+ }
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e3cf3759a7ef5e3f3d65a34dc380364e9a7804d1f85f4f0e269426de63476
3
+ size 1036763317
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d413219336d02ecc51d7da2ef35aad572fd5519a308eb9fca273914667554c4d
3
+ size 14575
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f32af2d2da8be9a9aafcf0242f6a0ee5b6af62bad6151977f24ed5afd72eaec
3
+ size 627
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.2277470841006752,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "learning_rate": 4.6930632289748316e-05,
13
+ "loss": 2.4961,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 0.12,
18
+ "eval_bleu": 1.5879,
19
+ "eval_gen_len": 9.533199195171026,
20
+ "eval_loss": 1.6547783613204956,
21
+ "eval_rouge1": 25.4717,
22
+ "eval_rouge2": 5.11,
23
+ "eval_rougeL": 24.6679,
24
+ "eval_rougeLsum": 24.6696,
25
+ "eval_runtime": 425.1194,
26
+ "eval_samples_per_second": 1.169,
27
+ "eval_steps_per_second": 0.294,
28
+ "step": 200
29
+ },
30
+ {
31
+ "epoch": 0.25,
32
+ "learning_rate": 4.386126457949662e-05,
33
+ "loss": 1.6561,
34
+ "step": 400
35
+ },
36
+ {
37
+ "epoch": 0.25,
38
+ "eval_bleu": 2.3515,
39
+ "eval_gen_len": 9.533199195171026,
40
+ "eval_loss": 1.5339239835739136,
41
+ "eval_rouge1": 26.1748,
42
+ "eval_rouge2": 5.9106,
43
+ "eval_rougeL": 25.413,
44
+ "eval_rougeLsum": 25.3958,
45
+ "eval_runtime": 422.3316,
46
+ "eval_samples_per_second": 1.177,
47
+ "eval_steps_per_second": 0.296,
48
+ "step": 400
49
+ },
50
+ {
51
+ "epoch": 0.37,
52
+ "learning_rate": 4.079189686924494e-05,
53
+ "loss": 1.5772,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 0.37,
58
+ "eval_bleu": 2.266,
59
+ "eval_gen_len": 9.533199195171026,
60
+ "eval_loss": 1.4510468244552612,
61
+ "eval_rouge1": 28.6891,
62
+ "eval_rouge2": 6.0431,
63
+ "eval_rougeL": 27.7387,
64
+ "eval_rougeLsum": 27.8043,
65
+ "eval_runtime": 433.608,
66
+ "eval_samples_per_second": 1.146,
67
+ "eval_steps_per_second": 0.288,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 0.49,
72
+ "learning_rate": 3.772252915899325e-05,
73
+ "loss": 1.492,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 0.49,
78
+ "eval_bleu": 3.6517,
79
+ "eval_gen_len": 9.533199195171026,
80
+ "eval_loss": 1.3759902715682983,
81
+ "eval_rouge1": 29.0257,
82
+ "eval_rouge2": 7.8515,
83
+ "eval_rougeL": 28.3142,
84
+ "eval_rougeLsum": 28.3036,
85
+ "eval_runtime": 430.0719,
86
+ "eval_samples_per_second": 1.156,
87
+ "eval_steps_per_second": 0.291,
88
+ "step": 800
89
+ },
90
+ {
91
+ "epoch": 0.61,
92
+ "learning_rate": 3.4653161448741564e-05,
93
+ "loss": 1.4736,
94
+ "step": 1000
95
+ },
96
+ {
97
+ "epoch": 0.61,
98
+ "eval_bleu": 3.4866,
99
+ "eval_gen_len": 9.533199195171026,
100
+ "eval_loss": 1.3425214290618896,
101
+ "eval_rouge1": 27.9774,
102
+ "eval_rouge2": 6.2175,
103
+ "eval_rougeL": 26.7783,
104
+ "eval_rougeLsum": 26.7207,
105
+ "eval_runtime": 428.7998,
106
+ "eval_samples_per_second": 1.159,
107
+ "eval_steps_per_second": 0.292,
108
+ "step": 1000
109
+ },
110
+ {
111
+ "epoch": 0.74,
112
+ "learning_rate": 3.158379373848988e-05,
113
+ "loss": 1.3856,
114
+ "step": 1200
115
+ },
116
+ {
117
+ "epoch": 0.74,
118
+ "eval_bleu": 3.1649,
119
+ "eval_gen_len": 9.533199195171026,
120
+ "eval_loss": 1.311830997467041,
121
+ "eval_rouge1": 27.3532,
122
+ "eval_rouge2": 6.5569,
123
+ "eval_rougeL": 26.4964,
124
+ "eval_rougeLsum": 26.5087,
125
+ "eval_runtime": 430.7347,
126
+ "eval_samples_per_second": 1.154,
127
+ "eval_steps_per_second": 0.29,
128
+ "step": 1200
129
+ },
130
+ {
131
+ "epoch": 0.86,
132
+ "learning_rate": 2.8514426028238185e-05,
133
+ "loss": 1.3972,
134
+ "step": 1400
135
+ },
136
+ {
137
+ "epoch": 0.86,
138
+ "eval_bleu": 3.5337,
139
+ "eval_gen_len": 9.533199195171026,
140
+ "eval_loss": 1.2867928743362427,
141
+ "eval_rouge1": 28.233,
142
+ "eval_rouge2": 7.6471,
143
+ "eval_rougeL": 27.3651,
144
+ "eval_rougeLsum": 27.3354,
145
+ "eval_runtime": 428.0559,
146
+ "eval_samples_per_second": 1.161,
147
+ "eval_steps_per_second": 0.292,
148
+ "step": 1400
149
+ },
150
+ {
151
+ "epoch": 0.98,
152
+ "learning_rate": 2.54450583179865e-05,
153
+ "loss": 1.374,
154
+ "step": 1600
155
+ },
156
+ {
157
+ "epoch": 0.98,
158
+ "eval_bleu": 3.5737,
159
+ "eval_gen_len": 9.533199195171026,
160
+ "eval_loss": 1.2571070194244385,
161
+ "eval_rouge1": 28.8216,
162
+ "eval_rouge2": 7.542,
163
+ "eval_rougeL": 27.9166,
164
+ "eval_rougeLsum": 27.9353,
165
+ "eval_runtime": 432.0511,
166
+ "eval_samples_per_second": 1.15,
167
+ "eval_steps_per_second": 0.289,
168
+ "step": 1600
169
+ },
170
+ {
171
+ "epoch": 1.1,
172
+ "learning_rate": 2.237569060773481e-05,
173
+ "loss": 1.2207,
174
+ "step": 1800
175
+ },
176
+ {
177
+ "epoch": 1.1,
178
+ "eval_bleu": 3.7983,
179
+ "eval_gen_len": 9.533199195171026,
180
+ "eval_loss": 1.3362118005752563,
181
+ "eval_rouge1": 29.9574,
182
+ "eval_rouge2": 8.1088,
183
+ "eval_rougeL": 28.8866,
184
+ "eval_rougeLsum": 28.855,
185
+ "eval_runtime": 447.7731,
186
+ "eval_samples_per_second": 1.11,
187
+ "eval_steps_per_second": 0.141,
188
+ "step": 1800
189
+ },
190
+ {
191
+ "epoch": 1.23,
192
+ "learning_rate": 1.930632289748312e-05,
193
+ "loss": 1.1861,
194
+ "step": 2000
195
+ },
196
+ {
197
+ "epoch": 1.23,
198
+ "eval_bleu": 3.6521,
199
+ "eval_gen_len": 9.533199195171026,
200
+ "eval_loss": 1.3295202255249023,
201
+ "eval_rouge1": 30.072,
202
+ "eval_rouge2": 7.7799,
203
+ "eval_rougeL": 28.8417,
204
+ "eval_rougeLsum": 28.864,
205
+ "eval_runtime": 427.6198,
206
+ "eval_samples_per_second": 1.162,
207
+ "eval_steps_per_second": 0.147,
208
+ "step": 2000
209
+ }
210
+ ],
211
+ "max_steps": 3258,
212
+ "num_train_epochs": 2,
213
+ "total_flos": 1.3630135827861504e+19,
214
+ "trial_name": null,
215
+ "trial_params": null
216
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd11c6902a2eff6937b7a85a3ed0149c018cb79c811c2997f6d1ef1fd9cc1135
3
+ size 4155
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.45,
12
+ 0.45,
13
+ 0.45
14
+ ],
15
+ "image_processor_type": "VideoMAEImageProcessor",
16
+ "image_std": [
17
+ 0.225,
18
+ 0.225,
19
+ 0.225
20
+ ],
21
+ "resample": 2,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "shortest_edge": 224
25
+ }
26
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:550bbc864591948087fad67256537d7ba1c3f58ebdab57f05ee81cfc56fad4ae
3
- size 1036755317
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e3cf3759a7ef5e3f3d65a34dc380364e9a7804d1f85f4f0e269426de63476
3
+ size 1036763317
runs/Jul13_06-44-19_0388ea093402/events.out.tfevents.1689231025.0388ea093402.188.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:773be6752f326a2d802680446385f13b09e0d40ce2820440fdd14fd57856a5fe
3
+ size 32777
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd11c6902a2eff6937b7a85a3ed0149c018cb79c811c2997f6d1ef1fd9cc1135
3
+ size 4155