Update README.md
Browse files
README.md
CHANGED
@@ -16,5 +16,41 @@ metrics:
|
|
16 |
|
17 |
# 설명
|
18 |
- 주요 영역별 회의 음성 데이터셋 680GB 중 첫번째 데이터(10GB)를 파인튜닝한 모델입니다.
|
19 |
-
- 링크 : https://huggingface.co/datasets/maxseats/aihub-464-preprocessed-680GB-set-0
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# 설명
|
18 |
- 주요 영역별 회의 음성 데이터셋 680GB 중 첫번째 데이터(10GB)를 파인튜닝한 모델입니다.
|
19 |
+
- 데이터셋 링크 : https://huggingface.co/datasets/maxseats/aihub-464-preprocessed-680GB-set-0
|
20 |
|
21 |
+
# 파라미터
|
22 |
+
# model_name = "openai/whisper-base"
|
23 |
+
```
|
24 |
+
model_name = "SungBeom/whisper-small-ko" # 대안 : "SungBeom/whisper-small-ko"
|
25 |
+
dataset_name = "maxseats/aihub-464-preprocessed-680GB-set-0" # 불러올 데이터셋(허깅페이스 기준)
|
26 |
+
|
27 |
+
CACHE_DIR = '/mnt/a/maxseats/.finetuning_cache' # 캐시 디렉토리 지정
|
28 |
+
is_test = False # True: 소량의 샘플 데이터로 테스트, False: 실제 파인튜닝
|
29 |
+
|
30 |
+
token = "hf_" # 허깅페이스 토큰 입력
|
31 |
+
|
32 |
+
training_args = Seq2SeqTrainingArguments(
|
33 |
+
output_dir=model_dir, # 원하는 리포지토리 이름을 입력한다.
|
34 |
+
per_device_train_batch_size=16,
|
35 |
+
gradient_accumulation_steps=2, # 배치 크기가 2배 감소할 때마다 2배씩 증가
|
36 |
+
learning_rate=1e-5,
|
37 |
+
warmup_steps=1000,
|
38 |
+
# max_steps=2, # epoch 대신 설정
|
39 |
+
num_train_epochs=1, # epoch 수 설정 / max_steps와 이것 중 하나만 설정
|
40 |
+
gradient_checkpointing=True,
|
41 |
+
fp16=True,
|
42 |
+
evaluation_strategy="steps",
|
43 |
+
per_device_eval_batch_size=16,
|
44 |
+
predict_with_generate=True,
|
45 |
+
generation_max_length=225,
|
46 |
+
save_steps=1000,
|
47 |
+
eval_steps=1000,
|
48 |
+
logging_steps=25,
|
49 |
+
report_to=["tensorboard"],
|
50 |
+
load_best_model_at_end=True,
|
51 |
+
metric_for_best_model="cer", # 한국어의 경우 'wer'보다는 'cer'이 더 적합할 것
|
52 |
+
greater_is_better=False,
|
53 |
+
push_to_hub=True,
|
54 |
+
save_total_limit=5, # 최대 저장할 모델 수 지정
|
55 |
+
)
|
56 |
+
```
|