marinone94
committed on
Commit
•
fd7be5b
1
Parent(s):
2affbb8
get token from creds if not set in venv
Browse files- run_speech_recognition_seq2seq_streaming.py +10 -1
- test_run.sh +1 -1
run_speech_recognition_seq2seq_streaming.py
CHANGED
@@ -63,6 +63,14 @@ logger = logging.getLogger(__name__)
|
|
63 |
|
64 |
wandb_token = os.environ.get("WANDB_TOKEN", "None")
|
65 |
hf_token = os.environ.get("HF_TOKEN", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
if hf_token is not None:
|
67 |
os.makedirs("/root/.huggingface", exist_ok=True)
|
68 |
with open("/root/.huggingface/token", "w") as f:
|
@@ -348,7 +356,7 @@ def main():
|
|
348 |
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
|
349 |
training_args.do_train = True
|
350 |
training_args.do_eval = True
|
351 |
-
|
352 |
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
|
353 |
# information sent is the one passed as arguments along with your Python/PyTorch versions.
|
354 |
send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
|
@@ -624,6 +632,7 @@ def main():
|
|
624 |
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
625 |
callbacks=[ShuffleCallback()] if data_args.streaming else None,
|
626 |
)
|
|
|
627 |
|
628 |
# 12. Training
|
629 |
if training_args.do_train:
|
|
|
63 |
|
64 |
wandb_token = os.environ.get("WANDB_TOKEN", "None")
|
65 |
hf_token = os.environ.get("HF_TOKEN", None)
|
66 |
+
if hf_token is None and os.path.exists("./creds.txt"):
|
67 |
+
with open("./creds.txt", "r") as f:
|
68 |
+
lines = f.readlines()
|
69 |
+
for line in lines:
|
70 |
+
key, value = line.split("=")
|
71 |
+
if key == "HF_TOKEN":
|
72 |
+
hf_token = value.strip()
|
73 |
+
|
74 |
if hf_token is not None:
|
75 |
os.makedirs("/root/.huggingface", exist_ok=True)
|
76 |
with open("/root/.huggingface/token", "w") as f:
|
|
|
356 |
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
|
357 |
training_args.do_train = True
|
358 |
training_args.do_eval = True
|
359 |
+
|
360 |
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
|
361 |
# information sent is the one passed as arguments along with your Python/PyTorch versions.
|
362 |
send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
|
|
|
632 |
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
633 |
callbacks=[ShuffleCallback()] if data_args.streaming else None,
|
634 |
)
|
635 |
+
logger.info("*** Trainer initialized ***")
|
636 |
|
637 |
# 12. Training
|
638 |
if training_args.do_train:
|
test_run.sh
CHANGED
@@ -3,7 +3,7 @@ python $1run_speech_recognition_seq2seq_streaming.py \
|
|
3 |
--dataset_name="mozilla-foundation/common_voice_11_0" \
|
4 |
--dataset_config_name="sv-SE" \
|
5 |
--language="swedish" \
|
6 |
-
--train_split_name="train" \
|
7 |
--eval_split_name="test" \
|
8 |
--model_index_name="Whisper Tiny Swedish" \
|
9 |
--max_train_samples="64" \
|
|
|
3 |
--dataset_name="mozilla-foundation/common_voice_11_0" \
|
4 |
--dataset_config_name="sv-SE" \
|
5 |
--language="swedish" \
|
6 |
+
--train_split_name="train+validation" \
|
7 |
--eval_split_name="test" \
|
8 |
--model_index_name="Whisper Tiny Swedish" \
|
9 |
--max_train_samples="64" \
|