Model save
Browse files
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py
CHANGED
@@ -434,14 +434,11 @@ def main():
|
|
434 |
# that make training complicated and do not help in transcribing the speech
|
435 |
# E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
|
436 |
# that could be easily picked up by the model
|
437 |
-
chars_to_ignore_regex = '[
|
438 |
text_column_name = data_args.text_column_name
|
439 |
|
440 |
def remove_and_replace_special_characters(batch):
|
441 |
-
|
442 |
-
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower().replace('’', "'") + " "
|
443 |
-
else:
|
444 |
-
batch["target_text"] = batch[text_column_name].lower().replace('’', "'") + " "
|
445 |
return batch
|
446 |
|
447 |
with training_args.main_process_first(desc="dataset map special characters removal"):
|
|
|
434 |
# that make training complicated and do not help in transcribing the speech
|
435 |
# E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
|
436 |
# that could be easily picked up by the model
|
437 |
+
chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]'
|
438 |
text_column_name = data_args.text_column_name
|
439 |
|
440 |
def remove_and_replace_special_characters(batch):
|
441 |
+
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name].lower()).replace('’', "'") + " "
|
|
|
|
|
|
|
442 |
return batch
|
443 |
|
444 |
with training_args.main_process_first(desc="dataset map special characters removal"):
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1263088113
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ec1c7675b56877de46f623ac51149e73d4f88cac691dc72595621d35344ce9b
|
3 |
size 1263088113
|
run_speech_recognition_ctc.py
CHANGED
@@ -434,14 +434,11 @@ def main():
|
|
434 |
# that make training complicated and do not help in transcribing the speech
|
435 |
# E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
|
436 |
# that could be easily picked up by the model
|
437 |
-
chars_to_ignore_regex = '[
|
438 |
text_column_name = data_args.text_column_name
|
439 |
|
440 |
def remove_and_replace_special_characters(batch):
|
441 |
-
|
442 |
-
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower().replace('’', "'") + " "
|
443 |
-
else:
|
444 |
-
batch["target_text"] = batch[text_column_name].lower().replace('’', "'") + " "
|
445 |
return batch
|
446 |
|
447 |
with training_args.main_process_first(desc="dataset map special characters removal"):
|
|
|
434 |
# that make training complicated and do not help in transcribing the speech
|
435 |
# E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
|
436 |
# that could be easily picked up by the model
|
437 |
+
chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]'
|
438 |
text_column_name = data_args.text_column_name
|
439 |
|
440 |
def remove_and_replace_special_characters(batch):
|
441 |
+
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name].lower()).replace('’', "'") + " "
|
|
|
|
|
|
|
442 |
return batch
|
443 |
|
444 |
with training_args.main_process_first(desc="dataset map special characters removal"):
|