anuragshas/wav2vec2-large-xlsr-53-telugu

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card | Files and versions | Community

anuragshas committed on Mar 24, 2021

Commit

032c0ae

•

1 Parent(s): 64ae049

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -38,7 +38,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
-df = pd.read_csv("/content/te/test.tsv", sep="\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
@@ -72,7 +72,7 @@ from sklearn.model_selection import train_test_split
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
-df = pd.read_csv("/content/te/test.tsv", sep="\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 wer = load_metric("wer")
@@ -81,13 +81,13 @@ processor = Wav2Vec2Processor.from_pretrained("anuragshas/wav2vec2-large-xlsr-53
 model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
 model.to("cuda")
-chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\_\\;\\:\\"\\“\\%\\‘\\”\\।\\’\\'\\&]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 def normalizer(text):
     # Use your custom normalizer
-    text = text.replace("\\\
-","\
 ")
     text = ' '.join(text.split())
     text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)

 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
+df = pd.read_csv("/content/te/test.tsv", sep="\\\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
+df = pd.read_csv("/content/te/test.tsv", sep="\\\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 wer = load_metric("wer")
 model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
 model.to("cuda")
+chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\_\\;\\:\\"\\“\\%\\‘\\”\\।\\’\'\\&]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 def normalizer(text):
     # Use your custom normalizer
+    text = text.replace("\\\\\\
+","\\
 ")
     text = ' '.join(text.split())
     text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)