SeyedAli committed
Commit 12a7531 • 1 Parent(s): 068cd7d

Rename app.txt to app.py

Files changed (2)
  1. app.py +19 -0
  2. app.txt +0 -22
app.py ADDED
@@ -0,0 +1,19 @@
+ import gradio as gr
+ from transformers import pipeline
+
+ # Load model directly
+ from transformers import AutoProcessor, AutoModelForCTC
+
+ processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
+ model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
+
+ # Build the ASR pipeline once, reusing the model and processor loaded above
+ pipe = pipeline("automatic-speech-recognition", model=model,
+                 tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor)
+
+ def ASR(audio):
+     # `audio` is the file path supplied by the Gradio Audio component
+     return pipe(audio)["text"]
+
+ iface = gr.Interface(fn=ASR, inputs=gr.Audio(type="filepath"), outputs="text")
+ iface.launch(share=False)
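
The new app.py reduces to a single transformers ASR pipeline wrapped in a Gradio interface. For a quick sanity check outside the UI, the same checkpoint can be called directly; a minimal sketch, assuming a local recording at the hypothetical path sample.wav:

# Minimal sketch: transcribe one local file with the same checkpoint.
# "sample.wav" is a hypothetical path used only for illustration.
from transformers import pipeline

asr = pipeline("automatic-speech-recognition",
               model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
print(asr("sample.wav")["text"])
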
app.txt DELETED
@@ -1,22 +0,0 @@
- import tempfile ,os
- import gradio as gr
- from transformers import VitsModel, AutoTokenizer,pipeline
- import torch
- import numpy as np
- import torchaudio
-
- model = VitsModel.from_pretrained("SeyedAli/Persian-Speech-synthesis")
- tokenizer = AutoTokenizer.from_pretrained("SeyedAli/Persian-Speech-synthesis")
-
- def ASR(audio):
-     inputs = tokenizer(text, return_tensors="pt")
-     with torch.no_grad():
-         output = model(**inputs).waveform
-     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-         torchaudio.save(fp, output, model.config.sampling_rate,format="wav")
-     return fp.name
- iface = gr.Interface(fn=TTS, inputs="text", outputs="audio")
- iface.launch(share=False)
- pipe = pipeline("automatic-speech-recognition", model=Model)
- prediction = asr(audio, chunk_length_s=5, stride_length_s=1)
- return prediction["text"]