Update app.py
app.py CHANGED
@@ -48,7 +48,7 @@ user_role = "user"
 tts_model = TTS(language="EN_NEWEST", device="auto")
 speaker_id = tts_model.hps.data.spk2id["EN-Newest"]
 blocksize = 512
-transcriber = pipeline("automatic-speech-recognition", model="
+transcriber = pipeline("automatic-speech-recognition", model="distil-whisper/distil-large-v3")
 def int2float(sound):
     """
     Taken from https://github.com/snakers4/silero-vad
@@ -66,14 +66,14 @@ audio_output = None
 min_speech_ms=500
 max_speech_ms=float("inf")
 # ASR_model = LightningWhisperMLX(model="distil-large-v3", batch_size=6, quant=None)
-ASR_processor = AutoProcessor.from_pretrained("distil-whisper/distil-large-v3")
-ASR_model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "distil-whisper/distil-large-v3",
-    torch_dtype="float16",
-).to("cpu")
-LM_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-
+# ASR_processor = AutoProcessor.from_pretrained("distil-whisper/distil-large-v3")
+# ASR_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+#     "distil-whisper/distil-large-v3",
+#     torch_dtype="float16",
+# ).to("cpu")
+LM_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M-Instruct")
 LM_model = AutoModelForCausalLM.from_pretrained(
-    "HuggingFaceTB/SmolLM-
+    "HuggingFaceTB/SmolLM-135M-Instruct", torch_dtype="float16", trust_remote_code=True
 ).to("cpu")
 LM_pipe = pipeline(
     "text-generation", model=LM_model, tokenizer=LM_tokenizer, device="cpu"