Spaces:

awacke1
/

TTS-STT-Blocks

Runtime error

App Files Files Community

awacke1 committed on Jun 5, 2022

Commit

e954652

•

1 Parent(s): eb611ba

Create app.py

Browse files

Files changed (1) hide show

app.py +162 -0

app.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import streamlit as st
+import firebase_admin
+from firebase_admin import credentials
+from firebase_admin import firestore
+import datetime
+from transformers import pipeline
+import gradio as gr
+import tempfile
+from typing import Optional
+import numpy as np
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+@st.experimental_singleton
+def get_db_firestore():
+    cred = credentials.Certificate('test.json')
+    firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
+    db = firestore.client()
+    return db
+# Module-level Firestore client shared by upsert(), select() and selectall().
+db = get_db_firestore()
+# Speech-recognition pipeline shared by transcribe() and speech_to_text().
+asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+def transcribe(audio):
+    text = asr(audio)["text"]
+    return text
+# Text-classification pipeline used by text_to_sentiment(); no model name is passed here.
+classifier = pipeline("text-classification")
+def speech_to_text(speech):
+    text = asr(speech)["text"]
+    return text
+def text_to_sentiment(text):
+    sentiment = classifier(text)[0]["label"]
+    return sentiment
+def upsert(text):
+    date_time =str(datetime.datetime.today())
+    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
+    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/Text2SpeechSentimentSave', u'last': text, u'born': date_time,})
+    saved = select('Text2SpeechSentimentSave', date_time)
+    # check it here:  https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
+    return saved
+def select(collection, document):
+    doc_ref = db.collection(collection).document(document)
+    doc = doc_ref.get()
+    docid = ("The id is: ", doc.id)
+    contents = ("The contents are: ", doc.to_dict())
+    return contents
+def selectall(text):
+    docs = db.collection('Text2SpeechSentimentSave').stream()
+    doclist=''
+    for doc in docs:
+        #docid=doc.id
+        #dict=doc.to_dict()
+        #doclist+=doc.to_dict()
+        r=(f'{doc.id} => {doc.to_dict()}')
+        doclist += r
+    return doclist
+# Blocks UI wiring microphone input to recognition, sentiment, save and list actions.
+demo = gr.Blocks()
+with demo:
+    #audio_file = gr.Audio(type="filepath")
+    # Microphone recording handed to the callback as a file path.
+    audio_file = gr.inputs.Audio(source="microphone", type="filepath")
+    # Output widgets: recognised text, sentiment label, saved record, full listing.
+    text = gr.Textbox()
+    label = gr.Label()
+    saved = gr.Textbox()
+    savedAll = gr.Textbox()
+    b1 = gr.Button("Recognize Speech")
+    b2 = gr.Button("Classify Sentiment")
+    b3 = gr.Button("Save Speech to Text")
+    b4 = gr.Button("Retrieve All")
+    # Each button feeds one widget's value into a function and shows the result.
+    b1.click(speech_to_text, inputs=audio_file, outputs=text)
+    b2.click(text_to_sentiment, inputs=text, outputs=label)
+    b3.click(upsert, inputs=text, outputs=saved)
+    # selectall ignores its argument; the textbox is passed only to satisfy the signature.
+    b4.click(selectall, inputs=text, outputs=savedAll)
+# NOTE(review): launch() presumably blocks when run as a script, in which case
+# the statements after this line would not run until this server stops - confirm.
+demo.launch(share=True)
+# TTS model identifiers to download at start-up and offer in the model picker.
+# Each entry is appended to "tts_models/" when it is downloaded below.
+# Commented-out entries are currently disabled.
+MODEL_NAMES = [
+    # "en/ek1/tacotron2",
+    "en/ljspeech/tacotron2-DDC",
+    # "en/ljspeech/tacotron2-DDC_ph",
+    # "en/ljspeech/glow-tts",
+    # "en/ljspeech/tacotron2-DCA",
+    # "en/ljspeech/speedy-speech-wn",
+    # "en/ljspeech/vits",
+    # "en/vctk/sc-glow-tts",
+    # "en/vctk/vits",
+    # "en/sam/tacotron-DDC",
+    # "es/mai/tacotron2-DDC",
+    "fr/mai/tacotron2-DDC",
+    "zh-CN/baker/tacotron2-DDC-GST",
+    "nl/mai/tacotron2-DDC",
+    "de/thorsten/tacotron2-DCA",
+    # "ja/kokoro/tacotron2-DDC",
+]
+# Maps each entry of MODEL_NAMES to a ready Synthesizer; filled once at import time.
+MODELS = {}
+manager = ModelManager()
+for MODEL_NAME in MODEL_NAMES:
+    print(f"downloading {MODEL_NAME}")
+    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+    # The model's metadata names its default vocoder, which may be None.
+    vocoder_name: Optional[str] = model_item["default_vocoder"]
+    vocoder_path = None
+    vocoder_config_path = None
+    if vocoder_name is not None:
+        # Only fetch a vocoder when the model declares one.
+        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+    # NOTE(review): the third positional argument is passed as None; presumably
+    # an optional speakers file - confirm against the Synthesizer signature.
+    synthesizer = Synthesizer(
+        model_path, config_path, None, vocoder_path, vocoder_config_path,
+    )
+    MODELS[MODEL_NAME] = synthesizer
+def tts(text: str, model_name: str):
+    print(text, model_name)
+    synthesizer = MODELS.get(model_name, None)
+    if synthesizer is None:
+        raise NameError("model not found")
+    wavs = synthesizer.tts(text)
+    # output = (synthesizer.output_sample_rate, np.array(wavs))
+    # return output
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        synthesizer.save_wav(wavs, fp)
+        return fp.name
+# Second UI: a text box plus a model picker feeding tts(), with audio output.
+iface = gr.Interface(
+    fn=tts,
+    inputs=[
+        gr.inputs.Textbox(
+            label="Input",
+            default="Hello, how are you?",
+        ),
+        # Choices are the same identifiers used as keys of MODELS.
+        gr.inputs.Radio(
+            label="Pick a TTS Model",
+            choices=MODEL_NAMES,
+        ),
+    ],
+    outputs=gr.outputs.Audio(label="Output"),
+    title="🐸💬 - Coqui TTS",
+    theme="huggingface",
+    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
+    article="more info at https://github.com/coqui-ai/TTS",
+)
+# NOTE(review): this launch is only reached after demo.launch() above returns - confirm.
+iface.launch()