# voiceoperation / app.py
# Zeimoto
# update app.py
# 9f27454
# raw
# history blame
# 3.15 kB
import streamlit as st
from st_audiorec import st_audiorec
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
#from datasets import load_dataset
import torch
from gliner import GLiNER
from resources import Lead_Labels, entity_labels, set_start, audit_elapsedtime
def main():
    """Run the voice-to-entities Streamlit page.

    Loads the speech-recognition pipeline and the NER model, runs entity
    extraction once on a hard-coded sample sentence, then renders an audio
    recorder. When a recording is available it is rendered with an audio
    player, transcribed, and the entities found in the transcript are shown.

    Both model loaders are wrapped with ``st.cache_resource`` so the models
    are built once per server process. Streamlit re-executes the script on
    every widget interaction; without the cache each rerun would reload
    both models from scratch.
    """
    print("------------------------------")
    print("Running main")

    # Wrapping here (rather than calling the loaders directly) keeps the
    # loaded models alive across Streamlit reruns and sessions.
    rec = st.cache_resource(init_model_trans)()
    ner = st.cache_resource(init_model_ner)()

    labels = entity_labels

    # Sanity run on a fixed sentence; this still executes on every rerun.
    text = "I have a proposal from cgd where they want one outsystems junior developers and one senior for an estimate of three hundred euros a day, for six months."
    print(f"get entities from sample text: {text}")
    get_entity_labels(model=ner, text=text, labels=labels)

    print("Render UI")
    wav_audio_data = st_audiorec()

    # Nothing to process until the recorder has produced audio.
    if wav_audio_data is None or rec is None:
        return

    print("Loading data...")
    st.audio(wav_audio_data, format='audio/wav')
    text = transcribe(wav_audio_data, rec)

    if text is not None:
        get_entity_labels(labels=labels, model=ner, text=text)
def init_model_trans():
    """Build and return the ``openai/whisper-large-v3`` ASR pipeline.

    The model runs on ``cuda:0`` in float16 when CUDA is available, and on
    the CPU in float32 otherwise. The pipeline is configured for 30-second
    chunks, a batch size of 16, at most 128 new tokens, and timestamped
    output.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "automatic-speech-recognition" task.
    """
    print("Initiating transcription model...")
    started_at = set_start()

    checkpoint = "openai/whisper-large-v3"

    # Half precision only makes sense on the GPU; fall back to float32 on CPU.
    if torch.cuda.is_available():
        device, torch_dtype = "cuda:0", torch.float16
    else:
        device, torch_dtype = "cpu", torch.float32

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        checkpoint,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)

    # The processor bundles the tokenizer and feature extractor for this checkpoint.
    processor = AutoProcessor.from_pretrained(checkpoint)

    pipeline_options = {
        "model": model,
        "tokenizer": processor.tokenizer,
        "feature_extractor": processor.feature_extractor,
        "max_new_tokens": 128,
        "chunk_length_s": 30,
        "batch_size": 16,
        "return_timestamps": True,
        "torch_dtype": torch_dtype,
        "device": device,
    }
    asr_pipeline = pipeline("automatic-speech-recognition", **pipeline_options)

    print('Init model successful')
    # Presumably records how long the load took -- helper lives in `resources`.
    audit_elapsedtime(function="Initiating transcription model", start=started_at)
    return asr_pipeline
def init_model_ner():
    """Load and return the ``urchade/gliner_multi`` GLiNER model.

    Returns:
        The model object produced by ``GLiNER.from_pretrained``.
    """
    print("Initiating NER model...")
    started_at = set_start()

    ner_model = GLiNER.from_pretrained("urchade/gliner_multi")

    # Presumably records how long the load took -- helper lives in `resources`.
    audit_elapsedtime(function="Initiating NER model", start=started_at)
    return ner_model
def transcribe(audio_sample: bytes, pipe) -> str:
    """Transcribe recorded audio and show the transcript on the page.

    Args:
        audio_sample: Audio data handed straight to ``pipe``.
        pipe: Callable speech-recognition pipeline; its result must be
            indexable with ``"text"``.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    started_at = set_start()
    output = pipe(audio_sample)
    audit_elapsedtime(function="Transcription", start=started_at)

    # Dump the full pipeline result to the server log.
    print(output)

    transcript = output["text"]
    # NOTE(review): the label below is misspelled ("trancription"); kept
    # as-is so the rendered page does not change.
    st.write('trancription: ', transcript)
    return transcript
def get_entity_labels(model: GLiNER, text: str, labels: list):
    """Extract entities from ``text`` and display them.

    Each entity is printed to the server log as ``<text> => <label>`` and
    the full entity list is written to the Streamlit page.

    Args:
        model: GLiNER model used for prediction.
        text: Text to search for entities.
        labels: Entity labels passed to ``model.predict_entities``.

    Returns:
        None.
    """
    started_at = set_start()
    found = model.predict_entities(text, labels)
    audit_elapsedtime(function="Retreiving entity labels from text", start=started_at)

    for item in found:
        print(item["text"], "=>", item["label"])

    st.write('Entities: ', found)
    # TODO: build and return a Lead_Labels instance instead of returning None.
# Script entry point: log that the guard was hit, then start the app.
if __name__ == "__main__":
    print("IN __name__")
    main()