"""Gradio demo: Spanish speech or text in, seq2seq model reply out.

Audio is transcribed with the Google Web Speech API through
``speech_recognition``; the resulting (or typed) text is sent to a PEFT
adapter on top of a seq2seq base model and the exchange is appended to an
HTML transcript.
"""
import gc
import html
import os
import time
from math import log2, pow
from typing import Optional

import gradio as gr
import speech_recognition as sr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
#from scipy.fftpack import fft

# Hub id of the PEFT adapter; its config names the base model to load.
peft_model_id = 'hackathon-somos-nlp-2023/T5unami-small-v1'
config = PeftConfig.from_pretrained(peft_model_id)
model2 = AutoModelForSeq2SeqLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # load_in_8bit=True,
    # load_in_8bit_fp32_cpu_offload=True,
    device_map='auto',
)
tokenizer2 = AutoTokenizer.from_pretrained(peft_model_id)
# Wrap the base model with the adapter weights.
model2 = PeftModel.from_pretrained(model2, peft_model_id)

# Sample audio offered as a clickable example in the UI.
Problema_tarjetaCredito = os.path.abspath("Problema_tarjetaCredito.ogg")
list_audios = [[Problema_tarjetaCredito]]


def gen_conversation(text: str, max_new_tokens: int = 100) -> str:
    """Generate the model's reply to *text*.

    The text is wrapped as ``"instruction: <text>\\n "`` before tokenizing.
    Only the last line of the decoded output is returned, with any
    ``"output:"`` marker removed.

    Args:
        text: Raw user utterance.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The reply text.
    """
    text = "instruction: " + text + "\n "
    batch = tokenizer2(text, return_tensors='pt')
    output_tokens = model2.generate(
        **batch,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer2.eos_token_id,
        pad_token_id=tokenizer2.pad_token_id,
        bos_token_id=tokenizer2.bos_token_id,
        early_stopping=True,
        no_repeat_ngram_size=2,
        repetition_penalty=1.2,
        temperature=.69,
        num_beams=3,
    )
    gc.collect()
    decoded = tokenizer2.decode(output_tokens[0], skip_special_tokens=True)
    return decoded.split("\n")[-1].replace("output:", "")


# Running transcript returned to the UI on every call.
# NOTE(review): module-level state, so presumably every session served by
# this process appends to and sees the same transcript -- confirm intent.
conversacion = ""

# The transcript is rendered by an HTML output component, where bare newlines
# are collapsed; an explicit tag is needed for a visible line break.
_LINE_BREAK = "<br>"


def _transcribe(audio_file: str) -> str:
    """Return the Spanish transcription of the audio file at *audio_file*."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio, language="es-ES")


def _format_turn(etiqueta: str, texto_usuario: str, respuesta: str) -> str:
    """Render one user/agent exchange as an HTML fragment."""
    # Both texts come from outside the program (user input, model output), so
    # they are escaped before being embedded in markup.
    return (
        _LINE_BREAK
        + f"[|{etiqueta}|]:{html.escape(texto_usuario)}\n"
        + _LINE_BREAK
        + "[AGENTE]:"
        + html.escape(respuesta)
        + _LINE_BREAK
        + _LINE_BREAK
    )


def _format_notice(mensaje: str) -> str:
    """Render a one-off error notice as an HTML fragment."""
    return (
        _LINE_BREAK
        + f"[|Error|]:{html.escape(mensaje)}"
        + _LINE_BREAK
        + _LINE_BREAK
    )


def speech_to_text(audio_file: Optional[str], texto_adicional: Optional[str]) -> str:
    """Handle one UI submission and return the updated HTML transcript.

    When *audio_file* is given it is transcribed and the transcription is
    used as the prompt; otherwise *texto_adicional* is used. The exchange is
    appended to the module-level ``conversacion``.

    Args:
        audio_file: Path to the recorded/uploaded audio, or ``None``.
        texto_adicional: Text typed by the user; used only without audio.

    Returns:
        The full transcript as HTML. If transcription fails, the transcript
        followed by an error notice (the notice is not stored). If there is
        neither audio nor non-blank text, the transcript unchanged.
    """
    global conversacion

    if audio_file is not None:
        # Audio input path.
        try:
            texto_usuario = _transcribe(audio_file)
        except sr.UnknownValueError:
            return conversacion + _format_notice("No se pudo entender el audio.")
        except sr.RequestError:
            return conversacion + _format_notice(
                "El servicio de reconocimiento de voz no está disponible."
            )
        etiqueta = "Audio a texto"
    else:
        # Nothing to answer: avoid running the model on an empty prompt.
        if not texto_adicional or not texto_adicional.strip():
            return conversacion
        texto_usuario = texto_adicional
        etiqueta = "Solo texto"

    # The model receives the raw text; escaping applies to display only.
    respuesta = gen_conversation(texto_usuario, max_new_tokens=50)
    conversacion += _format_turn(etiqueta, texto_usuario, respuesta)
    return conversacion


iface = gr.Interface(
    fn=speech_to_text,
    inputs=[
        gr.inputs.Audio(label="Voz", type="filepath"),
        gr.inputs.Textbox(label="Texto adicional"),
    ],
    outputs=gr.outputs.HTML(label=["chatbot", "state"]),
    title="Chat bot para empresas.",
    description="Este modelo convierte la entrada de voz a texto e inferencia, texto a inferencia",
    examples=list_audios,
    theme="default",
    layout="vertical",
    allow_flagging=False,
    flagging_dir=None,
    server_name=None,
    server_port=None,
    live=False,
    capture_session=False
)
iface.launch()