# Hugging Face Spaces file view -- Space status: Runtime error
# File size: 5,001 bytes; revision cada0f5
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
from pydub import AudioSegment
import whisper
from settings import MODEL_PARSER
from pytube import YouTube
class BagOfModels:
    """Registry of the models described by ``MODEL_PARSER``.

    Constructor arguments:
        model: mapping from model name to that model's configuration;
            ``load_model`` indexes it by name and unpacks the entry into
            ``Model``.
        model_names: model names that can be chosen from in Streamlit.
        model_settings: settings of the model that can be customized by
            the user.
        model_tasks: tasks associated with the models.
        **kwargs: any further configuration entries, stored unchanged.
    """

    # Shared configuration. Every classmethod expands ``vars(args)`` into the
    # constructor, so it has to provide the four constructor arguments.
    args = MODEL_PARSER
    # Not referenced anywhere in this file; kept in case other code reads it.
    barfs = 5

    def __init__(self, model, model_names, model_settings, model_tasks, **kwargs):
        self.model = model
        self.model_names = model_names
        self.model_settings = model_settings
        self.model_tasks = model_tasks
        self.kwargs = kwargs

    @classmethod
    def _from_args(cls):
        """Build a fresh ``BagOfModels`` from the class-level configuration."""
        return BagOfModels(**vars(cls.args))

    @classmethod
    def get_model_settings(cls):
        """Return the configured model settings."""
        return cls._from_args().model_settings

    @classmethod
    def get_model_names(cls):
        """Return the configured model names."""
        return cls._from_args().model_names

    @classmethod
    def get_model(cls):
        """Return the configured model mapping."""
        return cls._from_args().model

    @classmethod
    def get_model_tasks(cls):
        """Return the configured model tasks."""
        return cls._from_args().model_tasks

    @classmethod
    def load_model(cls, model_name, **kwargs):
        """Return a ``Model`` built from the configuration entry for *model_name*.

        As a side effect the configured model mapping is stored on
        ``cls.model``. ``**kwargs`` is accepted for call compatibility and is
        not used.

        Raises:
            AssertionError: if *model_name* is not one of the configured
                model names.
        """
        bag_of_models = cls._from_args()
        cls.model = bag_of_models.model
        if model_name not in bag_of_models.model_names:
            # Raised explicitly instead of via ``assert`` so the check still
            # runs under ``python -O``; the exception type stays the same.
            raise AssertionError(
                f"please pick one of the available models: {bag_of_models.model_names}"
            )
        return Model(model_name, **cls.model[model_name])
class Model:
    """One configured model plus the prediction entry points built on it.

    Args:
        model_name: configuration name of the model; ``predict_stt`` uses the
            second ``"_"``-separated piece of it as the Whisper model size.
        task: task label, stored unchanged.
        url: model URL; the part after ``https://huggingface.co/`` becomes
            ``self.name``.
        **kwargs: optional metadata, forwarded to ``init_optional_args``
            (only ``year`` and ``description`` are accepted there).
    """

    def __init__(self, model_name, task, url, **kwargs):
        hub_prefix = "https://huggingface.co/"
        self.url = url
        self.model_name = model_name
        # Keep what follows the hub prefix, e.g. "org/model".
        url_pieces = self.url.split(hub_prefix)
        self.name = url_pieces[1]
        self.task = task
        self.kwargs = kwargs
        self.init_optional_args(**self.kwargs)

    def init_optional_args(self, year=None, description=None):
        """Store the optional metadata; both values default to ``None``."""
        self._year, self._description = year, description

    def predict_stt(self, source, source_type, model_task):
        """Transcribe *source* and return the finished ``SoundToText`` object."""
        # Model size taken from the model name: tiny - base - medium
        whisper_size = self.model_name.split("_")[1]
        loaded_model = whisper.load_model(whisper_size)
        stt = SoundToText(
            source,
            source_type,
            model_task,
            model=loaded_model,
            tokenizer=None,
        )
        stt.whisper()
        return stt

    def predict_summary(self):
        """Load the Wav2Vec2 processor and CTC model for ``self.name``.

        Both objects are discarded afterwards and nothing is returned.
        """
        repo_id = self.name
        processor = Wav2Vec2Processor.from_pretrained(repo_id)
        ctc_model = Wav2Vec2ForCTC.from_pretrained(repo_id)  # Note: PyTorch Model
class Transcription:
    """Placeholder type; instances carry no state yet."""

    def __init__(self, model, source, source_type) -> None:
        """Accept the three arguments and ignore them."""
        return None
class SoundToText():
    """Transcribe an audio source to text with a Whisper model.

    Args:
        source: what to transcribe. For ``source_type == "YouTube"`` it is
            handed to pytube's ``YouTube``.
        source_type: kind of source. Only ``"YouTube"`` is handled by
            ``whisper``; for anything else the caller has to set
            ``audio_path`` on the instance first.
        model_task: stored on the instance; no method of this class reads it.
        model: object whose ``transcribe(path, verbose=True)`` does the work.
            When ``None``, the Whisper ``"base"`` model is loaded on first use.
        tokenizer: stored on the instance; no method of this class reads it.

    After a successful ``whisper()`` call the instance exposes ``raw_output``,
    ``text``, ``language``, ``segments`` and ``transcribed == True``.
    """

    def __init__(self, source, source_type, model_task, model, tokenizer=None):
        self.source = source
        self.source_type = source_type
        self.model = model
        self.model_task = model_task
        self.tokenizer = tokenizer
        # Defined up front so callers can test it before transcribing.
        self.transcribed = False

    def wav2vec(self, size):
        """Not implemented."""
        pass

    def wav2vec2(self, size):
        """Not implemented."""
        pass

    def whisper(self):
        """Fetch the audio if needed, transcribe it and store the results.

        Raises:
            AttributeError: if ``source_type`` is not ``"YouTube"`` and no
                ``audio_path`` has been set on the instance.
        """
        # download youtube url
        if self.source_type == "YouTube":
            # NOTE(review): itag 140 is presumably an audio-only stream, and
            # get_by_itag may return None when that stream is missing --
            # confirm against the pytube documentation.
            self.audio_path = YouTube(self.source).streams.get_by_itag(140).download("output/", filename="audio")

        # TODO: uploaded files ("File" sources) are not converted yet; until
        # then such callers must assign ``audio_path`` themselves.
        audio_path = getattr(self, "audio_path", None)
        if audio_path is None:
            raise AttributeError(
                f"no audio_path available for source_type {self.source_type!r}; "
                "only 'YouTube' sources are downloaded automatically"
            )

        # Use the model the caller selected; fall back to "base" only when
        # none was supplied, and keep it so repeated calls do not reload it.
        if self.model is None:
            self.model = whisper.load_model("base")

        self.raw_output = self.model.transcribe(audio_path, verbose=True)
        self.text = self.raw_output["text"]
        self.language = self.raw_output["language"]
        self.segments = self.raw_output["segments"]

        # Remove token ids from the output (tolerating segments without them)
        for segment in self.segments:
            segment.pop("tokens", None)
        self.transcribed = True
class TextToSummary:
    """Summarize a text with the ``facebook/bart-large-cnn`` pipeline.

    The summarization pipeline is created and run inside the constructor;
    ``get_summary`` only hands back the stored result.

    Args:
        input_text: text to summarize.
        min_length: forwarded to the summarizer as ``min_length``.
        max_length: forwarded to the summarizer as ``max_length``.
    """

    def __init__(self, input_text, min_length, max_length):
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.summary_input = input_text
        summarize = self.summarizer
        result = summarize(
            self.summary_input,
            min_length=min_length,
            max_length=max_length,
            do_sample=False,
        )
        self.summary_output = result

    def get_summary(self):
        """Return the summarizer output computed in the constructor."""
        return self.summary_output

    def wav2vec(self):
        """Not implemented."""
        return None
def record(model_name, source=None, source_type=None, model_task=None):
    """Load *model_name* and run speech-to-text on *source*.

    The previous body called ``Model.predict()``, which ``Model`` does not
    define in this file; the call now goes to ``Model.predict_stt``.

    Args:
        model_name: name of a configured model, passed to
            ``BagOfModels.load_model``.
        source: forwarded to ``Model.predict_stt``.
        source_type: forwarded to ``Model.predict_stt``.
        model_task: forwarded to ``Model.predict_stt``.

    Returns:
        Whatever ``Model.predict_stt`` returns.
    """
    args = MODEL_PARSER
    loaded = BagOfModels.load_model(model_name, **vars(args))
    return loaded.predict_stt(source, source_type, model_task)
if __name__ == "__main__":
    # Command-line smoke test: transcribe the YouTube URL given as the only
    # argument with the "whisper_base" model and print the resulting text.
    # The previous code called predict_stt() without its required arguments.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")

    args = MODEL_PARSER
    whisper_base = BagOfModels.load_model("whisper_base", **vars(args))
    # model_task is only stored by SoundToText, never read, so None suffices.
    transcript = whisper_base.predict_stt(sys.argv[1], "YouTube", None)
    print(transcript.text)