Spaces:

mskov
/

test

Runtime error

App Files Files Community

test / app.py

mskov

Update app.py

5e24192 over 1 year ago

raw

history blame

1.99 kB

	import os
	import subprocess
	import sys

	# Packages installed at start-up, in the order the script has always used.
	# Earlier numpy pins (1.21.4 / 1.22.1) were tried and are deliberately not applied.
	# NOTE(review): the original installed "spicy", presumably a typo for
	# "scipy" -- confirm before relying on it.
	_REQUIREMENTS = (
	    "transformers==4.27.0",
	    "evaluate",
	    "datasets",
	    "llvmlite",
	    "scipy",
	    "soundfile",
	    "jiwer",
	    "datasets[audio]",
	    "numba==0.51.2",
	)


	def _pip_install(requirement):
	    """Install *requirement* with the pip of the running interpreter.

	    Using ``sys.executable -m pip`` guarantees the package lands in the
	    environment this script imports from. A failed install is reported on
	    stderr but does not abort start-up, keeping the best-effort behaviour
	    of the previous ``os.system`` calls.

	    Returns:
	        True when pip exited with status 0, False otherwise.
	    """
	    completed = subprocess.run(
	        [sys.executable, "-m", "pip", "install", requirement],
	        check=False,
	    )
	    if completed.returncode != 0:
	        print(
	            f"warning: pip install {requirement} exited with status "
	            f"{completed.returncode}",
	            file=sys.stderr,
	        )
	    return completed.returncode == 0


	# Install everything before importing anything, so the modules loaded
	# below come from the final set of installed packages.
	for _requirement in _REQUIREMENTS:
	    _pip_install(_requirement)

	# These imports must follow the installs above, hence not at the file top.
	import evaluate
	from datasets import Audio, disable_caching, load_dataset
	from evaluate import evaluator
	from transformers import (
	    AutoConfig,
	    AutoModelForCTC,
	    WhisperModel,
	    WhisperTokenizer,
	    pipeline,
	)

	# `set_caching_enabled` is the deprecated spelling of
	# enable_caching()/disable_caching(); tolerate releases that no longer
	# export it instead of failing at import time.
	try:
	    from datasets import set_caching_enabled
	except ImportError:
	    set_caching_enabled = None

	disable_caching()


	# Word-error-rate metric; reused for the evaluation further down.
	metric = evaluate.load("wer")

	# Build the speech-recognition pipeline from the model repository.
	# Passing the repository id lets `pipeline` pick the matching model class,
	# tokenizer and feature extractor itself; the previous code loaded both
	# the model and the "tokenizer" through AutoModelForCTC and supplied no
	# feature extractor.
	# NOTE(review): assumes the repository ships tokenizer and preprocessor
	# files next to the weights -- confirm.
	huggingface_token = os.environ["huggingface_token"]
	asr_pipeline = pipeline(
	    "automatic-speech-recognition",
	    model="mskov/whisper_miso",
	    use_auth_token=huggingface_token,
	)
	# Keep the historical module-level names pointing at the loaded objects.
	whisper_miso = asr_pipeline.model
	miso_tokenizer = asr_pipeline.tokenizer

	# Load the dataset, decoding audio at the rate the feature extractor expects.
	dataset = load_dataset("mskov/miso_test", split="test").cast_column(
	    "audio",
	    Audio(sampling_rate=asr_pipeline.feature_extractor.sampling_rate),
	)

	# WER compares transcripts with transcripts, so the references must come
	# from a text column, not from the "audio" column.
	# NOTE(review): the transcript column name is not visible in this file;
	# the candidates below are guesses -- confirm against the dataset.
	_REFERENCE_COLUMNS = ("sentence", "text", "transcription", "transcript")
	reference_column = next(
	    (name for name in _REFERENCE_COLUMNS if name in dataset.column_names),
	    None,
	)

	# Compute the evaluation results. Each clip is handed to the pipeline as a
	# {"raw", "sampling_rate"} dict in a fresh list, because the pipeline
	# does not accept a `datasets.Dataset` object directly.
	audio_inputs = [
	    {"raw": sample["array"], "sampling_rate": sample["sampling_rate"]}
	    for sample in dataset["audio"]
	]
	results = asr_pipeline(audio_inputs)
	predictions = [result["text"] for result in results]

	if reference_column is None:
	    # Without reference transcripts there is nothing to score against;
	    # report it and carry on so the rest of the script still runs.
	    wer = None
	    print(
	        "warning: no transcript column found in "
	        f"{dataset.column_names}; skipping WER"
	    )
	else:
	    wer = metric.compute(
	        predictions=predictions,
	        references=dataset[reference_column],
	    )
	    print(wer)



	def transcribe(audio, state=""):
	    """Transcribe one audio chunk and append it to the running transcript.

	    Args:
	        audio: The audio chunk to transcribe; the interface in this file
	            supplies it with ``type="filepath"``. ``None`` means there is
	            no new audio and leaves the transcript unchanged.
	        state: Transcript accumulated so far; ``None`` is treated as empty.

	    Returns:
	        A pair holding the updated transcript twice, one value per declared
	        output of the interface (the textbox and the state).
	    """
	    transcript = state or ""
	    if audio is None:
	        return transcript, transcript
	    # `asr_pipeline` is the module-level speech-recognition pipeline; the
	    # previous body called an undefined name `p`.
	    text = asr_pipeline(audio)["text"]
	    transcript += text + " "
	    return transcript, transcript

	# The file used `gr` without ever importing gradio; import it here so the
	# interface can be built.
	import gradio as gr

	# Live microphone demo: each streamed chunk goes through `transcribe`,
	# and the running transcript is carried between calls via the "state"
	# input/output pair.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=[
	        gr.Audio(source="microphone", type="filepath", streaming=True),
	        "state",
	    ],
	    outputs=[
	        "textbox",
	        "state",
	    ],
	    live=True,
	)
	demo.launch()