Spaces:

yalsaffar
/

S3TVR-Demo

Sleeping

S3TVR-Demo / models /es_fastconformer.py

init

aa7cb02 3 months ago

No virus

1.34 kB

	import nemo.collections.asr as nemo_asr
	import torch

	def stt_es_model():
	    """
	    Build the Spanish speech-to-text model and place it on the best device.

	    The checkpoint "nvidia/stt_es_fastconformer_hybrid_large_pc" is fetched
	    through ``nemo_asr.models.EncDecCTCModelBPE.from_pretrained`` and then
	    moved to CUDA when ``torch.cuda.is_available()`` reports a GPU, or to
	    the CPU otherwise.

	    NOTE(review): the checkpoint name contains "hybrid" while it is loaded
	    through the CTC-BPE class; confirm which concrete model class NeMo
	    actually hands back here.

	    Returns:
	        The loaded ASR model, after ``.to(device)`` has been applied.

	    Example usage:
	        asr_model = stt_es_model()
	    """
	    pretrained_name = "nvidia/stt_es_fastconformer_hybrid_large_pc"
	    model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(pretrained_name)

	    # Prefer the GPU when one is visible to torch; fall back to the CPU.
	    if torch.cuda.is_available():
	        target = torch.device('cuda')
	    else:
	        target = torch.device('cpu')

	    return model.to(target)

	def stt_es_process(asr_model, audio_file):
	    """
	    Transcribe a single audio file with the given ASR model.

	    The path is wrapped in a one-element list and handed to
	    ``asr_model.transcribe`` together with ``batch_size=1``.

	    The list is passed positionally on purpose: the first parameter of
	    NeMo's ``transcribe`` has been named ``paths2audio_files`` in older
	    releases and ``audio`` in newer ones, so naming it by keyword breaks
	    on one side or the other, while a positional argument works on both.

	    Args:
	        asr_model: Object exposing ``transcribe(<files>, batch_size=...)``.
	            Example: asr_model = stt_es_model()
	        audio_file (str): Path to the audio file to be transcribed.
	            Example: "path/to/audio_file.wav"

	    Returns:
	        Whatever ``asr_model.transcribe`` returns, unmodified. For a
	        one-file input this is expected to be a one-element list such as
	        ["transcribed text"].
	        NOTE(review): the exact return shape depends on the model class
	        and NeMo version - confirm against the installed release.
	    """
	    # Positional first argument keeps this call valid across NeMo versions.
	    return asr_model.transcribe([audio_file], batch_size=1)