# Spaces:
# Build error
# Build error
from math import gcd

import numpy as np
import soundfile as sf
from scipy.signal import resample_poly
async def fluency_scoring(file: UploadFile = File(...)):
    """Decode an uploaded audio file and return its first five samples.

    The audio is read with soundfile as float32 and, when the source sample
    rate differs from 16 kHz, resampled to 16 kHz so downstream consumers
    always see a 16 kHz signal.

    Args:
        file: Uploaded audio file (any container/codec soundfile can decode).

    Returns:
        list[float]: the first five samples of the (possibly resampled)
        float32 audio signal.
    """
    target_sr = 16000
    with sf.SoundFile(file.file, 'r') as sound_file:
        audio_array = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    if sample_rate != target_sr:
        # Resample to 16 kHz. The original called librosa.resample, but
        # librosa is never imported in this file (NameError at runtime) and
        # the positional (y, orig_sr, target_sr) form was removed in
        # librosa 0.10. Polyphase resampling via scipy is equivalent for
        # this purpose; gcd keeps the up/down factors small.
        g = gcd(sample_rate, target_sr)
        audio_array = resample_poly(
            audio_array, target_sr // g, sample_rate // g
        ).astype(np.float32)
    print(audio_array)  # debug: inspect decoded signal in server logs
    return audio_array[:5].tolist()
# import re | |
# import requests | |
# import pyarrow as pa | |
# import librosa | |
# import torch | |
# from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer | |
# from fastapi import FastAPI, File, UploadFile | |
# import warnings | |
# from starlette.formparsers import MultiPartParser | |
# import io | |
# import random | |
# import tempfile | |
# import os | |
# import numba | |
# import soundfile as sf | |
# import asyncio | |
# MultiPartParser.max_file_size = 200 * 1024 * 1024 | |
# # Initialize FastAPI app | |
# app = FastAPI() | |
# # Load Wav2Vec2 tokenizer and model | |
# tokenizer = Wav2Vec2Tokenizer.from_pretrained("./models/tokenizer") | |
# model = Wav2Vec2ForCTC.from_pretrained("./models/model") | |
# # Function to download English word list | |
# def download_word_list(): | |
# print("Downloading English word list...") | |
# url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt" | |
# response = requests.get(url) | |
# words = set(response.text.split()) | |
# print("Word list downloaded.") | |
# return words | |
# english_words = download_word_list() | |
# # Function to count correctly spelled words in text | |
# def count_spelled_words(text, word_list): | |
# print("Counting spelled words...") | |
# # Split the text into words | |
# words = re.findall(r'\b\w+\b', text.lower()) | |
# correct = sum(1 for word in words if word in word_list) | |
# incorrect = len(words) - correct | |
# print("Spelling check complete.") | |
# return incorrect, correct | |
# # Function to apply spell check to an item (assuming it's a dictionary) | |
# def apply_spell_check(item, word_list): | |
# print("Applying spell check...") | |
# if isinstance(item, dict): | |
# # This is a single item | |
# text = item['transcription'] | |
# incorrect, correct = count_spelled_words(text, word_list) | |
# item['incorrect_words'] = incorrect | |
# item['correct_words'] = correct | |
# print("Spell check applied to single item.") | |
# return item | |
# else: | |
# # This is likely a batch | |
# texts = item['transcription'] | |
# results = [count_spelled_words(text, word_list) for text in texts] | |
# incorrect_counts, correct_counts = zip(*results) | |
# item = item.append_column('incorrect_words', pa.array(incorrect_counts)) | |
# item = item.append_column('correct_words', pa.array(correct_counts)) | |
# print("Spell check applied to batch of items.") | |
# return item | |
# # FastAPI routes | |
# @app.get('/') | |
# async def root(): | |
# return "Welcome to the pronunciation scoring API!" | |
# @app.post('/check_post') | |
# async def rnc(number): | |
# return { | |
# "your value:" , number | |
# } | |
# @app.get('/check_get') | |
# async def get_rnc(): | |
# return random.randint(0 , 10) | |
# @app.post('/fluency_score') | |
# async def fluency_scoring(file: UploadFile = File(...)): | |
# audio_array, sample_rate = librosa.load(file.file, sr=16000) | |
# print(audio_array) | |
# return audio_array[:5] | |
# @app.post('/pronunciation_score') | |
# async def pronunciation_scoring(file: UploadFile = File(...)): | |
# print("loading the file") | |
# url = "https://speech-processing-6.onrender.com/process_audio" | |
# files = {'file': await file.read()} | |
# print("file loaded") | |
# # print(files) | |
# print("making a POST request on speech processor") | |
# # Make the POST request | |
# response = requests.post(url, files=files) | |
# audio = response.json().get('audio_array') | |
# print("audio:" , audio[:5]) | |
# print("length of the audio array:" , len(audio)) | |
# print("*" * 100) | |
# # Tokenization | |
# print("Tokenizing audio...") | |
# input_values = tokenizer( | |
# audio, | |
# return_tensors="pt", | |
# padding="max_length", | |
# max_length= 386380, | |
# truncation=True | |
# ).input_values | |
# print(input_values.shape) | |
# print("Tokenization complete. Shape of input_values:", input_values.shape) | |
# return "tokenization successful" | |
# # Perform inference | |
# print("Performing inference with Wav2Vec2 model...") | |
# logits = model(input_values).logits | |
# print("Inference complete. Shape of logits:", logits.shape) | |
# # Get predictions | |
# print("Getting predictions...") | |
# prediction = torch.argmax(logits, dim=-1) | |
# print("Prediction shape:", prediction.shape) | |
# # Decode predictions | |
# print("Decoding predictions...") | |
# transcription = tokenizer.batch_decode(prediction)[0] | |
# # Convert transcription to lowercase | |
# transcription = transcription.lower() | |
# print("Decoded transcription:", transcription) | |
# incorrect, correct = count_spelled_words(transcription, english_words) | |
# print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct) | |
# # Calculate pronunciation score | |
# fraction = correct / (incorrect + correct) | |
# score = round(fraction * 100, 2) | |
# print("Pronunciation score for", transcription, ":", score) | |
# print("Pronunciation scoring process complete.") | |
# return { | |
# "transcription": transcription, | |
# "pronunciation_score": score | |
# } |