Spaces:

Kartikeyssj2
/

pronunciation-scoring

Build error

App Files Files Community

pronunciation-scoring / main.py

Kartikeyssj2

Update main.py

2f6faa5 verified 4 months ago

raw

history blame

4.91 kB

	import re
	import requests
	import pyarrow as pa
	import librosa
	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
	from fastapi import FastAPI, File, UploadFile
	import warnings
	from starlette.formparsers import MultiPartParser
	import io
	import random
	import tempfile
	import os
	import numba
	import soundfile as sf
	import asyncio

	MultiPartParser.max_file_size = 200 * 1024 * 1024

	# Initialize FastAPI app
	app = FastAPI()

	# Load Wav2Vec2 tokenizer and model
	tokenizer = Wav2Vec2Tokenizer.from_pretrained("./models/tokenizer")
	model = Wav2Vec2ForCTC.from_pretrained("./models/model")


	# Function to download English word list
	def download_word_list():
	print("Downloading English word list...")
	url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
	response = requests.get(url)
	words = set(response.text.split())
	print("Word list downloaded.")
	return words

	english_words = download_word_list()

	# Function to count correctly spelled words in text
	def count_spelled_words(text, word_list):
	print("Counting spelled words...")
	# Split the text into words
	words = re.findall(r'\b\w+\b', text.lower())

	correct = sum(1 for word in words if word in word_list)
	incorrect = len(words) - correct

	print("Spelling check complete.")
	return incorrect, correct

	# Function to apply spell check to an item (assuming it's a dictionary)
	def apply_spell_check(item, word_list):
	print("Applying spell check...")
	if isinstance(item, dict):
	# This is a single item
	text = item['transcription']
	incorrect, correct = count_spelled_words(text, word_list)
	item['incorrect_words'] = incorrect
	item['correct_words'] = correct
	print("Spell check applied to single item.")
	return item
	else:
	# This is likely a batch
	texts = item['transcription']
	results = [count_spelled_words(text, word_list) for text in texts]

	incorrect_counts, correct_counts = zip(*results)

	item = item.append_column('incorrect_words', pa.array(incorrect_counts))
	item = item.append_column('correct_words', pa.array(correct_counts))

	print("Spell check applied to batch of items.")
	return item

	# FastAPI routes
	@app.get('/')
	async def root():
	return "Welcome to the pronunciation scoring API!"

	@app.post('/check_post')
	async def rnc(number):
	return {
	"your value:" , number
	}

	@app.get('/check_get')
	async def get_rnc():
	return random.randint(0 , 10)


	@app.post('/fluency_score')
	async def fluency_scoring(file: UploadFile = File(...)):
	audio_array, sample_rate = librosa.load(file.file, sr=16000)
	print(audio_array)
	return audio_array[:5]


	@app.post('/pronunciation_score')
	async def pronunciation_scoring(file: UploadFile = File(...)):
	print("loading the file")
	url = "https://speech-processing-6.onrender.com/process_audio"
	files = {'file': await file.read()}

	print("file loaded")

	# print(files)

	print("making a POST request on speech processor")

	# Make the POST request
	response = requests.post(url, files=files)

	audio = response.json().get('audio_array')

	print("audio:" , audio[:5])



	print("length of the audio array:" , len(audio))

	print("" 100)

	# Tokenization
	print("Tokenizing audio...")
	input_values = tokenizer(
	audio,
	return_tensors="pt",
	padding="max_length",
	max_length= 386380,
	truncation=True
	).input_values

	print(input_values.shape)

	print("Tokenization complete. Shape of input_values:", input_values.shape)

	return "tokenization successful"

	# Perform inference
	print("Performing inference with Wav2Vec2 model...")

	logits = model(input_values).logits

	print("Inference complete. Shape of logits:", logits.shape)

	# Get predictions
	print("Getting predictions...")
	prediction = torch.argmax(logits, dim=-1)
	print("Prediction shape:", prediction.shape)

	# Decode predictions
	print("Decoding predictions...")
	transcription = tokenizer.batch_decode(prediction)[0]

	# Convert transcription to lowercase
	transcription = transcription.lower()

	print("Decoded transcription:", transcription)

	incorrect, correct = count_spelled_words(transcription, english_words)
	print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct)

	# Calculate pronunciation score
	fraction = correct / (incorrect + correct)
	score = round(fraction * 100, 2)
	print("Pronunciation score for", transcription, ":", score)

	print("Pronunciation scoring process complete.")

	return {
	"transcription": transcription,
	"pronunciation_score": score
	}