Kartikeyssj2 committed · Commit 5efbe11
Parent: 813004d

Update main.py
main.py CHANGED
@@ -105,19 +105,24 @@ async def upload_audio(file: UploadFile = File(...)):
     print("length of the audio array:" , len(audio))
 
     print("*" * 100)
-
-    #
+
+    # Tokenization
     print("Tokenizing audio...")
-    input_values = tokenizer
-
+    input_values = await asyncio.to_thread(tokenizer, audio, return_tensors="pt")
+    input_values = input_values.input_values
+    print("Tokenization complete. Shape of input_values:", input_values.shape)
+
     # Perform inference
     print("Performing inference with Wav2Vec2 model...")
-
-
+    output = await asyncio.to_thread(model, input_values)
+    logits = output.logits
+    print("Inference complete. Shape of logits:", logits.shape)
+
     # Get predictions
     print("Getting predictions...")
     prediction = torch.argmax(logits, dim=-1)
-
+    print("Prediction shape:", prediction.shape)
+
     # Decode predictions
     print("Decoding predictions...")
     transcription = tokenizer.batch_decode(prediction)[0]
@@ -125,8 +130,8 @@ async def upload_audio(file: UploadFile = File(...)):
     # Convert transcription to lowercase
     transcription = transcription.lower()
 
-    # Print transcription and word counts
     print("Decoded transcription:", transcription)
+
     incorrect, correct = count_spelled_words(transcription, english_words)
     print("Spelling check - Incorrect words:", incorrect, ", Correct words:", correct)
 
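The substance of the change: the blocking tokenizer call and the Wav2Vec2 forward pass are moved onto a worker thread with asyncio.to_thread, so the async upload_audio handler no longer stalls the event loop while the CPU-bound work runs. Below is a minimal sketch of that pattern, not the Space's actual code: the facebook/wav2vec2-base-960h checkpoint, the loading lines, and the transcribe wrapper are assumptions for illustration; only the tokenizer/model usage mirrors the diff.

import asyncio
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

# Assumed checkpoint; the commit does not show how the Space loads its model.
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

async def transcribe(audio) -> str:
    # Tokenization runs in a worker thread instead of on the event loop.
    inputs = await asyncio.to_thread(tokenizer, audio, return_tensors="pt")
    input_values = inputs.input_values

    # Forward pass likewise offloaded; model(input_values) is compute bound.
    output = await asyncio.to_thread(model, input_values)
    logits = output.logits

    # Greedy CTC decoding, unchanged by this commit.
    prediction = torch.argmax(logits, dim=-1)
    return tokenizer.batch_decode(prediction)[0].lower()

asyncio.to_thread does not change how tokenizer(...) or model(...) are called; it runs them in the default thread-pool executor and awaits the result, which is why the surrounding handler only needed await added in front of the existing calls.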