from flask import Flask, render_template, request, jsonify
import torch
from transformers import pipeline

app = Flask(__name__)

# Load the automatic speech recognition model.
# Use the GPU with float16 when available; fall back to CPU with float32
# so the app still starts on machines without CUDA.
use_cuda = torch.cuda.is_available()
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    device="cuda:0" if use_cuda else "cpu",
)

# Load the emotion classification model.
# top_k=None returns a score for every label (the current replacement for
# the deprecated return_all_scores=True).
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
)
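
# Illustrative call (scores hypothetical): with top_k=None the classifier
# scores every label rather than just the top one, e.g.
#
#   emotion_classifier("I love this!")
#   # -> [{'label': 'joy', 'score': 0.98}, {'label': 'surprise', 'score': 0.01}, ...]
#
# The model covers seven emotions: anger, disgust, fear, joy, neutral,
# sadness, surprise.
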
def transcribe(audio_file, task):
    if audio_file is None:
        return "Please upload or record an audio file."
    # The ASR pipeline accepts raw bytes directly; file-like objects (such as
    # the werkzeug FileStorage from request.files) are read into bytes first.
    if isinstance(audio_file, bytes):
        audio_bytes = audio_file
    else:
        audio_bytes = audio_file.read()
    result = pipe(audio_bytes, generate_kwargs={"task": task}, return_timestamps=True)
    return result["text"]
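
# Quick local sanity check (a sketch; assumes a "sample.wav" file on disk):
#
#   with open("sample.wav", "rb") as f:
#       print(transcribe(f.read(), "transcribe"))
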
@app.route('/')
def index():
    return render_template('index.html')

@app.route('/transcribe', methods=['POST'])
def transcribe_endpoint():
    audio_file = request.files.get('audio_file')
    # Whisper's generate task is either "transcribe" or "translate";
    # default to transcription when the form omits it.
    task = request.form.get('task', 'transcribe')
    text = transcribe(audio_file, task)
    return jsonify({'text': text})
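
# Example request against the dev server (a sketch; assumes localhost:5000):
#
#   curl -X POST http://localhost:5000/transcribe \
#        -F "audio_file=@sample.wav" -F "task=transcribe"
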
@app.route('/classify_emotion', methods=['POST'])
def classify_emotion_endpoint():
    text = request.form.get('text')
    if not text:
        return jsonify({'error': 'No text provided.'}), 400
    result = emotion_classifier(text)
    return jsonify(result)
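
# Example request (a sketch):
#
#   curl -X POST http://localhost:5000/classify_emotion -F "text=I love this!"
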
if __name__ == '__main__':
    # debug=True is for local development only; disable it in production.
    app.run(debug=True)