File size: 2,702 Bytes
efabbbd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tempfile

def extract_audio_features(audio_file):
    """Load a recording and compute the features used for stress scoring.

    Args:
        audio_file: Audio source handed straight to ``librosa.load``
            (the UI passes a file path).

    Returns:
        A ``(mfccs, pitches, energy)`` tuple where ``mfccs`` is the
        13-coefficient output of ``librosa.feature.mfcc``, ``pitches`` is
        the flat array of ``librosa.piptrack`` pitch values that are
        positive and whose magnitude exceeds the median magnitude, and
        ``energy`` is the first row of the ``librosa.feature.rms`` output.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    waveform, sample_rate = librosa.load(audio_file, sr=None)

    mfcc_matrix = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13)

    pitch_grid, magnitude_grid = librosa.piptrack(y=waveform, sr=sample_rate)
    # Keep only bins that carry a pitch estimate and are louder than the
    # median bin; boolean-mask indexing flattens the result to 1-D.
    above_median = magnitude_grid > np.median(magnitude_grid)
    has_pitch = pitch_grid > 0
    selected_pitches = pitch_grid[above_median & has_pitch]

    rms_energy = librosa.feature.rms(y=waveform)[0]

    return mfcc_matrix, selected_pitches, rms_energy

def analyze_voice_stress(audio_file):
    """Score the stress level of a recording and render its feature plots.

    The score is a heuristic: the mean of the variances of the MFCCs, the
    filtered pitch values and the RMS energy, divided by 1000, expressed as
    a percentage and capped at 100.

    Args:
        audio_file: Path of the audio file to analyse. A falsy value
            (``None`` or ``""``) is reported as missing input.

    Returns:
        A ``(message, plot_path)`` tuple. On success ``message`` looks like
        ``"42.00% - Medium Stress"`` and ``plot_path`` is the path of a PNG
        file with the MFCC, pitch and energy plots; the file is created
        with ``delete=False`` and is not removed by this function. When the
        input is missing, the score cannot be computed, or any step raises,
        ``message`` describes the problem and ``plot_path`` is ``None``.
    """
    if not audio_file:
        return "No audio file provided.", None

    try:
        mfccs, pitches, energy = extract_audio_features(audio_file)

        # An empty pitch selection contributes zero instead of NaN.
        pitch_variance = np.var(pitches) if len(pitches) > 0 else 0
        stress_level = (np.var(mfccs) + pitch_variance + np.var(energy)) / 3
        if np.isnan(stress_level):
            # min(100, nan) evaluates to 100, which would be reported as
            # "High Stress"; surface the failure explicitly instead.
            return "Could not compute a stress level for this audio.", None
        normalized_stress = min(100, (stress_level / 1000) * 100)

        fig, axs = plt.subplots(3, 1, figsize=(10, 12))
        try:
            # (data, title, y-label, Axes method name, extra kwargs)
            plots = [
                (mfccs, 'MFCCs', 'MFCC Coefficient', 'imshow',
                 {'aspect': 'auto', 'origin': 'lower'}),
                (pitches, 'Pitch', 'Frequency (Hz)', 'plot', {}),
                (energy, 'Energy', 'RMS Energy', 'plot', {}),
            ]
            for ax, (data, title, ylabel, plot_type, plot_args) in zip(axs, plots):
                getattr(ax, plot_type)(data, **plot_args)
                ax.set_title(title)
                ax.set_ylabel(ylabel)
            axs[2].set_xlabel('Time')

            # Work on this figure explicitly rather than pyplot's global
            # "current figure", which may belong to a concurrent request.
            fig.tight_layout()
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
                # Write through the open handle instead of reopening the
                # file by name while it is still held open.
                fig.savefig(temp_file, format='png')
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)

        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"
        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", temp_file.name
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the
        # Gradio handler.
        return str(e), None

def create_voice_stress_tab():
    """Create the voice stress analysis widgets and connect their events.

    Lays out an audio input with "Clear" and "Analyze" buttons in one
    column and the stress label plus plot image in a second column, then
    registers the click handlers and an example audio file. Intended to be
    called while a ``gr.Blocks`` context is active.
    """
    with gr.Row():
        # First column: audio input and the two action buttons.
        with gr.Column(scale=2):
            audio_in = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                reset_button = gr.Button(value="Clear", scale=1)
                analyze_button = gr.Button(value="Analyze", scale=1, elem_classes="submit")
        # Second column: textual score and the rendered feature plot.
        with gr.Column(scale=1):
            stress_label = gr.Label(label="Stress Level")
            plot_image = gr.Image(label="Stress Analysis Plot")

    result_outputs = [stress_label, plot_image]

    analyze_button.click(
        fn=analyze_voice_stress,
        inputs=[audio_in],
        outputs=result_outputs,
    )
    # One None per component resets the input and both outputs.
    reset_button.click(
        fn=lambda: (None, None, None),
        outputs=[audio_in, *result_outputs],
    )

    gr.Examples(examples=["./assets/audio/fitness.wav"], inputs=[audio_in])

# Module-level Blocks app containing only the voice stress tab. It is built
# at import time; no launch call appears in this section.
demo = gr.Blocks()
with demo:
    gr.Markdown("# Voice Stress Analysis")
    create_voice_stress_tab()