Rahulk2197 committed
Commit 489f5d0
1 Parent(s): d146fa5

Upload 4 files

Files changed (5)
  1. .gitattributes +1 -0
  2. app.py +126 -0
  3. cnn_lstm.keras +3 -0
  4. feat.py +134 -0
  5. requirements.txt +9 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+ cnn_lstm.keras filter=lfs diff=lfs merge=lfs -text
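This new attribute keeps the ~23 MB cnn_lstm.keras model under Git LFS, matching the LFS pointer file added later in this commit; a line like this is normally produced by running `git lfs track "cnn_lstm.keras"`.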
app.py ADDED
@@ -0,0 +1,126 @@
+ import streamlit as st
+ import soundfile as sf
+ import numpy as np
+ from feat import *
+ from tensorflow.keras.models import load_model
+ from sklearn.preprocessing import LabelEncoder
+ import pandas as pd
+ import librosa
+ from pyAudioAnalysis import audioSegmentation as aS
+ import speech_recognition as sr
+ import wave
+
+
+ # Label encoder
+ labelencoder = LabelEncoder()
+
+ # Load the saved model
+ model_path = 'cnn_lstm.keras'
+ model = load_model(model_path)
+
+ # Label mapping
+ label_mapping = {0: 'angry',
+                  1: 'excited',
+                  2: 'fear',
+                  3: 'happy',
+                  4: 'neutral',
+                  5: 'sad'}
+
+ # Set the title of the Streamlit app
+ st.title("Speech Emotion Recognition")
+
+ # File uploader for audio files
+ audio_file = st.file_uploader("Upload an audio file:", type=["mp3", "wav"])
+
+ # Set the interval for segments
+ interval = st.number_input("Set the interval (0.00-15.00 seconds) for emotion detection segments:",
+                            min_value=0.00, max_value=15.00, value=3.00, step=0.01)
+
+ # Button to upload
+ if st.button("Upload"):
+     if audio_file:
+         # Convert the uploaded audio to WAV format and save it
+         audio_data, samplerate = sf.read(audio_file)
+         output_file_path = 'uploaded_audio.wav'
+         sf.write(output_file_path, audio_data, samplerate)
+
+         st.audio(audio_file)
+     else:
+         st.error("Please upload an audio file.")
+
+
+ # Function to process audio and predict an emotion for each segment
+ def predict_emotions(audio_path, interval):
+     audio_data, samplerate = sf.read(audio_path)
+     duration = len(audio_data) / samplerate
+     emotions = []
+
+     for start in np.arange(0, duration, interval):
+         end = start + interval
+         if end > duration:
+             end = duration
+         segment = audio_data[int(start * samplerate):int(end * samplerate)]
+         segment_path = 'segment.wav'
+         sf.write(segment_path, segment, samplerate)
+         feat = features_extractor(segment_path)
+         feat = feat.reshape(1, -1)
+         predictions = model.predict(feat)
+         predicted_label = np.argmax(predictions, axis=1)
+         emotions.append((start, end, label_mapping[predicted_label[0]]))
+
+     return emotions
+
+
+ # Button to predict
+ if st.button("Predict"):
+     if audio_file:
+         emotions = predict_emotions('uploaded_audio.wav', interval=interval)
+
+         # Create a DataFrame to display emotions
+         emotions_df = pd.DataFrame(
+             emotions, columns=["Start", "End", "Emotion"])
+         st.write(emotions_df)
+
+         # Save emotions to a log file
+         log_file_path = 'emotion_log.csv'
+         emotions_df.to_csv(log_file_path, index=False)
+
+         # Report the most frequent (major) emotion
+         major_emotion = emotions_df['Emotion'].mode().values[0]
+         st.write(f"Major emotion: {major_emotion}")
+
+         st.success(f"Emotion log saved to {log_file_path}")
+
+         # Add download button for the emotion log file
+         with open(log_file_path, "rb") as file:
+             btn = st.download_button(
+                 label="Download Emotion Log",
+                 data=file,
+                 file_name='emotion_log.csv',
+                 mime='text/csv'
+             )
+
+         x = word_count1('uploaded_audio.wav')
+         y = get_speaking_rate('uploaded_audio.wav')
+
+         st.write(f'Number of words = {x[0]}')
+         st.write(f'Transcript = {x[1]}')
+
+         st.write(f'Speaking rate = {y} syllables per second')
+
+     else:
+         st.error("Please upload an audio file.")
+
+
+ # Additional message at the bottom of the page
+ st.write("Thank you for using the app!")
+
+ # Optional check that librosa can decode the uploaded file
+ file_path = 'path/to/your/audio/file'
+ if audio_file is not None:
+     try:
+         audio, sample_rate = librosa.load(audio_file, sr=None)
+     except Exception as e:
+         print(f"An error occurred: {e}")
cnn_lstm.keras ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc25f03aa81c2b73b835963bcc5e94312f2dee1df661e46df1180adc387b3b4d
+ size 23364981
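Only this LFS pointer lives in the repository; once the actual weights are fetched, a quick sanity check (a sketch, assuming the file has been pulled) is:

from tensorflow.keras.models import load_model

model = load_model('cnn_lstm.keras')
model.summary()  # prints the CNN-LSTM layer stack and parameter count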
feat.py ADDED
@@ -0,0 +1,134 @@
+ import librosa
+ import numpy as np
+
+
+ def features_extractor(file_name):
+     audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
+
+     # Extract MFCC features
+     mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25)
+     mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
+
+     # Extract Zero Crossing Rate
+     zcr = librosa.feature.zero_crossing_rate(y=audio)
+     zcr_scaled_features = np.mean(zcr.T, axis=0)
+
+     # Extract Chroma Features
+     chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
+     chroma_scaled_features = np.mean(chroma.T, axis=0)
+
+     # Extract Mel Spectrogram Features
+     mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
+     mel_scaled_features = np.mean(mel.T, axis=0)
+
+     # Concatenate all features into a single array
+     features = np.hstack((mfccs_scaled_features, zcr_scaled_features, chroma_scaled_features, mel_scaled_features))
+
+     return features
+
+
+ #########################################################################################################################
+ import speech_recognition as sr
+
+
+ def recognize_speech_from_file(audio_file_path):
+     # Initialize the recognizer
+     recognizer = sr.Recognizer()
+
+     # Load the audio file
+     with sr.AudioFile(audio_file_path) as source:
+         audio_data = recognizer.record(source)  # Read the entire audio file
+
+     try:
+         # Recognize speech using the Google Web Speech API
+         text = recognizer.recognize_google(audio_data)
+         return text
+     except sr.RequestError as e:
+         print(f"Could not request results; {e}")
+     except sr.UnknownValueError:
+         print("Could not understand the audio")
+
+
+ def count_words(text):
+     words = text.split()
+     return len(words)
+
+
+ def word_count(audio_path):
+     transcript = recognize_speech_from_file(audio_file_path=audio_path)
+     if transcript:
+         return [count_words(transcript), transcript]
+
+
+ ########################################################################################################################
+ import speech_recognition as sr
+ import wave
+
+
+ # Note: the definitions below redefine recognize_speech_from_file and count_words
+ # from the block above; these later versions are the ones that take effect.
+ def recognize_speech_from_file(audio_file_path):
+     recognizer = sr.Recognizer()
+     audio_file = sr.AudioFile(audio_file_path)
+     with audio_file as source:
+         audio = recognizer.record(source)
+     try:
+         transcript = recognizer.recognize_google(audio)
+         return transcript
+     except sr.UnknownValueError:
+         return None
+     except sr.RequestError as e:
+         print(f"Could not request results from Google Speech Recognition service; {e}")
+         return None
+
+
+ def count_words(text):
+     words = text.split()
+     return len(words)
+
+
+ def get_audio_duration(audio_file_path):
+     with wave.open(audio_file_path, 'r') as audio_file:
+         frames = audio_file.getnframes()
+         rate = audio_file.getframerate()
+         duration = frames / float(rate)
+     return duration
+
+
+ def word_count1(audio_path):
+     transcript = recognize_speech_from_file(audio_file_path=audio_path)
+     if transcript:
+         duration = get_audio_duration(audio_path)
+         return [count_words(transcript), transcript, duration]
+     else:
+         return [0, None, 0.0]
+
+
+ # word_count('angry_Akash.wav')  # example call, kept commented so importing feat has no side effects
+
+ # print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
+ # Example usage
+ # audio_path = 'angry_Ansh.wav'
+ # result = word_count(audio_path)
+ # print(result)
+
+ import librosa
+ import numpy as np
+ from pyAudioAnalysis import audioSegmentation as aS
+
+
+ def get_speaking_rate(file_path):
+     # Load audio file
+     y, sr = librosa.load(file_path, sr=None)
+
+     # Extract speech segments
+     segments = aS.silence_removal(y, sr, 0.020, 0.020, smooth_window=1.0, weight=0.3, plot=False)
+
+     # Total speech duration
+     speech_duration = sum([end - start for start, end in segments])
+
+     # Number of syllables (approximation)
+     num_syllables = len(librosa.effects.split(y, top_db=30))
+
+     # Calculate speaking rate (syllables per second)
+     speaking_rate = num_syllables / speech_duration if speech_duration > 0 else 0
+
+     return speaking_rate
+
+
+ # Example usage
+ # file_path = 'angry_Ansh.wav'
+ # speaking_rate = get_speaking_rate(file_path)
+ # print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
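With librosa's defaults (12 chroma bins, 128 mel bands) plus the 25 MFCCs and the single zero-crossing-rate value above, features_extractor should return a 166-dimensional vector; app.py reshapes it to (1, 166) before calling model.predict. A quick check, assuming a placeholder local clip named test.wav:

from feat import features_extractor

feat = features_extractor('test.wav')  # 'test.wav' is a placeholder clip
print(feat.shape)                      # expected: (166,) = 25 MFCC + 1 ZCR + 12 chroma + 128 mel
print(feat.reshape(1, -1).shape)       # (1, 166), the shape fed to the model in app.py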
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ librosa
+ numpy
+ SpeechRecognition
+ pyAudioAnalysis
+ streamlit
+ soundfile
+ tensorflow
+ scikit-learn
+ pandas
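Assuming a fresh environment, `pip install -r requirements.txt` followed by `streamlit run app.py` should start the app; if cnn_lstm.keras is still an LFS pointer after cloning, run `git lfs pull` first to fetch the actual weights.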