Spaces:

Rahulk2197
/

rashmi_app

Sleeping

App Files Files Community

Rahulk2197 committed on Jul 11

Commit

489f5d0

•

1 Parent(s): d146fa5

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
app.py +126 -0
cnn_lstm.keras +3 -0
feat.py +134 -0
requirements.txt +9 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cnn_lstm.keras filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import streamlit as st
+import soundfile as sf
+import numpy as np
+from feat import *
+from tensorflow.keras.models import load_model
+from sklearn.preprocessing import LabelEncoder
+import pandas as pd
+import librosa
+import numpy as np
+from pyAudioAnalysis import audioSegmentation as aS
+import speech_recognition as sr
+import wave
+# Label encoder
+labelencoder = LabelEncoder()
+# Load the saved model
+model_path = 'cnn_lstm.keras'
+model = load_model(model_path)
+# Label mapping
+label_mapping = {0: 'angry',
+                 1: 'excited',
+                 2: 'fear',
+                 3: 'happy',
+                 4: 'neutral',
+                 5: 'sad'}
+# Set the title of the Streamlit app
+st.title("Speech Emotion Recognition")
+# File uploader for audio files
+audio_file = st.file_uploader("Upload an audio file:", type=["mp3", "wav"])
+# Set the interval for segments
+interval = st.number_input("Set the interval (0.00-15.00 seconds) for emotion detection segments:",
+                           min_value=0.00, max_value=15.00, value=3.00, step=0.01)
+# Button to upload
+if st.button("Upload"):
+    if audio_file:
+        audio_data, samplerate = sf.read(audio_file)
+        # Convert the audio file to WAV format and save it
+        output_file_path = 'uploaded_audio.wav'
+        sf.write(output_file_path, audio_data, samplerate)
+        st.audio(audio_file)
+    else:
+        st.error("Please upload an audio file.")
+# Function to process audio and predict emotions
+def predict_emotions(audio_path, interval):
+    audio_data, samplerate = sf.read(audio_path)
+    duration = len(audio_data) / samplerate
+    emotions = []
+    for start in np.arange(0, duration, interval):
+        end = start + interval
+        if end > duration:
+            end = duration
+        segment = audio_data[int(start*samplerate):int(end*samplerate)]
+        segment_path = 'segment.wav'
+        sf.write(segment_path, segment, samplerate)
+        feat = features_extractor(segment_path)
+        feat = feat.reshape(1, -1)
+        predictions = model.predict(feat)
+        predicted_label = np.argmax(predictions, axis=1)
+        emotions.append((start, end, label_mapping[predicted_label[0]]))
+    return emotions
+# Button to predict
+if st.button("Predict"):
+    if audio_file:
+        print()
+        emotions = predict_emotions('uploaded_audio.wav', interval=interval)
+        # Create a DataFrame to display emotions
+        emotions_df = pd.DataFrame(
+            emotions, columns=["Start", "End", "Emotion"])
+        st.write(emotions_df)
+        # Save emotions to a log file
+        log_file_path = 'emotion_log.csv'
+        emotions_df.to_csv(log_file_path, index=False)
+        # Extrapolate major emotions
+        major_emotion = emotions_df['Emotion'].mode().values[0]
+        st.write(f"Major emotion: {major_emotion}")
+        st.success(f"Emotion log saved to {log_file_path}")
+        # Add download button for the emotion log file
+        with open(log_file_path, "rb") as file:
+            btn = st.download_button(
+                label="Download Emotion Log",
+                data=file,
+                file_name='emotion_log.csv',
+                mime='text/csv'
+            )
+        x = word_count1('uploaded_audio.wav')
+        y = get_speaking_rate('uploaded_audio.wav')
+        st.write(f'Number of words = {x[0]}')
+        st.write(f'Transcript = {x[1]}')
+        st.write(f'Speaking rate = {y} syllables per second')
+    else:
+        st.error("Please upload an audio file.")
+# Additional message at the bottom of the page
+st.write("Thank you for using the app!")
+file_path = 'path/to/your/audio/file'
+try:
+    audio, sr = librosa.load(audio_file, sr=None)
+except Exception as e:
+    print(f"An error occurred: {e}")

cnn_lstm.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc25f03aa81c2b73b835963bcc5e94312f2dee1df661e46df1180adc387b3b4d
+size 23364981

feat.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import librosa
+import numpy as np
+def features_extractor(file_name):
+    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
+    # Extract MFCC features
+    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25)
+    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
+    # Extract Zero Crossing Rate
+    zcr = librosa.feature.zero_crossing_rate(y=audio)
+    zcr_scaled_features = np.mean(zcr.T, axis=0)
+    # Extract Chroma Features
+    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
+    chroma_scaled_features = np.mean(chroma.T, axis=0)
+    # Extract Mel Spectrogram Features
+    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
+    mel_scaled_features = np.mean(mel.T, axis=0)
+    # Concatenate all features into a single array
+    features = np.hstack((mfccs_scaled_features, zcr_scaled_features, chroma_scaled_features, mel_scaled_features))
+    return features
+#########################################################################################################################
+import speech_recognition as sr
+def recognize_speech_from_file(audio_file_path):
+    # Initialize the recognizer
+    recognizer = sr.Recognizer()
+    # Load the audio file
+    with sr.AudioFile(audio_file_path) as source:
+        audio_data = recognizer.record(source)  # Read the entire audio file
+        try:
+            # Recognize speech using Google Web Speech API
+            text = recognizer.recognize_google(audio_data)
+            return text
+        except sr.RequestError as e:
+            print(f"Could not request results; {e}")
+        except sr.UnknownValueError:
+            print("Could not understand the audio")
+def count_words(text):
+    words = text.split()
+    return len(words)
+def word_count(audio_path):
+    transcript = recognize_speech_from_file(audio_file_path=audio_path)
+    if transcript:
+        return [count_words(transcript),transcript]
+########################################################################################################################
+import speech_recognition as sr
+import wave
+def recognize_speech_from_file(audio_file_path):
+    recognizer = sr.Recognizer()
+    audio_file = sr.AudioFile(audio_file_path)
+    with audio_file as source:
+        audio = recognizer.record(source)
+    try:
+        transcript = recognizer.recognize_google(audio)
+        return transcript
+    except sr.UnknownValueError:
+        return None
+    except sr.RequestError as e:
+        print(f"Could not request results from Google Speech Recognition service; {e}")
+        return None
+def count_words(text):
+    words = text.split()
+    return len(words)
+def get_audio_duration(audio_file_path):
+    with wave.open(audio_file_path, 'r') as audio_file:
+        frames = audio_file.getnframes()
+        rate = audio_file.getframerate()
+        duration = frames / float(rate)
+    return duration
+def word_count1(audio_path):
+    transcript = recognize_speech_from_file(audio_file_path=audio_path)
+    if transcript:
+        duration = get_audio_duration(audio_path)
+        return [count_words(transcript), transcript, duration]
+    else:
+        return [0, None, 0.0]
+word_count('angry_Akash.wav')
+# print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
+# Example usage
+# audio_path = 'angry_Ansh.wav'
+# result = word_count(audio_path)
+# print(result)
+import librosa
+import numpy as np
+from pyAudioAnalysis import audioSegmentation as aS
+def get_speaking_rate(file_path):
+    # Load audio file
+    y, sr = librosa.load(file_path, sr=None)
+    # Extract speech segments
+    segments = aS.silence_removal(y, sr, 0.020, 0.020, smooth_window=1.0, weight=0.3, plot=False)
+    # Total speech duration
+    speech_duration = sum([end - start for start, end in segments])
+    # Number of syllables (approximation)
+    num_syllables = len(librosa.effects.split(y, top_db=30))
+    # Calculate speaking rate (syllables per second)
+    speaking_rate = num_syllables / speech_duration if speech_duration > 0 else 0
+    return speaking_rate
+# Example usage
+# file_path = 'angry_Ansh.wav'
+# speaking_rate = get_speaking_rate(file_path)
+# print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
+# Note: get_speaking_rate returns a single float, so the result cannot be
+# indexed with [0], [1] or [2].

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+librosa
+numpy
+SpeechRecognition
+pyAudioAnalysis
+streamlit
+soundfile
+tensorflow
+scikit-learn
+pandas