"""Audio analysis utilities: feature extraction, transcription, and speaking rate.

Third-party requirements: librosa, numpy, SpeechRecognition, pyAudioAnalysis.

NOTE(review): the original file defined ``recognize_speech_from_file`` and
``count_words`` twice; the later definitions shadowed the earlier ones, so only
the surviving (second) versions are kept here — runtime behavior is unchanged.
"""

import wave

import librosa
import numpy as np
import speech_recognition as sr
from pyAudioAnalysis import audioSegmentation as aS


def features_extractor(file_name):
    """Return a 1-D acoustic feature vector for an audio file.

    Concatenates four frame-averaged feature families:
    25 MFCCs + 1 zero-crossing rate + 12 chroma bins + 128 mel bands
    (mel band count is librosa's default) -> shape (166,).

    Parameters
    ----------
    file_name : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Flat feature vector.
    """
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # MFCCs: (25, frames) -> mean over frames -> (25,)
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    # Zero-crossing rate: (1, frames) -> (1,)
    zcr = librosa.feature.zero_crossing_rate(y=audio)
    zcr_scaled_features = np.mean(zcr.T, axis=0)

    # Chroma: (12, frames) -> (12,)
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    chroma_scaled_features = np.mean(chroma.T, axis=0)

    # Mel spectrogram: (128, frames) by default -> (128,)
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_scaled_features = np.mean(mel.T, axis=0)

    # Stack all families into one flat vector.
    features = np.hstack((mfccs_scaled_features, zcr_scaled_features,
                          chroma_scaled_features, mel_scaled_features))
    return features


def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file using the Google Web Speech API.

    Parameters
    ----------
    audio_file_path : str
        Path to an audio file readable by ``speech_recognition.AudioFile``
        (WAV/AIFF/FLAC).

    Returns
    -------
    str or None
        The transcript, or ``None`` when the speech was unintelligible or
        the web service could not be reached (the latter also prints a
        diagnostic message).
    """
    recognizer = sr.Recognizer()
    audio_file = sr.AudioFile(audio_file_path)
    with audio_file as source:
        audio = recognizer.record(source)  # read the entire audio file
    try:
        transcript = recognizer.recognize_google(audio)
        return transcript
    except sr.UnknownValueError:
        # Speech was present but could not be understood.
        return None
    except sr.RequestError as e:
        # Network / API failure: report and degrade to None.
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None


def count_words(text):
    """Return the number of whitespace-separated tokens in *text*."""
    words = text.split()
    return len(words)


def get_audio_duration(audio_file_path):
    """Return the duration of a WAV file in seconds (frames / frame rate).

    Only WAV input is supported because this uses the stdlib ``wave`` module.
    """
    with wave.open(audio_file_path, 'r') as audio_file:
        frames = audio_file.getnframes()
        rate = audio_file.getframerate()
        duration = frames / float(rate)
    return duration


def word_count(audio_path):
    """Transcribe *audio_path* and return ``[word_count, transcript]``.

    Returns ``None`` when transcription fails (no transcript available).
    """
    transcript = recognize_speech_from_file(audio_file_path=audio_path)
    if transcript:
        return [count_words(transcript), transcript]
    # Original fell off the end here; make the implicit None explicit.
    return None


def word_count1(audio_path):
    """Transcribe *audio_path* and return ``[word_count, transcript, duration]``.

    Falls back to ``[0, None, 0.0]`` when transcription fails. Duration is
    computed with :func:`get_audio_duration`, so *audio_path* must be a WAV file.
    """
    transcript = recognize_speech_from_file(audio_file_path=audio_path)
    if transcript:
        duration = get_audio_duration(audio_path)
        return [count_words(transcript), transcript, duration]
    else:
        return [0, None, 0.0]


def get_speaking_rate(file_path):
    """Estimate speaking rate in (approximate) syllables per second.

    The syllable count is a rough proxy: the number of non-silent intervals
    found by ``librosa.effects.split`` at a 30 dB threshold. Speech duration
    is the total length of the segments pyAudioAnalysis marks as non-silent.

    Returns
    -------
    float
        Syllables per second, or 0 when no speech is detected.
    """
    # Load at the file's native sampling rate (sr=None disables resampling).
    y, sr_rate = librosa.load(file_path, sr=None)

    # Non-silent [start, end] segments in seconds (20 ms windows/steps).
    segments = aS.silence_removal(y, sr_rate, 0.020, 0.020,
                                  smooth_window=1.0, weight=0.3, plot=False)

    # Total time spent speaking.
    speech_duration = sum(end - start for start, end in segments)

    # Syllable approximation: count of non-silent intervals.
    num_syllables = len(librosa.effects.split(y, top_db=30))

    # Guard against division by zero when nothing was detected as speech.
    speaking_rate = num_syllables / speech_duration if speech_duration > 0 else 0
    return speaking_rate