Spaces:
Sleeping
Sleeping
File size: 4,417 Bytes
5d5d275 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import librosa
import numpy as np
def features_extractor(file_name):
    """Build a single flat feature vector for one audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    then four feature matrices are computed: 25 MFCCs, zero-crossing rate,
    chroma STFT and mel spectrogram. Each matrix is reduced with a mean
    along axis 0 of its transpose (presumably the frame/time axis -- confirm
    against librosa's output layout) and the reduced vectors are joined end
    to end.

    Args:
        file_name: Audio file to load; passed straight to ``librosa.load``.

    Returns:
        NumPy array holding, in order, the MFCC means, the zero-crossing
        rate mean, the chroma means and the mel-spectrogram means.
    """
    signal, rate = librosa.load(file_name, res_type='kaiser_fast')

    # Order matters: it fixes the layout of the returned vector.
    feature_matrices = (
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=25),
        librosa.feature.zero_crossing_rate(y=signal),
        librosa.feature.chroma_stft(y=signal, sr=rate),
        librosa.feature.melspectrogram(y=signal, sr=rate),
    )

    averaged = tuple(np.mean(matrix.T, axis=0) for matrix in feature_matrices)
    return np.hstack(averaged)
#########################################################################################################################
import speech_recognition as sr
def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file with the Google Web Speech API.

    NOTE: a later function with the same name is defined further down in
    this file; once the module is fully loaded that later definition is the
    one bound to this name.

    Args:
        audio_file_path: Path handed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the request fails or the audio
        cannot be understood (a message is printed in either case).
    """
    engine = sr.Recognizer()

    # Pull the whole file into memory before running recognition.
    with sr.AudioFile(audio_file_path) as clip:
        recording = engine.record(clip)

    try:
        return engine.recognize_google(recording)
    except sr.RequestError as e:
        print(f"Could not request results; {e}")
    except sr.UnknownValueError:
        print("Could not understand the audio")

    # Both failure paths fall through to here.
    return None
def count_words(text):
    """Count the whitespace-separated words in *text*.

    Args:
        text: Transcript string, or ``None`` when no transcript is available.

    Returns:
        The number of words; ``0`` when *text* is ``None`` or empty.
    """
    # The recognizers in this file report failure as None; treat that as
    # zero words instead of raising AttributeError on ``None.split()``.
    if not text:
        return 0
    return len(text.split())
def word_count(audio_path):
    """Transcribe *audio_path* and count the words in the transcript.

    Args:
        audio_path: Audio file forwarded to ``recognize_speech_from_file``.

    Returns:
        ``[number_of_words, transcript]`` when a non-empty transcript is
        obtained, otherwise ``None``.
    """
    text = recognize_speech_from_file(audio_file_path=audio_path)
    if not text:
        # No transcript (None or empty string): nothing to count.
        return None
    return [count_words(text), text]
########################################################################################################################
import speech_recognition as sr
import wave
def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file with Google speech recognition.

    Args:
        audio_file_path: Path handed to ``sr.AudioFile``.

    Returns:
        The transcript, or ``None`` on failure. Unintelligible audio fails
        silently; a request error prints a message first.
    """
    engine = sr.Recognizer()

    # Record the complete file contents before recognition.
    with sr.AudioFile(audio_file_path) as clip:
        recording = engine.record(clip)

    try:
        return engine.recognize_google(recording)
    except sr.UnknownValueError:
        # Speech could not be understood: intentionally no message.
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None
def count_words(text):
    """Count the whitespace-separated words in *text*.

    Args:
        text: Transcript string, or ``None`` when no transcript is available.

    Returns:
        The number of words; ``0`` when *text* is ``None`` or empty.
    """
    # recognize_speech_from_file returns None on failure; treat that as zero
    # words instead of raising AttributeError on ``None.split()``.
    if not text:
        return 0
    return len(text.split())
def get_audio_duration(audio_file_path):
    """Return the playing time of a WAV file in seconds.

    Args:
        audio_file_path: File opened for reading with ``wave.open``.

    Returns:
        The frame count divided by the frame rate, as a float.
    """
    with wave.open(audio_file_path, 'r') as wav:
        total_frames = wav.getnframes()
        frames_per_second = float(wav.getframerate())
    return total_frames / frames_per_second
def word_count1(audio_path):
    """Transcribe *audio_path* and report word count, text and duration.

    Args:
        audio_path: Audio file forwarded to ``recognize_speech_from_file``
            and, when a transcript exists, to ``get_audio_duration``.

    Returns:
        ``[number_of_words, transcript, duration_in_seconds]`` when a
        non-empty transcript is obtained, otherwise ``[0, None, 0.0]``.
    """
    text = recognize_speech_from_file(audio_file_path=audio_path)
    if not text:
        # No transcript: the duration is not measured at all.
        return [0, None, 0.0]

    seconds = get_audio_duration(audio_path)
    return [count_words(text), text, seconds]
# word_count('angry_Akash.wav')
# print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
# Example usage
# audio_path = 'angry_Ansh.wav'
# result = word_count(audio_path)
# print(result)
import librosa
import numpy as np
from pyAudioAnalysis import audioSegmentation as aS
def get_speaking_rate(file_path):
    """Estimate a speaking rate for an audio file.

    The rate is the number of intervals returned by
    ``librosa.effects.split`` (``top_db=30``), used here as a rough stand-in
    for a syllable count, divided by the summed length of the segments
    returned by pyAudioAnalysis ``silence_removal``.

    Args:
        file_path: Audio file passed to ``librosa.load`` with ``sr=None``.

    Returns:
        The estimated rate, or ``0`` when the summed segment length is not
        positive.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)

    # Segments come back as (start, end) pairs; add up their lengths.
    voiced = aS.silence_removal(
        samples, sample_rate, 0.020, 0.020,
        smooth_window=1.0, weight=0.3, plot=False,
    )
    speech_total = sum(stop - begin for begin, stop in voiced)

    # Approximation: each interval found by the split counts as one syllable.
    interval_count = len(librosa.effects.split(samples, top_db=30))

    if speech_total > 0:
        return interval_count / speech_total
    return 0
# Example usage
# file_path = 'angry_Ansh.wav'
# speaking_rate = get_speaking_rate(file_path)
# print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
# NOTE: get_speaking_rate returns a single number, so its result must not be
# indexed with [0], [1] or [2].
|