Rahulk2197 committed on
Commit
5d5d275
1 Parent(s): fac34c9

Update feat.py

Browse files
Files changed (1) hide show
  1. feat.py +134 -134
feat.py CHANGED
@@ -1,134 +1,134 @@
1
-
2
- import librosa
3
- import numpy as np
4
-
5
- def features_extractor(file_name):
6
- audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
7
-
8
- # Extract MFCC features
9
- mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25)
10
- mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
11
-
12
- # Extract Zero Crossing Rate
13
- zcr = librosa.feature.zero_crossing_rate(y=audio)
14
- zcr_scaled_features = np.mean(zcr.T, axis=0)
15
-
16
- # Extract Chroma Features
17
- chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
18
- chroma_scaled_features = np.mean(chroma.T, axis=0)
19
-
20
- # Extract Mel Spectrogram Features
21
- mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
22
- mel_scaled_features = np.mean(mel.T, axis=0)
23
-
24
- # Concatenate all features into a single array
25
- features = np.hstack((mfccs_scaled_features, zcr_scaled_features, chroma_scaled_features, mel_scaled_features))
26
-
27
- return features
28
-
29
-
30
- #########################################################################################################################
31
- import speech_recognition as sr
32
-
33
- def recognize_speech_from_file(audio_file_path):
34
- # Initialize the recognizer
35
- recognizer = sr.Recognizer()
36
-
37
- # Load the audio file
38
- with sr.AudioFile(audio_file_path) as source:
39
-
40
- audio_data = recognizer.record(source) # Read the entire audio file
41
-
42
- try:
43
- # Recognize speech using Google Web Speech API
44
- text = recognizer.recognize_google(audio_data)
45
-
46
- return text
47
- except sr.RequestError as e:
48
- print(f"Could not request results; {e}")
49
- except sr.UnknownValueError:
50
- print("Could not understand the audio")
51
-
52
- def count_words(text):
53
- words = text.split()
54
- return len(words)
55
-
56
- def word_count(audio_path):
57
- transcript = recognize_speech_from_file(audio_file_path=audio_path)
58
- if transcript:
59
- return [count_words(transcript),transcript]
60
-
61
- ########################################################################################################################
62
- import speech_recognition as sr
63
- import wave
64
-
65
- def recognize_speech_from_file(audio_file_path):
66
- recognizer = sr.Recognizer()
67
- audio_file = sr.AudioFile(audio_file_path)
68
- with audio_file as source:
69
- audio = recognizer.record(source)
70
- try:
71
- transcript = recognizer.recognize_google(audio)
72
- return transcript
73
- except sr.UnknownValueError:
74
- return None
75
- except sr.RequestError as e:
76
- print(f"Could not request results from Google Speech Recognition service; {e}")
77
- return None
78
-
79
- def count_words(text):
80
- words = text.split()
81
- return len(words)
82
-
83
- def get_audio_duration(audio_file_path):
84
- with wave.open(audio_file_path, 'r') as audio_file:
85
- frames = audio_file.getnframes()
86
- rate = audio_file.getframerate()
87
- duration = frames / float(rate)
88
- return duration
89
-
90
- def word_count1(audio_path):
91
- transcript = recognize_speech_from_file(audio_file_path=audio_path)
92
- if transcript:
93
- duration = get_audio_duration(audio_path)
94
- return [count_words(transcript), transcript, duration]
95
- else:
96
- return [0, None, 0.0]
97
-
98
- word_count('angry_Akash.wav')
99
-
100
- # print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
101
- # Example usage
102
- # audio_path = 'angry_Ansh.wav'
103
- # result = word_count(audio_path)
104
- # print(result)
105
-
106
- import librosa
107
- import numpy as np
108
- from pyAudioAnalysis import audioSegmentation as aS
109
-
110
- def get_speaking_rate(file_path):
111
- # Load audio file
112
- y, sr = librosa.load(file_path, sr=None)
113
-
114
- # Extract speech segments
115
- segments = aS.silence_removal(y, sr, 0.020, 0.020, smooth_window=1.0, weight=0.3, plot=False)
116
-
117
- # Total speech duration
118
- speech_duration = sum([end - start for start, end in segments])
119
-
120
- # Number of syllables (approximation)
121
- num_syllables = len(librosa.effects.split(y, top_db=30))
122
-
123
- # Calculate speaking rate (syllables per second)
124
- speaking_rate = num_syllables / speech_duration if speech_duration > 0 else 0
125
-
126
- return speaking_rate
127
-
128
- # Example usage
129
- # file_path = 'angry_Ansh.wav'
130
- # speaking_rate = get_speaking_rate(file_path)[0]
131
- # print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
132
- # print(get_speaking_rate(file_path)[1])
133
- # print(get_speaking_rate(file_path)[2])
134
-
 
1
+
2
+ import librosa
3
+ import numpy as np
4
+
5
def features_extractor(file_name):
    """Build a single feature vector describing an audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``)
    and four feature matrices are computed from it: 25 MFCCs, zero crossing
    rate, chroma STFT and mel spectrogram. Each matrix is transposed and
    averaged over ``axis=0`` (i.e. one mean per feature row), and the four
    resulting vectors are stacked end to end.

    Args:
        file_name: Path of the audio file, passed straight to
            ``librosa.load``.

    Returns:
        A 1-D NumPy array laid out as MFCC means, zero-crossing-rate mean,
        chroma means, mel-spectrogram means (in that order).
    """
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # The order of this list fixes the layout of the returned vector.
    feature_matrices = [
        librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25),
        librosa.feature.zero_crossing_rate(y=audio),
        librosa.feature.chroma_stft(y=audio, sr=sample_rate),
        librosa.feature.melspectrogram(y=audio, sr=sample_rate),
    ]

    # Reduce every matrix to one mean value per feature row, then join.
    return np.hstack([np.mean(matrix.T, axis=0) for matrix in feature_matrices])
28
+
29
+
30
+ #########################################################################################################################
31
+ import speech_recognition as sr
32
+
33
def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file using the Google Web Speech API.

    NOTE(review): a second function with this exact name is defined further
    down in this file; at import time that later definition replaces this
    one.

    Args:
        audio_file_path: Path handed to ``sr.AudioFile``.

    Returns:
        The recognized text. When the request fails or the audio cannot be
        understood, a message is printed and ``None`` is returned.
    """
    engine = sr.Recognizer()

    # Read the entire audio file before sending anything to the service.
    with sr.AudioFile(audio_file_path) as wav_source:
        recording = engine.record(wav_source)

    try:
        return engine.recognize_google(recording)
    except sr.RequestError as e:
        print(f"Could not request results; {e}")
    except sr.UnknownValueError:
        print("Could not understand the audio")
    return None
51
+
52
def count_words(text):
    """Return the number of whitespace-separated tokens in *text*."""
    return len(text.split())
55
+
56
def word_count(audio_path):
    """Transcribe an audio file and count the words in its transcript.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        ``[number_of_words, transcript]`` when a non-empty transcript is
        obtained; ``None`` otherwise.
    """
    spoken_text = recognize_speech_from_file(audio_file_path=audio_path)
    if not spoken_text:
        # No transcript (None or empty string): nothing to count.
        return None
    return [count_words(spoken_text), spoken_text]
60
+
61
+ ########################################################################################################################
62
+ import speech_recognition as sr
63
+ import wave
64
+
65
def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file using Google Speech Recognition.

    Args:
        audio_file_path: Path handed to ``sr.AudioFile``.

    Returns:
        The transcript string, or ``None`` when the audio cannot be
        understood or the request to the service fails (the latter also
        prints the error).
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file_path) as wav_source:
        recording = engine.record(wav_source)

    result = None
    try:
        result = engine.recognize_google(recording)
    except sr.UnknownValueError:
        # Unintelligible audio is reported as "no transcript", silently.
        pass
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
    return result
78
+
79
def count_words(text):
    """Count the words in *text*, splitting on any run of whitespace."""
    return len(text.split())
82
+
83
def get_audio_duration(audio_file_path):
    """Return the duration of a WAV file in seconds.

    The duration is the file's frame count divided by its frame rate, both
    read through the standard-library ``wave`` module.

    Args:
        audio_file_path: Path of the WAV file to inspect.

    Returns:
        The duration as a float.
    """
    with wave.open(audio_file_path, 'r') as wav_reader:
        frame_total = wav_reader.getnframes()
        frames_per_second = float(wav_reader.getframerate())
    return frame_total / frames_per_second
89
+
90
def word_count1(audio_path):
    """Transcribe an audio file and report word count, text and duration.

    Args:
        audio_path: Path of the audio file; it is transcribed and, when a
            transcript is obtained, also opened to measure its duration.

    Returns:
        ``[number_of_words, transcript, duration]`` when a non-empty
        transcript is obtained; ``[0, None, 0.0]`` otherwise.
    """
    spoken_text = recognize_speech_from_file(audio_file_path=audio_path)
    if not spoken_text:
        # No transcript: return the fixed "nothing recognized" triple.
        return [0, None, 0.0]

    seconds = get_audio_duration(audio_path)
    return [count_words(spoken_text), spoken_text, seconds]
97
+
98
+ # word_count('angry_Akash.wav')
99
+
100
+ # print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
101
+ # Example usage
102
+ # audio_path = 'angry_Ansh.wav'
103
+ # result = word_count(audio_path)
104
+ # print(result)
105
+
106
+ import librosa
107
+ import numpy as np
108
+ from pyAudioAnalysis import audioSegmentation as aS
109
+
110
def get_speaking_rate(file_path):
    """Estimate the speaking rate of an audio file.

    The rate is the number of non-silent intervals found by
    ``librosa.effects.split`` (used here as an approximation of the
    syllable count) divided by the total length of the segments returned
    by pyAudioAnalysis' ``silence_removal``.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        The estimated rate, or ``0`` when the total segment length is not
        positive.
    """
    # sr=None is passed so librosa does not apply its default resampling.
    signal, sampling_rate = librosa.load(file_path, sr=None)

    # Segments left after silence removal (20 ms window and step).
    voiced_segments = aS.silence_removal(
        signal,
        sampling_rate,
        0.020,
        0.020,
        smooth_window=1.0,
        weight=0.3,
        plot=False,
    )
    speech_duration = sum(end - start for start, end in voiced_segments)

    # Approximation: each non-silent interval (30 dB threshold) counts as
    # one syllable.
    syllable_estimate = len(librosa.effects.split(signal, top_db=30))

    if speech_duration > 0:
        return syllable_estimate / speech_duration
    return 0
127
+
128
+ # Example usage
129
+ # file_path = 'angry_Ansh.wav'
130
+ # speaking_rate = get_speaking_rate(file_path)[0]
131
+ # print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
132
+ # print(get_speaking_rate(file_path)[1])
133
+ # print(get_speaking_rate(file_path)[2])
134
+