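"""Audio feature extraction and speech analysis utilities.

Contents:
- features_extractor: MFCC, zero-crossing rate, chroma, and mel-spectrogram features via librosa.
- recognize_speech_from_file / count_words / word_count / word_count1: transcription with the
  Google Web Speech API (speech_recognition), word counting, and audio duration.
- get_speaking_rate: approximate speaking rate (syllables per second) via pyAudioAnalysis and librosa.
"""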
import librosa
import numpy as np

def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    
    # Extract MFCC features
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=25)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    
    # Extract Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio)
    zcr_scaled_features = np.mean(zcr.T, axis=0)
    
    # Extract Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    chroma_scaled_features = np.mean(chroma.T, axis=0)
    
    # Extract Mel Spectrogram Features
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    mel_scaled_features = np.mean(mel.T, axis=0)
    
    # Concatenate all features into a single array
    features = np.hstack((mfccs_scaled_features, zcr_scaled_features, chroma_scaled_features, mel_scaled_features))
    
    return features
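
# Example usage (assumes 'angry_Akash.wav' is available, as in the calls further below). With the
# defaults used above, the vector holds 25 MFCCs + 1 ZCR + 12 chroma bins + 128 mel bands = 166 values.
# feature_vector = features_extractor('angry_Akash.wav')
# print(feature_vector.shape)  # expected: (166,)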


#########################################################################################################################
# Speech-to-text word counting: transcribe a WAV file with the Google Web Speech API and count the words.
import speech_recognition as sr

def recognize_speech_from_file(audio_file_path):
    # Initialize the recognizer
    recognizer = sr.Recognizer()

    # Load the audio file
    with sr.AudioFile(audio_file_path) as source:
        
        audio_data = recognizer.record(source)  # Read the entire audio file
        
        try:
            # Recognize speech using Google Web Speech API
            text = recognizer.recognize_google(audio_data)
          
            return text
        except sr.RequestError as e:
            print(f"Could not request results; {e}")
        except sr.UnknownValueError:
            print("Could not understand the audio")

def count_words(text):
    words = text.split()
    return len(words)

def word_count(audio_path):
    transcript = recognize_speech_from_file(audio_file_path=audio_path)
    if transcript:
        return [count_words(transcript),transcript]

########################################################################################################################
# Second variant: recognize_speech_from_file is redefined to return None explicitly on errors,
# and the word count is reported together with the transcript and the audio duration (via wave).
import speech_recognition as sr
import wave

def recognize_speech_from_file(audio_file_path):
    recognizer = sr.Recognizer()
    audio_file = sr.AudioFile(audio_file_path)
    with audio_file as source:
        audio = recognizer.record(source)
    try:
        transcript = recognizer.recognize_google(audio)
        return transcript
    except sr.UnknownValueError:
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None

def count_words(text):
    words = text.split()
    return len(words)

def get_audio_duration(audio_file_path):
    # wave handles uncompressed WAV files, which matches the .wav inputs used in this script
    with wave.open(audio_file_path, 'r') as audio_file:
        frames = audio_file.getnframes()
        rate = audio_file.getframerate()
        duration = frames / float(rate)
    return duration

def word_count1(audio_path):
    transcript = recognize_speech_from_file(audio_file_path=audio_path)
    if transcript:
        duration = get_audio_duration(audio_path)
        return [count_words(transcript), transcript, duration]
    else:
        return [0, None, 0.0]
    
# Example usage
# print(word_count('angry_Akash.wav'))
# print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
# audio_path = 'angry_Ansh.wav'
# result = word_count(audio_path)
# print(result)

import librosa
import numpy as np
from pyAudioAnalysis import audioSegmentation as aS

def get_speaking_rate(file_path):
    # Load audio file
    y, sr = librosa.load(file_path, sr=None)
    
    # Detect speech segments (start/end times in seconds) via pyAudioAnalysis silence removal
    segments = aS.silence_removal(y, sr, 0.020, 0.020, smooth_window=1.0, weight=0.3, plot=False)
    
    # Total speech duration in seconds
    speech_duration = sum([end - start for start, end in segments])
    
    # Approximate syllable count: the number of non-silent intervals found by librosa
    num_syllables = len(librosa.effects.split(y, top_db=30))
    
    # Calculate speaking rate (syllables per second)
    speaking_rate = num_syllables / speech_duration if speech_duration > 0 else 0
    
    return speaking_rate

# Example usage (get_speaking_rate returns a single float: syllables per second)
# file_path = 'angry_Ansh.wav'
# speaking_rate = get_speaking_rate(file_path)
# print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")