import os
import csv
import random
import pandas as pd
import numpy as np
import gradio as gr
from collections import Counter
from utils import *
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavf
from huggingface_hub import Repository, upload_file

HF_TOKEN = os.environ.get("HF_TOKEN")

GREETINGS_DIR = './greetings'
greeting_files = [f.name for f in os.scandir(GREETINGS_DIR)]

DATASET_REPO_URL = "https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings"
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

GENDER = ['Choose Gender', 'Male', 'Female', 'Other', 'Prefer not to say']

#------------------Work on Languages--------------------
languages = ["oshindonga", "oshikwanyama"]
language_id = ["ng", "kj"]
#------------------Work on Languages--------------------

repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN
)
repo.git_pull()

with open('app.css', 'r') as f:
    BLOCK_CSS = f.read()


def save_record(language, record, greeting, gender, accent, greeting_history, current_greeting, done_recording):
    # Set defaults for the session state on the first call
    greeting_history = greeting_history if greeting_history is not None else [0]
    current_greeting = current_greeting if current_greeting is not None else 0  # 0 is the default greeting
    done_recording = done_recording if done_recording is not None else False

    # Save the recording together with its speaker metadata
    speaker_metadata = {}
    speaker_metadata['gender'] = gender if gender != GENDER[0] else ''
    speaker_metadata['accent'] = accent if accent != '' else ''
    default_record = None

    if not done_recording:
        if language is not None and language != 'Choose language' and record is not None and greeting is not None:
            # The dropdown returns title-cased names; lower-case to match `languages` and image folders
            language = language.lower()
            lang_id = language_id[languages.index(language)]

            # Write the audio to file
            audio_name = get_unique_name()
            SAVE_FILE_DIR = os.path.join(LOCAL_DIR, audio_name)
            os.makedirs(SAVE_FILE_DIR, exist_ok=True)
            audio_output_filename = os.path.join(SAVE_FILE_DIR, 'audio.wav')
            wavf.write(audio_output_filename, record[0], record[1])

            # Write metadata.jsonl to file
            json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
            metadata = {
                'id': audio_name,
                'file_name': 'audio.wav',
                'language_name': language,
                'language_id': lang_id,
                'greeting': current_greeting,
                'frequency': record[0],
                'gender': speaker_metadata['gender'],
                'accent': speaker_metadata['accent'],
            }
            dump_json(metadata, json_file_path)

            # Upload the audio
            repo_audio_path = os.path.join(REPOSITORY_DIR, os.path.join(audio_name, 'audio.wav'))
            _ = upload_file(
                path_or_fileobj=audio_output_filename,
                path_in_repo=repo_audio_path,
                repo_id='meyabase/crowd-oshiwambo-speech-greetings',
                repo_type='dataset',
                token=HF_TOKEN
            )

            # Upload the metadata
            repo_json_path = os.path.join(REPOSITORY_DIR, os.path.join(audio_name, 'metadata.jsonl'))
            _ = upload_file(
                path_or_fileobj=json_file_path,
                path_in_repo=repo_json_path,
                repo_id='meyabase/crowd-oshiwambo-speech-greetings',
                repo_type='dataset',
                token=HF_TOKEN
            )

            output = 'Recording successfully saved! On to the next one...'
            # Choose the next greeting: record the one just done, then pick one that has
            # not been recorded yet. Both languages have three greeting images (0-2).
            greeting_history.append(current_greeting)
            if language in ('oshindonga', 'oshikwanyama'):
                greeting_choices = [i for i in range(3) if i not in greeting_history]
                if greeting_choices != []:
                    next_greeting = random.choice(greeting_choices)
                    next_greeting_image = f'greetings/{language}/{next_greeting}.png'
                else:
                    done_recording = True
                    next_greeting = 0
                    next_greeting_image = 'greetings/best.gif'
                    output = "You have finished all recordings! You can reload to start again."

            output_string = "<p>" + output + "</p>"
            return output_string, next_greeting_image, greeting_history, next_greeting, done_recording, default_record

        # Input validation: report the missing field
        if greeting is None:
            output = "Greeting must be specified!"
        if record is None:
            output = "No recording found!"
        if language is None or language == 'Choose language':
            output = 'Language must be specified!'
        output_string = "<p>" + output + "</p>"

        # Return the error message together with the previous image and state
        return output_string, greeting, greeting_history, current_greeting, done_recording, default_record

    else:
        # Stop accepting recordings (best.gif is displaying)
        output = '🙌 You have finished all recordings! Thank you. You can reload to start again.'
        output_string = "<p>" + output + "</p>"
        next_greeting = 0  # the default greeting
        next_greeting_image = 'greetings/best.gif'
        return output_string, next_greeting_image, greeting_history, next_greeting, done_recording, default_record


def get_metadata_json(path):
    try:
        return read_json_lines(path)[0]
    except Exception:
        return []


def get_metadata_of_dataset():
    repo.git_pull()
    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR, 'data')
    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR, f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []

    audio_repo = [os.path.join(f, 'audio.wav') for f in repo_recordings]
    audio_repo = [a.replace('data/data/', 'https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings/resolve/main/data/') for a in audio_repo]

    metadata_all = [get_metadata_json(os.path.join(f, 'metadata.jsonl')) for f in repo_recordings]
    metadata_all = [m for m in metadata_all if m != []]
    return metadata_all


def display_records():
    repo.git_pull()
    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR, 'data')
    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR, f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []

    audio_repo = [os.path.join(f, 'audio.wav') for f in repo_recordings]
    audio_repo = [a.replace('data/data/', 'https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings/resolve/main/data/') for a in audio_repo]
    metadata_repo = [read_json_lines(os.path.join(f, 'metadata.jsonl'))[0] for f in repo_recordings]

    audios_all = audio_repo
    metadata_all = metadata_repo

    langs = [m['language_name'] for m in metadata_all]
    audios = [a for a in audios_all]
    texts = [m.get('text', '') for m in metadata_all]  # the text field may be absent from saved metadata
    greetings = [m['greeting'] for m in metadata_all]

    html = f"""<div class="infoPoint">
    <h1>Hooray! We have collected {len(metadata_all)} samples!</h1>
    <table style="width:100%">
        <tr>
            <th>language</th>
            <th>audio</th>
            <th>greeting</th>
            <th>text</th>
        </tr>"""

    # One row per sample: language, an audio player for the recording, the greeting id, and its text
    for lang, audio, text, greet_ in zip(langs, audios, texts, greetings):
        html += f"""<tr>
            <td>{lang}</td>
            <td><audio controls><source src="{audio}" type="audio/wav"></audio></td>
            <td>{greet_}</td>
            <td>{text}</td>
        </tr>"""

    html += "</table></div>"
    return html

markdown = """# 🔊 Oshiwambo Speech Greetings

This is a platform to contribute your Oshiwambo greetings for the speech recognition task.
"""

record_markdown = """
Record greetings in your language and help us build a dataset for speech recognition in Oshiwambo.
"""

# Interface design begins
block = gr.Blocks(css=BLOCK_CSS)

with block:
    gr.Markdown(markdown)

    with gr.Tabs():
        with gr.TabItem('Record'):
            gr.Markdown(record_markdown)

            with gr.Row():
                language = gr.inputs.Dropdown(choices=sorted([lang_.title() for lang_ in list(languages)]), label="Choose language", default=languages[0].title())
                gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
                accent = gr.inputs.Textbox(label="Accent (optional)", default='', placeholder="e.g. oshikwanyama, oshindonga, oshimbadja, oshingadjera, etc.")

            # Show the default greeting image for the default language first
            greeting = gr.Image(f'greetings/{languages[0].lower()}/0.png', image_mode="L")
            greeting_history = gr.Variable()  # stores the history of greetings
            record = gr.Audio(source="microphone", label='Record your voice')
            output_result = gr.outputs.HTML()

            state = gr.Variable()
            current_greeting = gr.Variable()
            done_recording = gr.Variable()  # signals when to stop accepting records even if `Submit` is clicked

            save = gr.Button("Submit")
            save.click(
                save_record,
                inputs=[language, record, greeting, gender, accent, state, current_greeting, done_recording],
                outputs=[output_result, greeting, state, current_greeting, done_recording, record]
            )

block.launch()