# Source: Hugging Face Space "marianna13/annotate-audio".
# Space status when this file was captured: "Runtime error".
# File size: 3,986 Bytes


import gradio as gr
import json
import re
import string
import pandas as pd
import os
import requests
from textwrap import wrap
import uuid
import gspread
import ast



def download_and_save_file(URL, audio_dir, timeout=30):
    """Download ``URL`` and save the response body inside ``audio_dir``.

    Args:
        URL: Address of the file to fetch.
        audio_dir: Existing directory the file is written to.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot hang the app forever.

    Returns:
        The path of the saved file, ``'<audio_dir>/<file name>'``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Browser-like headers. No 'cookie' header is sent: the previous value
    # was a session cookie for an unrelated site and must not be handed to
    # arbitrary hosts. No 'accept-encoding' header either: requests fills in
    # the encodings it is able to decode, whereas advertising 'br'
    # unconditionally risks receiving a body that is written to disk still
    # compressed.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://www.google.com/',
        'accept-language': 'en-US,en;q=0.9,',
    }
    doc = requests.get(URL, headers=headers, timeout=timeout)
    # Fail loudly instead of saving an HTML error page as if it were audio.
    doc.raise_for_status()

    # Drop fragment and query BEFORE taking the last path segment, so a '/'
    # inside the query string cannot be mistaken for a path separator.
    url_path = URL.split('#')[0].split('?')[0].rstrip('/')
    file_name = url_path.split('/')[-1]
    if not file_name:
        # Nothing usable in the URL: fall back to a unique generated name
        # rather than trying to open the directory itself for writing.
        file_name = uuid.uuid4().hex

    audio_path = f'{audio_dir}/{file_name}'
    with open(audio_path, 'wb') as f:
        f.write(doc.content)
    return audio_path



# Materialise the service-account credentials on disk: the JSON text comes
# from the CREDENTIALS environment variable and is re-serialised into
# credentials.json. strict=False lets the parser accept control characters
# (e.g. raw newlines) inside JSON strings.
credentials = os.environ['CREDENTIALS']
data = json.loads(credentials, strict=False)
with open('credentials.json', mode='w') as f:
    f.write(json.dumps(data))




# Folder that receives downloaded audio files; created at import time so
# later writes into it do not fail on a missing directory.
audio_dir = 'AUDIO'
os.makedirs(name=audio_dir, exist_ok=True)

# Heading and help text displayed by the Gradio interface.
title = '🎵 Annotate audio'
description = 'Choose a sentence (or sentences) that describes audio the best.'

def sample_df():
    """Pick one not-yet-captioned row from the sheet and prepare it for the UI.

    Opens the 'Annotated CC Audio' spreadsheet with the service account in
    ``credentials.json``, loads its first worksheet into a DataFrame, draws
    one random row whose ``caption`` cell is empty, downloads that row's
    audio into ``audio_dir`` and tidies the candidate caption texts.

    Returns:
        A tuple ``(audio_path, audio_url, sibling_elems, audio_meta,
        page_title, df, worksheet)`` where ``sibling_elems`` is a list of
        unique, line-wrapped, non-empty strings in their original order and
        ``audio_meta`` is a ``'key: value; ...'`` string built from the
        selected tags, or ``None`` when there is nothing to show.

    Raises:
        ValueError: If no row with an empty caption is left.
    """
    gc = gspread.service_account(filename='credentials.json')
    sh = gc.open('Annotated CC Audio')
    worksheet = sh.sheet1
    df = pd.DataFrame(worksheet.get_all_records())

    pending = df[df['caption'] == '']
    if pending.empty:
        # Explicit message instead of the opaque error pandas raises when
        # sampling from an empty frame.
        raise ValueError('No rows left to annotate: every row already has a caption.')
    row = pending.sample(1).iloc[0]

    audio_url = row['audio_url']
    page_title = row['page_title']
    audio_path = download_and_save_file(audio_url, audio_dir)

    # The cell holds the repr of a Python list; literal_eval only accepts
    # literals, so it is safe on sheet content. An empty cell means "none".
    raw_siblings = ast.literal_eval(row['sibling_elems']) if row['sibling_elems'] else []
    wrapped = ("\n".join(wrap(s.replace('\n', ''))) for s in raw_siblings)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # checkbox order is stable between runs (a set would shuffle it).
    # Filtering after wrapping also drops whitespace-only entries.
    sibling_elems = list(dict.fromkeys(text for text in wrapped if text))

    # The image metadata column is not part of the returned tuple, so it is
    # intentionally not parsed here.

    raw_meta = row['audio_meta']
    tags = ast.literal_eval(raw_meta).get('tags', None) if raw_meta else None
    audio_meta = None
    if tags:
        wanted = ('title', 'album', 'artist', 'genre', 'date', 'language')
        shown = [f'{k}: {v}' for k, v in tags.items() if k.lower() in wanted]
        # Normalise "no usable tag" to None so callers need a single check.
        audio_meta = '; '.join(shown) or None
    return audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet

def audio_demo(siblings, page_title, audio_meta, audio, annotator, audio_url):
    """Store the annotator's selection as the caption of one audio row.

    The selected texts from the three checkbox groups are joined with
    newlines into a caption, written to the rows of the module-level ``df``
    whose ``audio_url`` matches, and the whole frame is pushed back to the
    module-level ``worksheet``.

    Args:
        siblings: Selected sibling-element texts (list of str, may be empty).
        page_title: Selected page title(s) (list of str, may be empty).
        audio_meta: Selected audio metadata string(s) (list, may be empty).
        audio: Value of the audio component; not used.
        annotator: Name typed by the user; a random UUID is used when empty.
        audio_url: URL identifying the row to update.

    Returns:
        The string ``'success!'``.
    """
    annotator = annotator if annotator else str(uuid.uuid4())

    # Build a fresh list instead of extending ``siblings`` in place, so the
    # caller's list is never mutated.
    selected = []
    for group in (siblings, page_title, audio_meta):
        if not group:
            continue
        if isinstance(group, str):
            # A bare string would otherwise be spread character by character.
            group = [group]
        # Skip empty placeholders and stringify, so join cannot fail on
        # None or numeric choices.
        selected.extend(str(item) for item in group if item not in (None, '', []))
    cap = '\n'.join(selected)

    # Single .loc call with (row mask, column): the chained form
    # df[col].loc[mask] = value may assign into a temporary and leave df
    # untouched.
    row_mask = df['audio_url'] == audio_url
    df.loc[row_mask, 'caption'] = cap
    df.loc[row_mask, 'annotator'] = annotator

    # NOTE(review): this rewrites the entire sheet from the in-memory frame;
    # confirm that is acceptable if several annotators work concurrently.
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
    return 'success!'


if __name__ == "__main__":
    # Sample one unannotated clip at start-up. df and worksheet become
    # module-level globals that audio_demo reads and updates.
    audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet = sample_df()

    # Offer the page title / metadata as choices only when they hold a value:
    # a missing one would otherwise show up as a None or blank checkbox.
    page_title_choices = [page_title] if page_title else []
    audio_meta_choices = [audio_meta] if audio_meta else []

    iface = gr.Interface(
        audio_demo,
        inputs=[
            gr.CheckboxGroup(sibling_elems, label='sibling elements text'),
            gr.CheckboxGroup(label='page title', choices=page_title_choices),
            gr.CheckboxGroup(audio_meta_choices, label='audio metadata'),
            gr.Audio(audio_path, type="filepath", interactive=False),
            gr.Textbox(label='please enter your name'),
            # Hidden field that carries the row key through to audio_demo.
            gr.Textbox(value=audio_url, visible=False),
        ],
        outputs=[gr.Textbox(label="output")],
        allow_flagging="never",
        title=title,
        description=description,
    )

    iface.launch(show_error=True, debug=True)