File size: 3,986 Bytes
89d4656
f97c0ed
 
 
 
 
 
 
4766c38
89d4656
dd657ca
9d65325
2b05ce7
f97c0ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4766c38
f97c0ed
 
 
 
4766c38
 
 
 
f97c0ed
 
89d4656
f97c0ed
 
c343c55
f97c0ed
4766c38
 
 
9d65325
 
 
 
 
 
 
 
 
 
 
 
4f8ce01
9d65325
 
 
 
 
 
 
 
 
 
 
89d4656
9d65325
 
 
 
89d4656
 
4766c38
f97c0ed
 
 
 
3a97bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d11cee1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

import gradio as gr
import json
import re
import string
import pandas as pd
import os
import requests
from textwrap import wrap
import uuid
import gspread
import ast



def download_and_save_file(URL, audio_dir):
    """Download ``URL`` and store the response body as a file in ``audio_dir``.

    The file name is the last path segment of the URL with any query string
    removed. When the URL has no usable last segment (for example it ends
    with ``/``), a random hexadecimal name is used instead.

    Args:
        URL: Address of the file to fetch.
        audio_dir: Directory the file is written into; it must already exist.

    Returns:
        The path of the written file, ``f'{audio_dir}/{file_name}'``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the file cannot be written.
    """
    # Browser-like request headers (presumably so hosts that reject
    # non-browser clients still serve the file).
    #  * No 'cookie' entry: a hard-coded session cookie must not be sent to
    #    arbitrary hosts.
    #  * 'br' is not advertised: Requests only decodes Brotli when an optional
    #    Brotli library is installed, otherwise the saved bytes could still be
    #    compressed.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://www.google.com/',
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'en-US,en;q=0.9,',
    }
    # (connect, read) timeout so an unresponsive host cannot hang the app.
    doc = requests.get(URL, headers=headers, timeout=(10, 60))
    # Fail loudly instead of saving an HTML error page as if it were audio.
    doc.raise_for_status()

    file_name = URL.split('/')[-1].split('?')[0]
    if not file_name:
        # Without a name the target path would be the directory itself.
        file_name = uuid.uuid4().hex
    audio_path = f'{audio_dir}/{file_name}'
    with open(audio_path, 'wb') as f:
        f.write(doc.content)
    return audio_path



# The service-account key arrives as a JSON string in the CREDENTIALS
# environment variable; it is written to credentials.json because the sheet
# client is later created from that file name.
# strict=False lets the parser accept control characters inside JSON strings.
credentials = os.environ['CREDENTIALS']
data = json.loads(credentials, strict=False)
with open('credentials.json', 'w') as f:
    f.write(json.dumps(data))




# Directory that receives the downloaded audio clips. It is created at import
# time so that files can be written into it later on.
audio_dir = 'AUDIO'
os.makedirs(audio_dir, exist_ok=True)

# Static texts displayed at the top of the annotation page.
title = '🎵 Annotate audio'
description = 'Choose a sentence (or sentences) that describes audio the best.'

# Tag names (lower-case) that are kept when summarising a clip's audio metadata.
_AUDIO_TAG_KEYS = ('title', 'album', 'artist', 'genre', 'date', 'language')


def _clean_sibling_texts(raw_siblings):
    """Turn the stored ``sibling_elems`` cell into unique, line-wrapped strings."""
    wrapped = []
    for text in ast.literal_eval(raw_siblings):
        text = text.replace('\n', '')
        if text:
            wrapped.append('\n'.join(wrap(text)))
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # checkbox order is stable between runs (a set would shuffle it).
    # Whitespace-only texts wrap to '' and are dropped here as well.
    return [text for text in dict.fromkeys(wrapped) if text]


def _format_audio_tags(raw_meta):
    """Summarise the ``tags`` entry of the stored ``audio_meta`` cell as one string."""
    tags = ast.literal_eval(raw_meta).get('tags', None)
    if not tags:
        # No tags recorded: hand the falsy value back unchanged.
        return tags
    return '; '.join(
        f'{key}: {value}'
        for key, value in tags.items()
        if key.lower() in _AUDIO_TAG_KEYS
    )


def sample_df():
    """Pick one not-yet-captioned row from the sheet and prepare it for the UI.

    Opens the first worksheet of the 'Annotated CC Audio' spreadsheet with the
    service-account key in ``credentials.json``, loads all records into a
    DataFrame, randomly selects one row whose ``caption`` is empty, downloads
    its audio file and cleans up the candidate caption texts.

    The ``imgs_metadata`` column is not parsed: nothing derived from it is
    part of the return value.

    Returns:
        A tuple ``(audio_path, audio_url, sibling_elems, audio_meta,
        page_title, df, worksheet)`` where

        * ``audio_path`` is the local path of the downloaded audio file,
        * ``audio_url`` is the row's source URL,
        * ``sibling_elems`` is a list of unique, line-wrapped text snippets,
        * ``audio_meta`` is a ``'; '``-joined ``key: value`` summary of the
          selected tags, or the falsy ``tags`` value when there are none,
        * ``page_title`` is the row's page title,
        * ``df`` is the DataFrame holding every sheet record,
        * ``worksheet`` is the gspread worksheet the records came from.

    Raises:
        ValueError: If no row with an empty caption is left.
    """
    gc = gspread.service_account(filename='credentials.json')
    sh = gc.open('Annotated CC Audio')
    worksheet = sh.sheet1
    df = pd.DataFrame(worksheet.get_all_records())

    pending = df[df['caption'] == '']
    if pending.empty:
        # Sampling from an empty frame would fail with an opaque pandas
        # message; report the actual situation instead.
        raise ValueError('No rows with an empty caption are left to annotate.')
    row = pending.sample(1).iloc[0]

    audio_url = row['audio_url']
    page_title = row['page_title']
    audio_path = download_and_save_file(audio_url, audio_dir)
    sibling_elems = _clean_sibling_texts(row['sibling_elems'])
    audio_meta = _format_audio_tags(row['audio_meta'])
    return audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet

def audio_demo(siblings, page_title, audio_meta, audio, annotator, audio_url):
    """Store the annotator's selection as the caption of one audio clip.

    The selected texts are joined with newlines into a caption, written into
    the module-level ``df`` for every row whose ``audio_url`` matches, and the
    whole table (header plus all rows) is then sent to the module-level
    ``worksheet``.

    Args:
        siblings: Selected sibling-element texts (list of str, may be empty).
        page_title: Selected page-title entries (list of str, may be empty).
        audio_meta: Selected audio-metadata entries (list of str, may be empty).
        audio: Value of the audio component; not used here, it is only present
            because every input component is passed positionally.
        annotator: Name typed by the annotator; a random UUID is used when it
            is empty.
        audio_url: URL identifying the row(s) to update.

    Returns:
        The string ``'success!'``.
    """
    annotator = annotator if annotator else str(uuid.uuid4())

    # Build a fresh list instead of extending ``siblings`` in place, so the
    # caller's list is never mutated. ``or []`` tolerates a missing selection.
    selected = [*(siblings or []), *(page_title or []), *(audio_meta or [])]
    # Keep only non-empty strings: an empty or None choice must not break the
    # join or add blank caption lines.
    cap = '\n'.join(s for s in selected if isinstance(s, str) and s)

    # Single .loc call with row mask and column label. The chained form
    # ``df[col].loc[mask] = value`` may assign to a temporary copy and leave
    # ``df`` unchanged.
    row_mask = df['audio_url'] == audio_url
    df.loc[row_mask, 'caption'] = cap
    df.loc[row_mask, 'annotator'] = annotator

    # NOTE(review): this rewrites the whole sheet from the in-memory snapshot;
    # edits made to the sheet after the snapshot was loaded would be
    # overwritten - confirm this is acceptable for concurrent annotators.
    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
    return 'success!'


if __name__ == "__main__":
    # Draw the clip to annotate once, at start-up. ``df`` and ``worksheet``
    # become module-level names here, which is where audio_demo reads them.
    (
        audio_path,
        audio_url,
        sibling_elems,
        audio_meta,
        page_title,
        df,
        worksheet,
    ) = sample_df()

    # Input components, in the same order as audio_demo's parameters.
    input_components = [
        gr.CheckboxGroup(sibling_elems, label='sibling elements text'),
        gr.CheckboxGroup(label='page title', choices=[page_title]),
        gr.CheckboxGroup([audio_meta], label='audio metadata'),
        gr.Audio(audio_path, type="filepath", interactive=False),
        gr.Textbox(label='please enter your name'),
        # Hidden field that carries the clip's URL through to audio_demo.
        gr.Textbox(value=audio_url, visible=False),
    ]
    output_components = [gr.Textbox(label="output")]

    iface = gr.Interface(
        audio_demo,
        inputs=input_components,
        outputs=output_components,
        allow_flagging="never",
        title=title,
        description=description,
    )

    iface.launch(show_error=True, debug=True)