Spaces:

marianna13
/

annotate-audio

Runtime error

App Files Files Community

annotate-audio / app.py

marianna13

Update app.py

4f8ce01 over 1 year ago

raw

history blame contribute delete

3.99 kB


	import gradio as gr
	import json
	import re
	import string
	import pandas as pd
	import os
	import requests
	from textwrap import wrap
	import uuid
	import gspread
	import ast



	def download_and_save_file(URL, audio_dir, timeout=30):
	    """Download the file at *URL* into *audio_dir* and return the saved path.

	    The file name is the last segment of the URL path; the query string and
	    fragment are ignored.

	    Args:
	        URL: Address of the file to fetch.
	        audio_dir: Directory that receives the download (must already exist).
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The path of the written file (``audio_dir`` joined with the file name).

	    Raises:
	        ValueError: If the URL path does not end in a file name.
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    # Local import: keeps this function self-contained without touching the
	    # file-level import list.
	    from urllib.parse import urlsplit

	    # Parse the URL instead of splitting on '/': a naive split picks up
	    # slashes that appear inside the query string (``?next=http://host/x``).
	    file_name = os.path.basename(urlsplit(URL).path)
	    if not file_name:
	        # Fail before any network traffic rather than trying to open a directory.
	        raise ValueError(f'cannot derive a file name from URL: {URL!r}')
	    audio_path = os.path.join(audio_dir, file_name)

	    # Browser-like headers for hosts that reject the default client identity.
	    # No explicit accept-encoding: requests then advertises only encodings it
	    # can decode, so the saved bytes are never left compressed.
	    # NOTE(review): the hard-coded browser cookie was dropped on purpose; it
	    # would otherwise be sent verbatim to every download host.
	    headers = {
	        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
	        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
	        'referer': 'https://www.google.com/',
	        'accept-language': 'en-US,en;q=0.9',
	    }
	    doc = requests.get(URL, headers=headers, timeout=timeout)
	    # Do not store an HTML error page under an audio file name.
	    doc.raise_for_status()
	    with open(audio_path, 'wb') as f:
	        f.write(doc.content)
	    return audio_path



	# Materialise the service-account JSON from the CREDENTIALS environment
	# variable as a file, because gspread is later given it by file name.
	credentials = os.environ['CREDENTIALS']
	# strict=False lets json accept raw control characters inside string values.
	data = json.loads(credentials, strict=False)
	with open('credentials.json', 'w') as f:
	    f.write(json.dumps(data))




	# Folder that receives downloaded audio files; created up front so later
	# writes do not fail on a missing directory.
	audio_dir = 'AUDIO'
	os.makedirs(audio_dir, exist_ok=True)

	# Heading and help text handed to the Gradio interface.
	title = '🎵 Annotate audio'
	description = 'Choose a sentence (or sentences) that describes audio the best.'

	# Tag names (compared lower-cased) that are kept in the audio metadata summary.
	_AUDIO_TAG_KEYS = ('title', 'album', 'artist', 'genre', 'date', 'language')


	def _parse_cell_literal(cell, default):
	    """Evaluate a sheet cell holding a Python literal; a blank cell yields *default*."""
	    if isinstance(cell, str) and not cell.strip():
	        return default
	    return ast.literal_eval(cell)


	def sample_df():
	    """Pick one un-captioned row from the sheet and prepare its annotation data.

	    Opens the first worksheet of the 'Annotated CC Audio' spreadsheet, loads
	    it into a DataFrame, samples one row whose ``caption`` is empty, and
	    downloads that row's audio into ``audio_dir``.

	    Returns:
	        A 7-tuple ``(audio_path, audio_url, sibling_elems, audio_meta,
	        page_title, df, worksheet)`` where ``sibling_elems`` is a list of
	        de-duplicated, line-wrapped texts and ``audio_meta`` is a
	        ``'; '``-joined ``key: value`` summary of the selected tags, or the
	        falsy ``tags`` value itself (``None`` when the key is absent).

	    Raises:
	        ValueError: If no row with an empty caption is left.
	    """
	    gc = gspread.service_account(filename='credentials.json')
	    worksheet = gc.open('Annotated CC Audio').sheet1
	    df = pd.DataFrame(worksheet.get_all_records())

	    pending = df[df['caption'] == '']
	    if pending.empty:
	        # Sampling from an empty frame raises an opaque pandas error.
	        raise ValueError('no rows without a caption are left to annotate')
	    # The row's 'imgs_metadata' column is not read: nothing returned from
	    # this function is derived from it.
	    row = pending.sample(1).iloc[0]

	    audio_url = row['audio_url']
	    page_title = row['page_title']
	    audio_path = download_and_save_file(audio_url, audio_dir)

	    raw_siblings = _parse_cell_literal(row['sibling_elems'], [])
	    wrapped = ('\n'.join(wrap(s.replace('\n', ''))) for s in raw_siblings)
	    # dict.fromkeys removes duplicates while keeping first-seen order, so the
	    # choices appear in a stable order; texts that wrap to nothing are dropped.
	    sibling_elems = list(dict.fromkeys(text for text in wrapped if text))

	    audio_meta = _parse_cell_literal(row['audio_meta'], {}).get('tags', None)
	    if audio_meta:
	        audio_meta = '; '.join(
	            f'{k}: {v}'
	            for k, v in audio_meta.items()
	            if k.lower() in _AUDIO_TAG_KEYS
	        )
	    return audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet

	def audio_demo(siblings, page_title, audio_meta, audio, annotator, audio_url):
	    """Save the selected texts as the caption for *audio_url* and push the sheet.

	    Reads and updates the module-level ``df`` and ``worksheet`` objects.

	    Args:
	        siblings: Selected sibling-element texts.
	        page_title: Selected page-title entries.
	        audio_meta: Selected audio-metadata entries.
	        audio: Value of the audio component; not used.
	        annotator: Name entered by the user; a random UUID is used when empty.
	        audio_url: URL identifying the row(s) of ``df`` to update.

	    Returns:
	        The string ``'success!'``.
	    """
	    annotator = annotator if annotator else str(uuid.uuid4())

	    # Collect into a fresh list so the caller's list is not mutated, and
	    # treat a missing (None) selection group as empty.
	    selected = []
	    for group in (siblings, page_title, audio_meta):
	        selected.extend(group or [])
	    # str() keeps join from failing on non-string choices (e.g. a numeric title).
	    cap = '\n'.join(str(s) for s in selected if s is not None and s != [])

	    # Single .loc call with row mask and column label: the chained form
	    # df[col].loc[mask] = value may assign to a temporary copy instead of df.
	    mask = df['audio_url'] == audio_url
	    df.loc[mask, 'caption'] = cap
	    df.loc[mask, 'annotator'] = annotator

	    # Rewrites the sheet with a header row followed by every data row.
	    worksheet.update([df.columns.values.tolist()] + df.values.tolist())
	    return 'success!'


	if __name__ == "__main__":
	    # One un-captioned row is sampled at start-up. df and worksheet are kept
	    # as module-level names because audio_demo reads them as globals.
	    audio_path, audio_url, sibling_elems, audio_meta, page_title, df, worksheet = sample_df()

	    iface = gr.Interface(
	        audio_demo,
	        inputs=[
	            gr.CheckboxGroup(sibling_elems, label='sibling elements text'),
	            gr.CheckboxGroup(label='page title', choices=[page_title]),
	            # sample_df returns a falsy value when the row has no tags; offer
	            # no choice then instead of a meaningless None/empty entry.
	            gr.CheckboxGroup([audio_meta] if audio_meta else [], label='audio metadata'),
	            gr.Audio(audio_path, type="filepath", interactive=False),
	            gr.Textbox(label='please enter your name'),
	            # Hidden field that carries the row key through to audio_demo.
	            gr.Textbox(value=audio_url, visible=False),
	        ],
	        outputs=[gr.Textbox(label="output")],
	        allow_flagging="never",
	        title=title,
	        description=description,
	    )

	    iface.launch(show_error=True, debug=True)