marianna13 committed on
Commit
f97c0ed
1 Parent(s): 2a05e60

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import spacy
4
+ import re
5
+ import string
6
+ import pandas as pd
7
+ import os
8
+ import requests
9
+
10
# Shared spaCy pipeline, created once at import time. It is used further down
# to split the sampled text into sentences.
nlp = spacy.load("en_core_web_sm")

# Register the "sentencizer" component on the loaded pipeline.
# NOTE(review): en_core_web_sm presumably already sets sentence boundaries via
# its parser, so this component may be redundant - confirm before removing.
nlp.add_pipe('sentencizer')
12
+
13
+
14
+
15
def read_gs(sheet_url):
    """Read a Google Sheets tab into a pandas DataFrame.

    The ``/edit#gid=`` part of the link is swapped for
    ``/export?format=csv&gid=`` and the resulting location is passed to
    ``pandas.read_csv``. A value that does not contain ``/edit#gid=`` is
    handed to ``read_csv`` unchanged.

    Args:
        sheet_url: Link to the sheet, in the ``.../edit#gid=<id>`` form.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    edit_fragment = '/edit#gid='
    export_fragment = '/export?format=csv&gid='
    # split + join rewrites every occurrence of the edit fragment.
    csv_location = export_fragment.join(sheet_url.split(edit_fragment))
    return pd.read_csv(csv_location)
19
+
20
+
21
def download_and_save_file(URL, audio_dir):
    """Download ``URL`` and write the response body into ``audio_dir``.

    The file name is the last path segment of ``URL`` with any query string
    removed. ``audio_dir`` must already exist.

    Args:
        URL: Address of the file to fetch.
        audio_dir: Existing directory the file is written to.

    Returns:
        The path of the written file, ``f'{audio_dir}/{file_name}'``.

    Raises:
        ValueError: If no file name can be derived from ``URL`` (for example
            when it ends with ``/``).
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Derive the target name first so an unusable URL fails before any
    # network traffic instead of failing later inside ``open``.
    file_name = URL.split('/')[-1].split('?')[0]
    if not file_name:
        raise ValueError(f'Cannot derive a file name from URL: {URL!r}')

    # Browser-like headers sent with the request.
    # NOTE(review): the cookie value looks like a session cookie copied from a
    # browser capture - confirm it is needed before sending it to other hosts.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://www.google.com/',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9,',
        'cookie': 'prov=6bb44cc9-dfe4-1b95-a65d-5250b3b4c9fb; _ga=GA1.2.1363624981.1550767314; __qca=P0-1074700243-1550767314392; notice-ctt=4%3B1550784035760; _gid=GA1.2.1415061800.1552935051; acct=t=4CnQ70qSwPMzOe6jigQlAR28TSW%2fMxzx&s=32zlYt1%2b3TBwWVaCHxH%2bl5aDhLjmq4Xr',
    }

    # A timeout keeps the app from hanging forever on an unresponsive host.
    response = requests.get(URL, headers=headers, timeout=30)
    # Fail loudly on HTTP errors rather than saving an error page as audio.
    response.raise_for_status()

    audio_path = f'{audio_dir}/{file_name}'
    with open(audio_path, 'wb') as f:
        f.write(response.content)
    return audio_path
36
+
37
def select_samples(
    sheet_url='https://docs.google.com/spreadsheets/d/17QG4puJRXN8V5froIv8YrJIMsns0GTt4/edit#gid=1020901598',
    audio_dir='AUDIO',
):
    """Pick one random row from the sheet and download its audio file.

    The sheet is loaded with ``read_gs``, one row is drawn with
    ``DataFrame.sample(1)``, and the file referenced by the row's ``url``
    column is saved with ``download_and_save_file``.

    Args:
        sheet_url: Google Sheets link passed to ``read_gs``. Defaults to the
            sheet this app was written for.
        audio_dir: Directory the audio is saved to; created if missing.

    Returns:
        A ``(audio_path, text)`` tuple: the path of the downloaded file and
        the value of the sampled row's ``text`` column.
    """
    df = read_gs(sheet_url)
    os.makedirs(audio_dir, exist_ok=True)
    sampled = df.sample(1)

    # Item access cannot clash with DataFrame attributes, unlike ``df.url``.
    audio_url = sampled['url'].values[0]
    audio_path = download_and_save_file(audio_url, audio_dir)
    return audio_path, sampled['text'].values[0]
49
+
50
+
51
# Texts shown at the top of the Gradio page.
title = '🎵 Annotate audio'
description = '''Choose a sentence that describes audio the best if there's no such sentence please choose `No audio description`'''

# Draw one random sample at import time: the downloaded audio file and the
# text that accompanies it.
audio_path, full_text = select_samples()

# Remove the inline ``###audio###<digit>###`` markers BEFORE stripping
# punctuation. ``string.punctuation`` contains '#', so stripping first would
# delete the hashes, the pattern could never match, and "audio<digit>" residue
# would stay in the sentences.
full_text = re.sub(r'###audio###\d###', '', full_text)
full_text = full_text.translate(str.maketrans('', '', string.punctuation))

# One dropdown choice per sentence found by the spaCy pipeline, plus an
# explicit opt-out entry at the end.
sents = [s.text for s in nlp(full_text).sents]
sents.append('No audio description')
58
+
59
def audio_demo(text, audio, audio_id):
    """Store the chosen description for an audio clip in ``data.json``.

    The file is opened in write mode, so each call replaces whatever an
    earlier call stored.

    Args:
        text: Description selected for the clip.
        audio: Audio input value from the interface; not used here.
        audio_id: Identifier of the clip, stored under the ``audio`` key.

    Returns:
        The literal string ``'success!'``.
    """
    record = {'audio': audio_id, 'text': text}
    with open('data.json', 'w') as out_file:
        json.dump(record, out_file)
    return 'success!'
68
+
69
+
70
# Gradio UI. The three inputs line up with the parameters of ``audio_demo``
# (text, audio, audio_id): a dropdown with the candidate sentences, a player
# for the sampled audio file, and a hidden textbox carrying the audio path as
# the identifier.
iface = gr.Interface(
    fn=audio_demo,
    inputs=[
        gr.Dropdown(sents, label='audio description'),
        gr.Audio(audio_path, type="filepath"),
        gr.Textbox(value=audio_path, visible=False),
    ],
    outputs=[gr.Textbox(label="output")],
    title=title,
    description=description,
    allow_flagging="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(debug=True, show_error=True)