Spaces:
Running
Running
shaipeerms
committed on
Commit
•
aaef8e0
1
Parent(s):
f4289e9
init challenge code
Browse files- README.md +4 -4
- app.py +168 -0
- content.py +58 -0
- requirements.txt +7 -0
- server.py +149 -0
- validation.py +79 -0
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
---
|
2 |
title: CHiME8Challenge
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.21.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
-
|
|
|
1 |
---
|
2 |
title: CHiME8Challenge
|
3 |
+
emoji: π
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: red
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.21.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
+
# CHiME8 Challenge
|
app.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import logging

import pandas as pd
import gradio as gr
from gradio.themes.utils.sizes import text_md

from content import (HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN,
                     MY_SUBMISSIONS_TAB_TITLE_MARKDOWN)
from validation import validate_zip

from server import LeaderboardServer

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

lb_server = LeaderboardServer()

# Track names shown in the UI; the server-side lower_snake_case mapping is done by LeaderboardServer.
LEADERBOARD_TYPES = ['NOTSOFAR-SC', 'NOTSOFAR-MC', 'DASR-Constrained-LM', 'DASR-Unconstrained-LM']
MAX_SUBMISSIONS_PER_24H = 5


with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css="footer {visibility: hidden}") as main:
    # Per-session state shared across tabs; stores the verified Hugging Face token.
    app_state = gr.State({})

    with gr.Row():
        with gr.Row():
            gr.Markdown(HEADER_MARKDOWN)

    with gr.Row():
        # Leaderboards Tab #
        ####################
        with gr.Tab('Leaderboards') as leaderboards_tab:
            gr.Markdown(LEADERBOARD_TAB_TITLE_MARKDOWN)
            with gr.Row():
                def populate_leaderboard(leaderboard_type):
                    """Fetch the leaderboard for a track; placeholder frame when empty."""
                    leaderboard_df = lb_server.get_leaderboard(submission_type=leaderboard_type)
                    if leaderboard_df.empty:
                        return pd.DataFrame(columns=['No submissions yet'])
                    return leaderboard_df

                for idx, tab_name in enumerate(LEADERBOARD_TYPES):
                    with gr.Tab(tab_name) as leaderboard_tab:
                        # Only the first tab is fetched eagerly; the others load on select.
                        leaderboard_table = gr.DataFrame(populate_leaderboard(tab_name)) if idx == 0 else gr.DataFrame(pd.DataFrame(columns=['No submissions yet']))
                        leaderboard_tab.select(fn=populate_leaderboard,
                                               inputs=[gr.Text(tab_name, visible=False)],
                                               outputs=[leaderboard_table])
                        leaderboard_table.change(fn=populate_leaderboard, inputs=[gr.Text(tab_name, visible=False)],
                                                 outputs=[leaderboard_table])

        # Submission Tab #
        ##################
        with gr.Tab('Submission'):
            with gr.Column():
                def on_submit_pressed():
                    """Disable the submit button while a submission is being processed."""
                    return gr.update(value='Processing submission...', interactive=False)

                def validate_submission_inputs(team_name, submission_zip, submission_type, token):
                    """Raise ValueError when any required submission field is missing or invalid."""
                    if not team_name or not submission_zip or not submission_type:
                        raise ValueError('Please fill in all fields')
                    if not os.path.exists(submission_zip):
                        raise ValueError('File does not exist')
                    if not submission_zip.endswith('.zip'):
                        raise ValueError('File must be a zip')
                    if not token:
                        raise ValueError('Please insert a valid Hugging Face token')

                def process_submission(team_name, submission_zip, submission_type, description,
                                       app_state, request: gr.Request):
                    """Validate the uploaded zip and forward it to the leaderboard server."""
                    logging.info(f'{team_name}: new submission for track: {submission_type}')
                    try:
                        token = app_state.get('hf_token')
                        validate_submission_inputs(team_name, submission_zip, submission_type, token)
                        # NOTE(review): a corrupt upload makes validate_zip raise
                        # zipfile.BadZipFile, which is not caught here — consider handling it.
                        validate_zip(submission_type, submission_zip)
                    except ValueError as err:
                        gr.Warning(str(err))
                        return

                    metadata = {'challenge_name': 'NOTSOFAR1',
                                'team_name': team_name,
                                'submission_type': submission_type,
                                'description': description,
                                'token': token,
                                'file_name': os.path.basename(submission_zip),
                                'file_size_mb': os.path.getsize(submission_zip) / 1024 / 1024,
                                'ip': request.client.host}
                    try:
                        gr.Info('Processing submission...')
                        response = lb_server.add_submission(token=token, file_path=submission_zip, metadata=metadata)
                        if 'error' in response:
                            gr.Warning(f'Failed to process submission - {response["error"]}')
                        else:
                            gr.Info('Done processing submission')
                    except Exception as e:
                        gr.Warning(f'Submission failed to upload - {e}')

                def on_submit_done():
                    """Re-enable the submit button after processing completes."""
                    return gr.update(value='Submit', interactive=True)

                gr.Markdown(SUBMISSION_TAB_TITLE_MARKDOWN)
                submission_team_name_tb = gr.Textbox(label='Team Name')
                submission_file_path = gr.File(label='Upload your results', type='filepath')
                submission_type_radio = gr.Radio(label='Submission Track', choices=LEADERBOARD_TYPES)
                with gr.Row():
                    hf_token_tb = gr.Textbox(label='Token', type='password')
                    submissions_24h_txt = gr.Textbox(label='Submissions 24h', value='')
                description_tb = gr.Textbox(label='Description', type='text')
                submission_btn = gr.Button(value='Submit')

                submission_btn.click(
                    fn=on_submit_pressed,
                    outputs=[submission_btn]
                ).then(
                    fn=process_submission,
                    inputs=[submission_team_name_tb, submission_file_path,
                            submission_type_radio, description_tb, app_state]
                ).then(
                    fn=on_submit_done,
                    outputs=[submission_btn]
                )

        # My Submissions Tab #
        ######################
        with gr.Tab('My Submissions') as my_submissions_tab:
            def on_my_submissions_tab_select(app_state):
                """List the submissions linked to the session's verified HF token."""
                hf_token = app_state.get('hf_token')
                if not hf_token:
                    return pd.DataFrame(columns=['Please insert your Hugging Face token'])
                submissions = lb_server.get_submissions_by_hf_token(hf_token=hf_token)
                if submissions.empty:
                    submissions = pd.DataFrame(columns=['No submissions yet'])
                return submissions

            gr.Markdown(MY_SUBMISSIONS_TAB_TITLE_MARKDOWN)
            my_submissions_table = gr.DataFrame()

            my_submissions_tab.select(fn=on_my_submissions_tab_select, inputs=[app_state],
                                      outputs=[my_submissions_table])
            my_submissions_token_tb = gr.Textbox(label='Token', type='password')

        # Token Insertion #
        ###################
        with gr.Row():
            def on_token_insert(hf_token, app_state):
                """Verify a pasted HF token; refresh session state, table and 24h counter."""
                gr.Info('Verifying token...')
                submission_count = lb_server.get_submission_count_last_24_hours(hf_token=hf_token)
                if submission_count is None:
                    # Invalid token
                    app_state['hf_token'] = None
                    submissions_24h_str = ''
                    team_submissions_df = pd.DataFrame(columns=['Invalid Token'])
                    gr.Warning('Invalid token')

                else:
                    app_state['hf_token'] = hf_token
                    submissions_24h_str = f'{submission_count}/{MAX_SUBMISSIONS_PER_24H}'
                    team_submissions_df = lb_server.get_submissions_by_hf_token(hf_token=hf_token)
                    if team_submissions_df.empty:
                        team_submissions_df = pd.DataFrame(columns=['No submissions yet'])
                    gr.Info('Token verified!')

                return app_state, team_submissions_df, submissions_24h_str

            hf_token_tb.change(fn=on_token_insert, inputs=[hf_token_tb, app_state],
                               outputs=[app_state, my_submissions_table, submissions_24h_txt])
            my_submissions_token_tb.change(fn=on_token_insert, inputs=[my_submissions_token_tb, app_state],
                                           outputs=[app_state, my_submissions_table, submissions_24h_txt])

main.launch()
|
content.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
This file contains the text content for the leaderboard client.
"""


HEADER_MARKDOWN = """
# CHiME-8 Leaderboard
In collaboration with the CHiME-8 Challenge, the NOTSOFAR team is proud to host the official leaderboard for the three tasks this year.\n
For details, visit:
1. [DASR](https://www.chimechallenge.org/current/task1/index)
2. [NOTSOFAR](https://www.chimechallenge.org/current/task2/index)
3. [MMCSG](https://www.chimechallenge.org/current/task3/index)


### DASR and NOTSOFAR - the scientific story
Both tasks focus on distant automatic speech recognition and speaker diarization, offering a fundamental comparison
among different system designs:
- Single-channel (SC), 1 device (NOTSOFAR-SC)
- Multi-channel (MC), known-geometry, 1 device (NOTSOFAR-MC)
- Multi-channel (MC), geometry-agnostic, multiple devices (DASR-Constrained-LM and DASR-Unconstrained-LM)

Featured in both tasks, the NOTSOFAR recorded meeting dataset is leveraged as a common benchmark:
each geometry-agnostic MC system submitted to DASR tracks (constrained or not) will also be **automatically submitted**
to the known-geometry NOTSOFAR-MC track. These entries will be marked with "DASR" to denote their origin.
"""


LEADERBOARD_TAB_TITLE_MARKDOWN = """
## Leaderboards for CHiME-8 Tracks
"""


# Fixed: removed a stray trailing apostrophe after "leaderboard" in the Team Name bullet.
SUBMISSION_TAB_TITLE_MARKDOWN = """
## Submission

To submit your results, please fill in the form below.

- *Team Name:* The name of your team, as it will appear on the leaderboard
- *Results:* Results zip file to submit
- *Submission track:* The track to submit results to
- *Token:* Your Hugging Face token
- *Description:* Short description of your submission (optional)

**Hugging Face tokens:** To create a token, go to your profile settings > Access Tokens > New Token.
Name the token and give it a write role, then copy the token and paste it in the field below.\n
**Team creation:** Upon the first submission, your team name is associated with your Hugging Face user account.
Any token generated by your account can be used. All team members should use this specific user's token for
future submissions.

New tokens can be created by the team member who initially linked the team to the token.
"""


MY_SUBMISSIONS_TAB_TITLE_MARKDOWN = """
## My Submissions

To view all submissions, please enter the Hugging Face token associated with your team in the field below
"""
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
pandas
|
3 |
+
azure-cosmos
|
4 |
+
huggingface_hub
|
5 |
+
requests
|
6 |
+
pyarrow
|
7 |
+
tabulate
|
server.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
import pandas as pd
|
6 |
+
import requests
|
7 |
+
|
8 |
+
|
9 |
+
class LeaderboardServer:
    """
    Thin HTTP client for the leaderboard backend service.

    The backend address is read from the LEADERBOARD_SERVER_ADDRESS environment
    variable; construction fails fast (KeyError) when it is missing.
    """

    def __init__(self):
        self._LOG = logging.getLogger('leaderboard_server')
        self._server_address = os.environ['LEADERBOARD_SERVER_ADDRESS']

    @staticmethod
    def _redacted(hf_token: str) -> str:
        """Loggable form of a secret token: keep a short prefix, never the full secret."""
        return f'{hf_token[:4]}***' if hf_token else '<empty>'

    def get_leaderboard(self, submission_type: str) -> pd.DataFrame:
        """
        Gets the leaderboard of the given submission type.
        Args:
            submission_type: the type of the submission to get the leaderboard of:
                'SC' / 'MC-specific' / 'MC-agnostic' / 'MC-agnostic-all'
        Returns:
            The leaderboard as a DataFrame; an empty DataFrame on server error.
        """
        self._LOG.info(f'Getting leaderboard for submission type: {submission_type}')
        endpoint = f'{self._server_address}/leaderboard'
        # The server expects lower_snake_case track names (e.g. 'notsofar_mc').
        submission_type = submission_type.lower().replace('-', '_')
        response = requests.get(endpoint, params={'submission_type': submission_type})
        if response.status_code != 200:
            self._LOG.error(f'Error while fetching leaderboard, status code: {response.status_code}, '
                            f'response: {response.text}, endpoint: {endpoint}')
            return pd.DataFrame()
        return pd.DataFrame(response.json())

    def get_submissions_by_hf_token(self, hf_token: str) -> pd.DataFrame:
        """
        Gets the submissions of the given hf token.
        Args:
            hf_token: the hf token to get the submissions of
        Returns:
            The submissions as a DataFrame; an empty DataFrame on server error.
        """
        # Security: never log the full token.
        self._LOG.info(f'Getting submissions for hf token: {self._redacted(hf_token)}')
        endpoint = f'{self._server_address}/submissions'
        # NOTE(review): the token travels as a query parameter and may end up in
        # server access logs — consider moving it to a header or the request body.
        response = requests.get(endpoint, params={'token': hf_token})
        if response.status_code != 200:
            self._LOG.error(f'Error while fetching submissions, status code: {response.status_code}, '
                            f'response: {response.text}, endpoint: {endpoint}')
            return pd.DataFrame()
        return pd.DataFrame(response.json())

    def is_hf_token_valid(self, hf_token: str) -> Optional[bool]:
        """
        Validates the given hf token.
        Args:
            hf_token: the hf token to validate
        Returns:
            True/False for a definite answer, None when the server call failed.
        """
        self._LOG.info(f'Validating hf token: {self._redacted(hf_token)}')
        endpoint = f'{self._server_address}/validate_hf_token'
        response = requests.get(endpoint, params={'token': hf_token})
        if response.status_code != 200:
            self._LOG.error(f'Error while validating hf token, status code: {response.status_code}, '
                            f'response: {response.text}, endpoint: {endpoint}')
            return None
        return response.json()['valid']

    def get_submission_count_last_24_hours(self, hf_token: str) -> Optional[int]:
        """
        Gets the number of submissions of the given hf token in the last 24 hours.
        Args:
            hf_token: the hf token to get the submissions count of
        Returns:
            The count, or None when the server call failed (callers treat None
            as "invalid token").
        """
        self._LOG.info(f'Getting submissions count for hf token: {self._redacted(hf_token)} in the last 24 hours')
        endpoint = f'{self._server_address}/submission_count_last_24_hours'
        response = requests.get(endpoint, params={'token': hf_token})
        if response.status_code != 200:
            self._LOG.error(f'Error while fetching submissions count, status code: {response.status_code}, '
                            f'response: {response.text}, endpoint: {endpoint}')
            return None
        return int(response.json()['count'])

    def add_submission(self, token: str, file_path: str, metadata: dict) -> dict:
        """
        Adds a submission to the leaderboard based on the given file and metadata.
        Args:
            token: the token of the team
            file_path: the path of the file to submit
            metadata: the metadata of the submission, structure:
                {
                    'challenge_name': 'NOTSOFAR1',
                    'team_name': (str),
                    'submission_type': (str),
                    'token': (str),
                    'file_name': (str),
                    'file_size_mb': (int),
                    'ip': (str) xxx.xxx.xxx.xxx
                }
        Returns:
            The server's JSON response, or {'error': message} on failure.
        """
        self._LOG.info(f'Adding submission for team: {metadata["team_name"]}, '
                       f'submission type: {metadata["submission_type"]}')
        endpoint = f'{self._server_address}/add_submission'
        metadata['token'] = token
        # Normalize the UI track name to the server's lower_snake_case form.
        metadata['submission_type'] = metadata['submission_type'].lower().replace('-', '_')
        with open(file_path, 'rb') as payload_file:
            files = {'zip_file': payload_file}
            response = requests.post(endpoint, files=files, params=metadata, timeout=600)
            if response.status_code != 200:
                self._LOG.error(f'Error while adding submission, status code: {response.status_code}, '
                                f'response: {response.text}, endpoint: {endpoint}')
                # Fix: the error body may not be JSON (e.g. a proxy error page) —
                # previously response.json() could raise while reporting the error.
                try:
                    message = response.json()['message']
                except (ValueError, KeyError):
                    message = response.text
                return dict(error=message)
            return response.json()
|
106 |
+
|
107 |
+
|
108 |
+
def test_server():
    """
    Basic server tests for the leaderboard server.

    Requires LEADERBOARD_SERVER_ADDRESS and HF_TOKEN in the environment, plus
    local test zip files; intended for manual developer runs only.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    server = LeaderboardServer()
    hf_token = str(os.environ['HF_TOKEN'])
    print('leaderboard:\n', server.get_leaderboard('notsofar_mc'))
    print('submissions by hf token:\n', server.get_submissions_by_hf_token(hf_token))
    print('is hf token valid:\n', server.is_hf_token_valid(hf_token))
    # A mangled token must be reported invalid.
    print('is hf token valid:\n', server.is_hf_token_valid(hf_token + '1'))

    # One submission per track family, driven by a spec table to avoid duplication.
    submission_specs = [
        (r"C:\Users\shaipeer\Downloads\submissions\notsofar_submission.zip",
         'NOTSOFAR Test Team', 'notsofar_mc', 'Test NOTSOFAR submission', 'notsofar_submission.zip'),
        (r"C:\Users\shaipeer\Downloads\submissions\chime_submission.zip",
         'Chime Test Team', 'dasr_unconstrained_lm', 'Test chime submission', 'chime_submission.zip'),
    ]
    for zip_path, team_name, track, description, file_name in submission_specs:
        print('add_submission:\n', server.add_submission(
            token=hf_token,
            file_path=zip_path,
            metadata={
                'challenge_name': 'NOTSOFAR1',
                'team_name': team_name,
                'submission_type': track,
                'description': description,
                'token': hf_token,
                'file_name': file_name,
                'file_size_mb': 10,
                'ip': '127.0.0.1'
            }))


if __name__ == '__main__':
    test_server()
|
validation.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from pathlib import Path
|
3 |
+
from zipfile import ZipFile
|
4 |
+
from typing import List, Dict, Any
|
5 |
+
from tempfile import TemporaryDirectory
|
6 |
+
|
7 |
+
|
8 |
+
def validate_zip(submission_track: str, submission_zip: str):
    """
    Validates the submission format and contents.

    Args:
        submission_track: the track of the submission
        submission_zip: path to the submission zip file
    Raises:
        ValueError: if the submission zip is invalid
    """
    notsofar_tracks = {'NOTSOFAR-SC', 'NOTSOFAR-MC'}
    dasr_tracks = {'DASR-Constrained-LM', 'DASR-Unconstrained-LM'}

    with TemporaryDirectory() as extraction_root:
        # Unpack the archive, then hand the extracted tree to the track-specific validator.
        with ZipFile(submission_zip, 'r') as archive:
            archive.extractall(extraction_root)
        extracted_dir = Path(extraction_root)
        if submission_track in notsofar_tracks:
            validate_notsofar_submission(submission_dir=extracted_dir)
        elif submission_track in dasr_tracks:
            validate_dasr_submission(submission_dir=extracted_dir)
        else:
            raise ValueError(f'Invalid submission track: {submission_track}')
|
28 |
+
|
29 |
+
|
30 |
+
def validate_notsofar_submission(submission_dir: Path):
    """
    Validates NOTSOFAR submission format and contents.

    Args:
        submission_dir: path to the submission directory
    Raises:
        ValueError: if the submission zip is invalid
    """
    required_files = ['tc_orc_wer_hyp.json', 'tcp_wer_hyp.json']
    fields = ['session_id', 'words', 'speaker', 'start_time', 'end_time']

    for file_name in required_files:
        hyp_path = submission_dir / file_name
        if not hyp_path.exists():
            raise ValueError(f'Missing {file_name}')
        with open(hyp_path, 'r') as hyp_file:
            entries: List[Dict[str, Any]] = json.load(hyp_file)
        # Top-level shape: a JSON array of segment entries.
        if not isinstance(entries, list):
            raise ValueError(f'Invalid `{file_name}` format, expecting a list of entries')
        for entry in entries:
            if not all(field in entry for field in fields):
                raise ValueError(f'Invalid `{file_name}` format, fields: {fields} are required in each entry')
|
52 |
+
|
53 |
+
|
54 |
+
def validate_dasr_submission(submission_dir: Path):
    """
    Validates DASR submission format and contents.

    Args:
        submission_dir: path to the submission directory
    Raises:
        ValueError: if the submission zip is invalid
    """
    required_files = ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json']
    fields = ['session_id', 'words', 'speaker', 'start_time', 'end_time']

    dev_dir = submission_dir / 'dev'
    if not dev_dir.exists():
        raise ValueError('Missing dev directory, expecting a directory named `dev` with the submission files in it.')

    for file_name in required_files:
        hyp_path = dev_dir / file_name
        if not hyp_path.exists():
            raise ValueError(f'Missing {file_name}')
        with open(hyp_path, 'r') as hyp_file:
            entries: List[Dict[str, Any]] = json.load(hyp_file)
        # Top-level shape: a JSON array of segment entries.
        if not isinstance(entries, list):
            raise ValueError(f'Invalid `{file_name}` format, expecting a list of entries')
        for entry in entries:
            if not all(field in entry for field in fields):
                raise ValueError(f'Invalid `{file_name}` format, fields: {fields} are required in each entry')
|