shaipeerms committed on
Commit
fbb1d85
1 Parent(s): bdf49c6

Added validate_zip tests

Browse files
Files changed (1) hide show
  1. validation.py +110 -47
validation.py CHANGED
@@ -1,4 +1,6 @@
 
1
  import json
 
2
  from pathlib import Path
3
  from zipfile import ZipFile
4
  from typing import List, Dict, Any, Union
@@ -92,53 +94,114 @@ def validate_json_file_structure(file_path: Path, fields: List[str]):
92
  raise ValueError(f'Invalid `{file_path.name}` format, fields: {fields} are required in each entry')
93
 
94
 
95
- def test_validate_zip(data_samples: int = 10):
96
- import os
97
- with TemporaryDirectory() as temp_dir:
98
- submission_zip = Path(temp_dir) / 'submission.zip'
99
- valid_data = [{'session_id': 'session_id', 'words': 'words', 'speaker': 'speaker',
100
- 'start_time': 0.0, 'end_time': 1.0} for _ in range(data_samples)]
101
- invalid_data = [{'session_id': 'session_id', 'words': 'words', 'start_time': 0.0} for _ in range(data_samples)]
102
-
103
- def create_test_data(submission_track: str, data: List[Dict[str, Any]], json_file_names: List[str],
104
- parent_zip_dir: str = None):
105
- submission_dir = Path(temp_dir) / submission_track
106
- os.makedirs(submission_dir, exist_ok=True)
107
- with ZipFile(submission_zip, 'w') as submission_zip_file:
108
- for json_file_name in json_file_names:
109
- if parent_zip_dir:
110
- json_file_name = str(Path(parent_zip_dir) / json_file_name)
111
- submission_zip_file.writestr(json_file_name, json.dumps(data))
112
- return submission_track, submission_zip
113
-
114
- def test(track: str, data: List[Dict[str, Any]], json_file_names: List[str], expected_error: bool,
115
- parent_zip_dir=None):
116
- try:
117
- validate_zip(*create_test_data(track, data, json_file_names, parent_zip_dir))
118
- assert not expected_error, f'Expected error for {track}'
119
- except ValueError as e:
120
- assert expected_error, f'Unexpected error for {track}'
121
-
122
- # NOTSOFAR-SC
123
- test('NOTSOFAR-SC', valid_data, ['tcp_wer_hyp.json'], False)
124
- test('NOTSOFAR-SC', valid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'], False)
125
- test('NOTSOFAR-SC', invalid_data, ['tcp_wer_hyp.json'], True)
126
- test('NOTSOFAR-SC', invalid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'], True)
127
-
128
- # NOTSOFAR-MC
129
- test('NOTSOFAR-MC', valid_data, ['tcp_wer_hyp.json'], False)
130
- test('NOTSOFAR-MC', valid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'], False)
131
- test('NOTSOFAR-MC', invalid_data, ['tcp_wer_hyp.json'], True)
132
- test('NOTSOFAR-MC', invalid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'], True)
133
-
134
- # DASR-Constrained-LM
135
- test('DASR-Constrained-LM', valid_data, ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], False, 'dev')
136
- test('DASR-Constrained-LM', invalid_data, ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], True, 'dev')
137
-
138
- # DASR-Unconstrained-LM
139
- test('DASR-Unconstrained-LM', valid_data, ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], False, 'dev')
140
- test('DASR-Unconstrained-LM', invalid_data, ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], True, 'dev')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
 
143
  if __name__ == '__main__':
144
- test_validate_zip()
 
1
+ import os
2
  import json
3
+ import unittest
4
  from pathlib import Path
5
  from zipfile import ZipFile
6
  from typing import List, Dict, Any, Union
 
94
  raise ValueError(f'Invalid `{file_path.name}` format, fields: {fields} are required in each entry')
95
 
96
 
97
+ ####################################################################################################
98
+ # Tests
99
+ ####################################################################################################
100
+
101
+ class TestValidateZip(unittest.TestCase):
102
+ DATA_SAMPLES = 10
103
+
104
+ @classmethod
105
+ def setUpClass(cls):
106
+ cls.valid_data = [{'session_id': 'session_id', 'words': 'words', 'speaker': 'speaker',
107
+ 'start_time': 0.0, 'end_time': 1.0} for _ in range(cls.DATA_SAMPLES)]
108
+ cls.invalid_data = [{'session_id': 'session_id', 'words': 'words',
109
+ 'start_time': 0.0} for _ in range(cls.DATA_SAMPLES)]
110
+
111
+ def setUp(self):
112
+ self.temp_dir = TemporaryDirectory()
113
+ self.submission_zip = Path(self.temp_dir.name) / 'submission.zip'
114
+
115
+ def create_test_data(self, submission_track: str, data: List[Dict[str, Any]], json_file_names: List[str],
116
+ parent_zip_dir: str = None):
117
+ submission_dir = Path(self.temp_dir.name) / submission_track
118
+ os.makedirs(submission_dir, exist_ok=True)
119
+ with ZipFile(self.submission_zip, 'w') as submission_zip_file:
120
+ for json_file_name in json_file_names:
121
+ if parent_zip_dir:
122
+ json_file_name = str(Path(parent_zip_dir) / json_file_name)
123
+ submission_zip_file.writestr(json_file_name, json.dumps(data))
124
+ return submission_track, self.submission_zip
125
+
126
+ def tearDown(self):
127
+ self.temp_dir.cleanup()
128
+
129
+ def test_NOTSOFAR_SC_valid_data_tcp(self):
130
+ self.assertEqual(validate_zip(*self.create_test_data(
131
+ 'NOTSOFAR-SC', self.valid_data, ['tcp_wer_hyp.json'])), None)
132
+
133
+ def test_NOTSOFAR_SC_valid_data_tcp_and_tcorc(self):
134
+ self.assertEqual(validate_zip(*self.create_test_data(
135
+ 'NOTSOFAR-SC', self.valid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'])), None)
136
+
137
+ def test_NOTSOFAR_SC_missing_tcp_file(self):
138
+ with self.assertRaises(ValueError):
139
+ validate_zip(*self.create_test_data(
140
+ 'NOTSOFAR-SC', self.valid_data, ['tc_orc_wer_ref.json']))
141
+
142
+ def test_NOTSOFAR_SC_invalid_data(self):
143
+ with self.assertRaises(ValueError):
144
+ validate_zip(*self.create_test_data(
145
+ 'NOTSOFAR-SC', self.invalid_data, ['tcp_wer_hyp.json']))
146
+
147
+ def test_NOTSOFAR_MC_valid_data_tcp(self):
148
+ self.assertEqual(validate_zip(*self.create_test_data(
149
+ 'NOTSOFAR-MC', self.valid_data, ['tcp_wer_hyp.json'])), None)
150
+
151
+ def test_NOTSOFAR_MC_valid_data_tcp_and_tcorc(self):
152
+ self.assertEqual(validate_zip(*self.create_test_data(
153
+ 'NOTSOFAR-MC', self.valid_data, ['tcp_wer_hyp.json', 'tc_orc_wer_ref.json'])), None)
154
+
155
+ def test_NOTSOFAR_MC_missing_tcp_file(self):
156
+ with self.assertRaises(ValueError):
157
+ validate_zip(*self.create_test_data(
158
+ 'NOTSOFAR-MC', self.valid_data, ['tc_orc_wer_ref.json']))
159
+
160
+ def test_NOTSOFAR_MC_invalid_data(self):
161
+ with self.assertRaises(ValueError):
162
+ validate_zip(*self.create_test_data(
163
+ 'NOTSOFAR-MC', self.invalid_data, ['tcp_wer_hyp.json']))
164
+
165
+ def test_DASR_Constrained_LM_valid_data(self):
166
+ self.assertEqual(validate_zip(*self.create_test_data('DASR-Constrained-LM', self.valid_data,
167
+ ['chime6.json', 'dipco.json', 'mixer6.json',
168
+ 'notsofar1.json'], 'dev')), None)
169
+
170
+ def test_DASR_Constrained_LM_invalid_data(self):
171
+ with self.assertRaises(ValueError):
172
+ validate_zip(*self.create_test_data('DASR-Constrained-LM', self.invalid_data,
173
+ ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], 'dev'))
174
+
175
+ def test_DASR_Constrained_LM_missing_dev_dir(self):
176
+ with self.assertRaises(ValueError):
177
+ validate_zip(*self.create_test_data('DASR-Constrained-LM', self.valid_data,
178
+ ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json']))
179
+
180
+ def test_DASR_Constrained_LM_missing_json_file(self):
181
+ with self.assertRaises(ValueError):
182
+ validate_zip(*self.create_test_data('DASR-Constrained-LM', self.valid_data,
183
+ ['chime6.json', 'dipco.json', 'mixer6.json'], 'dev'))
184
+
185
+ def test_DASR_Unconstrained_LM_valid_data(self):
186
+ self.assertEqual(validate_zip(*self.create_test_data('DASR-Unconstrained-LM', self.valid_data,
187
+ ['chime6.json', 'dipco.json', 'mixer6.json',
188
+ 'notsofar1.json'], 'dev')), None)
189
+
190
+ def test_DASR_Unconstrained_LM_invalid_data(self):
191
+ with self.assertRaises(ValueError):
192
+ validate_zip(*self.create_test_data('DASR-Unconstrained-LM', self.invalid_data,
193
+ ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json'], 'dev'))
194
+
195
+ def test_DASR_Unconstrained_LM_missing_dev_dir(self):
196
+ with self.assertRaises(ValueError):
197
+ validate_zip(*self.create_test_data('DASR-Unconstrained-LM', self.valid_data,
198
+ ['chime6.json', 'dipco.json', 'mixer6.json', 'notsofar1.json']))
199
+
200
+ def test_DASR_Unconstrained_LM_missing_json_file(self):
201
+ with self.assertRaises(ValueError):
202
+ validate_zip(*self.create_test_data('DASR-Unconstrained-LM', self.valid_data,
203
+ ['chime6.json', 'dipco.json', 'mixer6.json'], 'dev'))
204
 
205
 
206
  if __name__ == '__main__':
207
+ unittest.main()