Terry Zhuo committed on
Commit
9fd6b97
1 Parent(s): 89eb067
Files changed (1) hide show
  1. app.py +4 -28
app.py CHANGED
@@ -12,8 +12,6 @@ from typing import Any, Dict, List, Tuple
12
  from warnings import warn
13
 
14
  import numpy as np
15
- from termcolor import cprint
16
- from tqdm import tqdm
17
 
18
  from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
19
  from bigcodebench.data.utils import CACHE_DIR
@@ -27,14 +25,10 @@ def get_groundtruth(n_workers, problems, hashcode, check_gt_only, max_as_limit,
27
  cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
28
  if os.path.exists(cache_file):
29
  if check_gt_only:
30
- os.remove(cache_file)
31
- else:
32
- print(f"Load from ground-truth from {cache_file}")
33
  with open(cache_file, "rb") as f:
34
  return pickle.load(f)
35
 
36
  os.makedirs(CACHE_DIR, exist_ok=True)
37
- print("\nAsserting the groundtruth...")
38
  tbegin = time.time()
39
 
40
  with ProcessPoolExecutor(max_workers=n_workers) as executor:
@@ -56,12 +50,10 @@ def get_groundtruth(n_workers, problems, hashcode, check_gt_only, max_as_limit,
56
  futures.append(executor.submit(trusted_check, *args))
57
  n_samples += 1
58
 
59
- for future in tqdm(as_completed(futures), total=n_samples):
60
  result = future.result()
61
  expected_time[result["task_id"]] = result["time"]
62
-
63
- print(f"Expected outputs computed in {time.time() - tbegin:.2f}s")
64
-
65
  if any(expected_time.values()):
66
  with open(cache_file, "wb") as f:
67
  pickle.dump(expected_time, f)
@@ -149,14 +141,10 @@ def evaluate(
149
  eval_results = defaultdict(list) # task_id ->
150
  remainings = set()
151
 
152
- print("Reading samples...")
153
- for sample in tqdm(load_solutions(samples)):
154
  task_id = sample["task_id"]
155
 
156
  if task_id not in problems:
157
- warn(
158
- f"Task {task_id} is found in the samples but not found in the dataset"
159
- )
160
  continue
161
  solution = (
162
  sample["solution"]
@@ -184,23 +172,11 @@ def evaluate(
184
  assert n_samples == len(remainings), "Missing problems in unfinished"
185
  assert len(completion_id) == len(problems), "Missing problems in samples"
186
 
187
- # def stucking_checker():
188
- # not_done = futures
189
- # while len(not_done) > 0:
190
- # done, not_done = wait(not_done, timeout=240, return_when=FIRST_COMPLETED)
191
-
192
- # if len(done) == 0:
193
- # warn("No samples have finished testing in the last 240s")
194
- # warn(f"{len(remainings)} samples to be tested: {remainings}")
195
-
196
- # threading.Thread(target=stucking_checker).start()
197
-
198
- for future in tqdm(as_completed(futures), total=n_samples):
199
  result = future.result()
200
  remainings.remove(result["_identifier"])
201
  eval_results[result["task_id"]].append(result)
202
 
203
-
204
  # sort the results for each problem by completion_id
205
  for task_id, task_results in eval_results.items():
206
  task_results.sort(key=lambda x: x["completion_id"])
 
12
  from warnings import warn
13
 
14
  import numpy as np
 
 
15
 
16
  from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
17
  from bigcodebench.data.utils import CACHE_DIR
 
25
  cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
26
  if os.path.exists(cache_file):
27
  if check_gt_only:
 
 
 
28
  with open(cache_file, "rb") as f:
29
  return pickle.load(f)
30
 
31
  os.makedirs(CACHE_DIR, exist_ok=True)
 
32
  tbegin = time.time()
33
 
34
  with ProcessPoolExecutor(max_workers=n_workers) as executor:
 
50
  futures.append(executor.submit(trusted_check, *args))
51
  n_samples += 1
52
 
53
+ for future in as_completed(futures):
54
  result = future.result()
55
  expected_time[result["task_id"]] = result["time"]
56
+
 
 
57
  if any(expected_time.values()):
58
  with open(cache_file, "wb") as f:
59
  pickle.dump(expected_time, f)
 
141
  eval_results = defaultdict(list) # task_id ->
142
  remainings = set()
143
 
144
+ for sample in load_solutions(samples):
 
145
  task_id = sample["task_id"]
146
 
147
  if task_id not in problems:
 
 
 
148
  continue
149
  solution = (
150
  sample["solution"]
 
172
  assert n_samples == len(remainings), "Missing problems in unfinished"
173
  assert len(completion_id) == len(problems), "Missing problems in samples"
174
 
175
+ for future in as_completed(futures):
 
 
 
 
 
 
 
 
 
 
 
176
  result = future.result()
177
  remainings.remove(result["_identifier"])
178
  eval_results[result["task_id"]].append(result)
179
 
 
180
  # sort the results for each problem by completion_id
181
  for task_id, task_results in eval_results.items():
182
  task_results.sort(key=lambda x: x["completion_id"])