code_eval_octopack

Sleeping

App Files Files Community

Muennighoff committed on Mar 31, 2023

Commit

0e7922f

•

1 Parent(s): ea58aa2

Add JS

Browse files

Files changed (2) hide show

code_eval.py +2 -2
execute.py +43 -2

code_eval.py CHANGED Viewed

@@ -152,7 +152,7 @@ class CodeEval(evaluate.Metric):
             license=_LICENSE,
         )
-    def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -170,7 +170,7 @@ class CodeEval(evaluate.Metric):
             for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
                 for candidate in candidates:
                     test_program = candidate + "\n" + test_case
-                    args = (test_program, timeout, task_id, completion_id[task_id])
                     future = executor.submit(check_correctness, *args)
                     futures.append(future)
                     completion_id[task_id] += 1

             license=_LICENSE,
         )
+    def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0, language="python"):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
             for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
                 for candidate in candidates:
                     test_program = candidate + "\n" + test_case
+                    args = (test_program, timeout, task_id, completion_id[task_id], language)
                     future = executor.submit(check_correctness, *args)
                     futures.append(future)
                     completion_id[task_id] += 1

execute.py CHANGED Viewed

@@ -24,8 +24,12 @@ import platform
 import signal
 import tempfile
-def check_correctness(check_program, timeout, task_id, completion_id):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
@@ -36,7 +40,8 @@ def check_correctness(check_program, timeout, task_id, completion_id):
     manager = multiprocessing.Manager()
     result = manager.list()
-    p = multiprocessing.Process(target=unsafe_execute, args=(check_program, result, timeout))
     p.start()
     p.join(timeout=timeout + 1)
     if p.is_alive():
@@ -85,6 +90,42 @@ def unsafe_execute(check_program, result, timeout):
         os.rmdir = rmdir
         os.chdir = chdir
 @contextlib.contextmanager
 def time_limit(seconds):

 import signal
 import tempfile
+# Maps the `language` value passed to check_correctness to the function that
+# is used as the multiprocessing.Process target for that language.
+# NOTE(review): the values are resolved when this statement executes, so
+# unsafe_execute and unsafe_execute_js must already be defined above this
+# statement - confirm the definition order, otherwise importing the module
+# raises NameError.
+LANGUAGE_TO_FUNC = {
+    "python": unsafe_execute,
+    "javascript": unsafe_execute_js,
+}
+def check_correctness(check_program, timeout, task_id, completion_id, language):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
     manager = multiprocessing.Manager()
     result = manager.list()
+    p = multiprocessing.Process(target=LANGUAGE_TO_FUNC[language], args=(check_program, result, timeout))
     p.start()
     p.join(timeout=timeout + 1)
     if p.is_alive():
         os.rmdir = rmdir
         os.chdir = chdir
+def unsafe_execute_js(check_program, result, timeout):
+    """
+    Runs a JavaScript test program with Node.js and records the outcome.
+
+    The program is written to ``test.js`` (relative path, inside the
+    ``create_tempdir()`` context) and executed with ``node test.js``.
+    One entry is appended to ``result``:
+
+    - ``"passed"`` when the process writes nothing to stderr or stdout,
+    - ``"failed: <output>"`` when it writes to stderr, or otherwise to stdout
+      (stderr takes precedence),
+    - ``"timed out"`` when the run exceeds ``timeout`` seconds.
+
+    :param check_program: JavaScript source (candidate plus test code) to run.
+    :param result: list-like object supporting ``append``; receives the outcome.
+    :param timeout: maximum run time in seconds.
+    """
+    with create_tempdir():
+        # Close the file before node reads it so the contents are flushed to disk.
+        with open("test.js", "w") as program_file:
+            program_file.write(check_program)
+        # These system calls are needed when cleaning up tempdir.
+        import os
+        import shutil
+        rmtree = shutil.rmtree
+        rmdir = os.rmdir
+        chdir = os.chdir
+        try:
+            # Run program.
+            with time_limit(timeout):
+                exec_result = subprocess.run(["node", "test.js"], timeout=timeout, capture_output=True)
+            # Any output at all counts as a failure; stderr is reported in preference to stdout.
+            output = exec_result.stderr.decode() or exec_result.stdout.decode()
+            if output:
+                result.append(f"failed: {output}")
+            else:
+                result.append("passed")
+        except (TimeoutException, subprocess.TimeoutExpired):
+            # subprocess.run enforces the same deadline as time_limit and may
+            # fire first, raising subprocess.TimeoutExpired instead.
+            result.append("timed out")
+        finally:
+            # Needed for cleaning up, even when the run raised unexpectedly.
+            shutil.rmtree = rmtree
+            os.rmdir = rmdir
+            os.chdir = chdir
 @contextlib.contextmanager
 def time_limit(seconds):