| import contextlib |
| import multiprocessing |
| import os |
| import subprocess |
| import tempfile |
|
|
# Preamble placed in front of the candidate and reference sources before they
# are written out and handed to the compiler.
HEADERS: str = "\n#include <bits/stdc++.h>\n"
|
|
|
|
def check_correctness(candidate, reference, cpp_type, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    :param candidate: source text of the completion under evaluation.
    :param reference: mapping providing the test source under ``"tests"``
        and, for the ``"sfinae"`` and ``"concepts"`` modes, the source that
        is expected to be rejected by the compiler under ``"invalids"``.
    :param cpp_type: evaluation mode; one of ``"base"``, ``"sfinae"`` or
        ``"concepts"``.
    :param task_id: task identifier, copied verbatim into the result.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``completion_id`` and the
        ``<cpp_type>_run_passed`` / ``_run_compiled`` / ``_run_result``
        fields; the constrained modes additionally report
        ``<cpp_type>_constrain_passed`` / ``_constrain_result``.
    :raises ValueError: if ``cpp_type`` is not one of the known modes.
    """
    # cpp_type -> (C++ standard given to the compiler, run the "invalids" check?)
    modes = {
        "base": ("c++17", False),
        "sfinae": ("c++17", True),
        "concepts": ("c++20", True),
    }
    # Validate up front so a bad mode never costs a manager process.
    if cpp_type not in modes:
        raise ValueError(f"Unknown cpp_type: {cpp_type}")
    cppstd, check_constraints = modes[cpp_type]

    result = dict(
        task_id=task_id,
        completion_id=completion_id,
    )

    # The manager owns a server process; the ``with`` block guarantees it is
    # shut down again instead of leaking one process per evaluated completion.
    with multiprocessing.Manager() as manager:
        run_sink = manager.list()
        process_case(
            unsafe_execute_cpp,
            candidate,
            reference["tests"],
            run_sink,
            cppstd,
        )
        # Copy the plain string out while the manager is still alive.
        run_result = run_sink[0]

        constrain_result = None
        if check_constraints:
            constrain_sink = manager.list()
            process_case(
                invalid_compile_cpp,
                candidate,
                reference["invalids"],
                constrain_sink,
                cppstd,
            )
            constrain_result = constrain_sink[0]

    run_passed = run_result == "passed"
    # A runtime failure still proves that the candidate compiled.
    run_compiled = run_passed or run_result.startswith("failed: runtime error:")

    result[f"{cpp_type}_run_passed"] = run_passed
    result[f"{cpp_type}_run_compiled"] = run_compiled
    result[f"{cpp_type}_run_result"] = run_result

    if check_constraints:
        # Rejecting the invalid usage only counts if the valid usage compiled.
        result[f"{cpp_type}_constrain_passed"] = (
            constrain_result == "passed" and run_compiled
        )
        result[f"{cpp_type}_constrain_result"] = constrain_result

    return result
|
|
|
|
def process_case(target, candidate, reference, result, cppstd, timeout=60):
    """
    Run ``target`` in a separate process and guarantee ``result`` is not
    left empty.

    :param target: callable invoked in the child process as
        ``target(candidate, reference, result, timeout, cppstd)``.
    :param candidate: forwarded unchanged to ``target``.
    :param reference: forwarded unchanged to ``target``.
    :param result: list-like sink the child appends its outcome to; if it is
        still empty once the child has finished or been killed,
        ``"timed out"`` is appended here.
    :param cppstd: forwarded unchanged to ``target``.
    :param timeout: per-step budget in seconds handed to ``target``; the
        child itself is given ``timeout + 5`` seconds before it is killed.
    """
    worker = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout, cppstd),
    )

    worker.start()
    # A few seconds of slack on top of the budget passed to the target.
    worker.join(timeout=timeout + 5)
    if worker.is_alive():
        # NOTE(review): kill() stops only the worker itself; any subprocess
        # it launched is not terminated by this call and may outlive it.
        worker.kill()
        # Reap the killed worker so it does not linger as a zombie.
        worker.join()

    # Nothing reported (crash, kill, or target stayed silent): record a timeout.
    if not result:
        result.append("timed out")
|
|
|
|
def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compile ``candidate`` together with ``reference`` and run the binary.

    The sources are joined behind ``HEADERS``, written to ``test.cpp`` inside
    a throwaway working directory, compiled with the compiler named by the
    ``GENERICIFY_CLANG`` environment variable, and the resulting ``a.out``
    is executed. One outcome string is appended to ``result``:

    * ``"passed"`` - the binary exited with status 0;
    * ``"failed: compilation error: <output>"`` - the compiler exited non-zero;
    * ``"failed: runtime error: <output>"`` - the binary exited non-zero;
    * ``"timed out"`` - compilation or execution exceeded ``timeout``.

    :param candidate: source text of the completion.
    :param reference: source text appended after the completion.
    :param result: list-like sink receiving the outcome string.
    :param timeout: seconds allowed for compilation and, separately, for
        running the binary.
    :param cppstd: language standard passed to the compiler as ``-std=``.
    :raises RuntimeError: if ``GENERICIFY_CLANG`` is unset or empty.
    """
    cpp_compiler = os.getenv("GENERICIFY_CLANG")
    if not cpp_compiler:
        # Fail with a readable message instead of the TypeError that
        # subprocess raises when handed ``None`` as the executable.
        raise RuntimeError("GENERICIFY_CLANG environment variable is not set")

    with create_tempdir():
        source = "\n".join([HEADERS, candidate, reference])
        # Explicit encoding: do not let the locale decide how the source
        # is written; the context manager closes the file before compiling.
        with open("test.cpp", "w", encoding="utf-8") as source_file:
            source_file.write(source)

        try:
            compilation = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if compilation.returncode != 0:
            # Prefer stderr, fall back to stdout; never fail on odd bytes.
            raw = compilation.stderr or compilation.stdout
            err = raw.decode(errors="replace")
            result.append(f"failed: compilation error: {err}")
            return

        try:
            execution = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if execution.returncode == 0:
            result.append("passed")
        else:
            raw = execution.stderr or execution.stdout
            err = raw.decode(errors="replace")
            result.append(f"failed: runtime error: {err}")
|
|
|
|
def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """
    Check that ``candidate`` combined with ``reference`` is rejected by the
    compiler for the expected reason.

    The sources are joined behind ``HEADERS``, written to ``invalid.cpp``
    inside a throwaway working directory and compiled with the compiler
    named by the ``GENERICIFY_CLANG`` environment variable. One outcome
    string is appended to ``result``:

    * ``"passed"`` - the compiler exited with status 1 and its output
      contains ``"note: candidate template ignored"``;
    * ``"failed: improperly constrained: <output>"`` - the compiler exited
      with status 1 but without that note;
    * ``"failed: compilation succeeded: <output>"`` - the compiler exited
      with any status other than 1;
    * ``"timed out"`` - compilation exceeded ``timeout``.

    :param candidate: source text of the completion.
    :param reference: source text appended after the completion.
    :param result: list-like sink receiving the outcome string.
    :param timeout: seconds allowed for the compilation.
    :param cppstd: language standard passed to the compiler as ``-std=``.
    :raises RuntimeError: if ``GENERICIFY_CLANG`` is unset or empty.
    """
    cpp_compiler = os.getenv("GENERICIFY_CLANG")
    if not cpp_compiler:
        # Fail with a readable message instead of the TypeError that
        # subprocess raises when handed ``None`` as the executable.
        raise RuntimeError("GENERICIFY_CLANG environment variable is not set")

    with create_tempdir():
        source = "\n".join([HEADERS, candidate, reference])
        # Explicit encoding: do not let the locale decide how the source
        # is written; the context manager closes the file before compiling.
        with open("invalid.cpp", "w", encoding="utf-8") as source_file:
            source_file.write(source)

        try:
            compilation = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "invalid.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        # Prefer stderr, fall back to stdout; never fail on odd bytes.
        raw = compilation.stderr or compilation.stdout
        err = raw.decode(errors="replace")

        if compilation.returncode != 1:
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            # f-string so the diagnostics are reported, not the text "{err}".
            result.append(f"failed: improperly constrained: {err}")
|
|
|
|
@contextlib.contextmanager
def create_tempdir():
    """
    Context manager yielding the path of a fresh temporary directory.

    While the block is active the directory is entered via ``chdir``; on
    exit ``chdir`` is left first and the temporary directory is then cleaned
    up by ``tempfile.TemporaryDirectory``.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, chdir(scratch_dir):
        yield scratch_dir
|
|
|
|
@contextlib.contextmanager
def chdir(root):
    """
    Temporarily make ``root`` the current working directory.

    :param root: directory to switch into; the literal ``"."`` is treated as
        "stay where we are" and no directory change is performed.

    The previous working directory is restored when the block exits, whether
    normally or through an exception; exceptions propagate unchanged.
    """
    if root == ".":
        yield
        return

    previous_dir = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        # ``finally`` alone restores the directory on every exit path; no
        # catch-and-re-raise clause is needed for the exception to propagate.
        os.chdir(previous_dir)
|
|