"""
Quick sanity test for the Compiler Pass Ordering environment.

Run with:
    python test_env.py

Server must be running:
    uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
"""

from compiler_opt_env import CompilerOptAction, CompilerOptEnv
from compiler_opt_env.models import PASS_NAMES, TASK_EASY, TASK_MEDIUM, TASK_HARD
from compiler_opt_env.server.compiler_opt_env_environment import BASE_PASS_EFFECTS


def _print_banner(title):
    """Print a blank line, then *title* framed by two 60-character rules."""
    rule = "─" * 60
    print(f"\n{rule}")
    print(title)
    print(rule)


def _print_step(obs, pass_id, reward):
    """Print the one-line report for the step that just applied *pass_id*.

    Args:
        obs: Observation returned by that step; its ``step_count``,
            ``estimated_cost`` and ``improvement_pct`` fields are shown.
        pass_id: Index into ``PASS_NAMES`` of the pass that was applied.
        reward: Reward reported by the step result.
    """
    print(f" Step {obs.step_count:2d}: {PASS_NAMES[pass_id]:35s} "
          f"cost={obs.estimated_cost:7.1f} "
          f"improvement={obs.improvement_pct:5.1f}% "
          f"reward={reward:+.4f}")


def run_greedy(env, task_id: int, label: str):
    """Greedy agent: always pick the available pass with highest base effect.

    Runs one episode, printing a line per step and a short summary.

    Args:
        env: Synchronous environment client (the object yielded by
            ``CompilerOptEnv(...).sync()`` in ``main``).
        task_id: Task identifier sent with every action.
        label: Human-readable task name used in the banner.

    Returns:
        ``(improvement_pct, grader_score)`` read from the final observation.
    """
    _print_banner(f" {label} (greedy agent)")

    # Reset exactly once per episode. An earlier version probed the return
    # type with extra reset() calls whose results were thrown away; each one
    # was a wasted round-trip that started and discarded an episode.
    # reset() returns a StepResult in sync mode, so unwrap .observation.
    result = env.reset()
    obs = result.observation
    print(f" Program type: {obs.program_type}")
    print(f" Baseline cost: {obs.baseline_cost:.1f}")
    print()

    while not obs.done:
        available = obs.passes_available
        # Rank candidates by their static base effect only; the greedy agent
        # deliberately ignores anything the observation says about state.
        best = max(available, key=lambda p: BASE_PASS_EFFECTS[p])
        step_result = env.step(CompilerOptAction(pass_id=best, task_id=task_id))
        obs = step_result.observation
        _print_step(obs, best, step_result.reward)

    print(f"\n Greedy improvement: {obs.improvement_pct:.1f}%")
    print(f" Grader score: {obs.grader_score:.3f}")
    return obs.improvement_pct, obs.grader_score


def run_optimal_task1(env):
    """Hand-crafted optimal sequence for Task 1: alias → DCE → vectorization chain.

    Plays a fixed list of pass ids against ``TASK_EASY``, stopping early if
    the environment reports the episode as done.

    Args:
        env: Synchronous environment client (the object yielded by
            ``CompilerOptEnv(...).sync()`` in ``main``).

    Returns:
        ``(improvement_pct, grader_score)`` read from the final observation.
    """
    _print_banner(" Task 1 — Optimal sequence (alias → DCE → vectorization)")

    result = env.reset()
    obs = result.observation
    print(f" Program type: {obs.program_type}")
    print(f" Baseline cost: {obs.baseline_cost:.1f}\n")

    # Padded to 10 steps so that the episode actually finishes (done=True)
    # before the final observation is read.
    optimal_sequence = [13, 0, 4, 5, 2, 7, 1, 10, 8, 9]

    for pass_id in optimal_sequence:
        if obs.done:
            break
        step_result = env.step(CompilerOptAction(pass_id=pass_id, task_id=TASK_EASY))
        obs = step_result.observation
        _print_step(obs, pass_id, step_result.reward)

    print(f"\n Optimal improvement: {obs.improvement_pct:.1f}%")
    print(f" Grader score: {obs.grader_score:.3f}")
    return obs.improvement_pct, obs.grader_score


def main():
    """Run the greedy agent on all three tasks plus the Task 1 reference
    sequence against a server at http://localhost:8000, then print a summary.
    """
    print("Compiler Pass Ordering — Environment Sanity Test")
    print("=" * 60)

    with CompilerOptEnv(base_url="http://localhost:8000").sync() as env:
        # Task 1: greedy vs optimal
        greedy_improv_1, greedy_score_1 = run_greedy(env, TASK_EASY, "Task 1 (Easy)")
        opt_improv_1, opt_score_1 = run_optimal_task1(env)

        # Task 2: greedy
        greedy_improv_2, greedy_score_2 = run_greedy(env, TASK_MEDIUM, "Task 2 (Medium)")

        # Task 3: greedy
        greedy_improv_3, greedy_score_3 = run_greedy(env, TASK_HARD, "Task 3 (Hard)")

        print(f"\n{'='*60}")
        print(" SUMMARY")
        print(f"{'='*60}")
        print(f" Task 1 greedy: {greedy_improv_1:.1f}% improvement score={greedy_score_1:.3f}")
        print(f" Task 1 optimal: {opt_improv_1:.1f}% improvement score={opt_score_1:.3f}")
        print(f" Task 2 greedy: {greedy_improv_2:.1f}% improvement score={greedy_score_2:.3f}")
        print(f" Task 3 greedy: {greedy_improv_3:.1f}% improvement score={greedy_score_3:.3f}")
        print()
        print(" Expected: greedy ~19-24% | optimal Task 1 ~40-50%")
        print(" If greedy << optimal: ✓ environment requires RL")
        print(f"{'='*60}")


if __name__ == "__main__":
    main()