11import os
22import subprocess
3+ import tempfile
4+ from pathlib import Path
35
46import debug_gym .gym .utils as utils
57from debug_gym .constants import DEBUG_GYM_CACHE_DIR
68from debug_gym .gym .entities import EvalOutput
79from debug_gym .gym .envs .env import RepoEnv
10+ from debug_gym .gym .terminal import DockerTerminal , Terminal
11+
DOCKER_AIDER_IMAGE_NAME = "debug-gym:aider"


def build_docker_image(logger):
    """Build the Docker image used by the Aider benchmark environment.

    The image is tagged ``DOCKER_AIDER_IMAGE_NAME``. When an image with that
    tag already exists locally, the function returns without building.

    Args:
        logger: Object exposing ``info(msg)``; used to report build progress.
    """
    # Function-local import: the Docker SDK is only needed when this runs.
    import docker

    docker_client = docker.from_env(timeout=600)
    try:
        # Check if the Docker image is already built.
        try:
            docker_client.images.get(DOCKER_AIDER_IMAGE_NAME)
            return
        except docker.errors.ImageNotFound:
            pass

        logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} not found. Building it...")

        # Start from the official Python 3.12 slim image.
        base_image = "python:3.12-slim"
        # Then install git, tree and pytest. The package index refresh and the
        # install share one RUN so a cached, stale index layer can never be
        # combined with a fresh install step.
        setup_commands = [
            "apt-get update && apt-get install -y git tree",
            "pip install pytest",
        ]
        dockerfile_lines = [f"FROM {base_image}"]
        dockerfile_lines.extend(f"RUN {command}" for command in setup_commands)

        # The Dockerfile lives in a throwaway build context; the build must
        # happen while that directory still exists.
        with tempfile.TemporaryDirectory() as build_dir:
            dockerfile_path = Path(build_dir) / "Dockerfile"
            dockerfile_path.write_text("\n".join(dockerfile_lines) + "\n")

            docker_client.images.build(
                path=str(build_dir),
                dockerfile="Dockerfile",
                tag=DOCKER_AIDER_IMAGE_NAME,
                rm=True,
            )

        logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} built successfully.")
    finally:
        # Release the HTTP session held by the client on every exit path.
        docker_client.close()
856
957
1058class AiderBenchmarkEnv (RepoEnv ):
1159 REPO_URL = "https://github.com/exercism/python"
1260 REPO_PATH = DEBUG_GYM_CACHE_DIR / "exercism"
1361
def __init__(
    self,
    entrypoint: str = "python -m pytest --tb=no -s .",
    terminal: Terminal | None = None,
    **kwargs,
):
    """Create the environment, defaulting to a Docker-backed terminal.

    Args:
        entrypoint: Command forwarded to the parent constructor.
        terminal: Terminal to use. When falsy, a ``DockerTerminal`` based on
            ``DOCKER_AIDER_IMAGE_NAME`` is created, sharing the ``logger``
            found in ``kwargs`` (if any).
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    if not terminal:
        terminal = DockerTerminal(
            base_image=DOCKER_AIDER_IMAGE_NAME,
            logger=kwargs.get("logger"),
        )

    super().__init__(entrypoint=entrypoint, terminal=terminal, **kwargs)
75+
@property
def instructions(self) -> str:
    """Return the ``"instructions"`` entry of the current task."""
    task = self.current_task
    return task["instructions"]
2079
def calculate_max_score(self, eval_output: EvalOutput) -> int:
    """Derive the maximum score from the output text of ``eval_output``.

    Delegates to ``utils.extract_max_score_from_pytest_output``.
    """
    pytest_output = eval_output.output
    return utils.extract_max_score_from_pytest_output(pytest_output)
@@ -30,15 +89,43 @@ def eval(self, **kwargs) -> EvalOutput:
3089 self .last_eval = EvalOutput (success , output )
3190 return self .last_eval
3291
33- def reset (self , * , options : dict = None ):
34- options = options or {}
35- self .current_sample = self .dataset [options ["task_name" ]]
36- directory = self .current_sample ["base_directory" ]
37- self .setup_workspace (directory , entrypoint = self .entrypoint )
38- infos = super ().reset (options = options )
39- return infos
def setup_task(self, task_name: str, options: dict = None):
    """Select ``task_name`` from the dataset as the current task.

    Args:
        task_name: Key of the task in ``self.dataset``.
        options: Accepted but not used by this method.

    Raises:
        ValueError: If ``task_name`` is not a key of ``self.dataset``.
    """
    if task_name in self.dataset:
        self.current_task = self.dataset[task_name]
        return
    raise ValueError(f"Task {task_name} not found in the dataset.")
96+
def setup_workspace(self):
    """Reset the workspace and fill it with the current task's codebase."""
    self.workspace.reset()

    self.logger.info("Copying files..")
    codebase = self.current_task["codebase"]
    destination = self.workspace.working_dir
    self.workspace.copy_content(src=codebase, target=destination)
    # Use codebase's .debugignore and .debugreadonly.
    self.workspace.setup_file_filters()

    # NOTE(review): this hard-coded command is applied regardless of the
    # `entrypoint` given to the constructor — confirm that is intended.
    self.set_entrypoints("python -m pytest --tb=no -s .")
107+
def setup_terminal(self):
    """Initialise a git repository in the terminal and commit the task files.

    Two commits are created: one for the task's sources, one for the
    debug-gym ignore/read-only marker files.
    """
    self.logger.info(f"Configuring {self.terminal} ...")

    git_commands = (
        "git init",
        "git config user.name 'debug-gym'",
        "git config user.email '<>'",
        # Aider tasks only have Python and text files.
        "git add *.py *.txt",
        "git commit -am 'Init'",
        # Aider tasks come with those.
        "git add .debugignore .debugreadonly",
        "git commit -am 'Add debug-gym ignore and read-only files'",
    )
    for command in git_commands:
        self.terminal.run(command)
40124
41125 def load_dataset (self , problems : str | list [str ] | None = None ):
126+ if isinstance (self .terminal , DockerTerminal ):
127+ build_docker_image (self .logger )
128+
42129 if not os .path .exists (self .REPO_PATH ):
43130 subprocess .run (["git" , "clone" , self .REPO_URL , self .REPO_PATH ], check = True )
44131
@@ -65,9 +152,6 @@ def load_dataset(self, problems: str | list[str] | None = None):
65152 ".?*" , # Ignore hidden files and directories but not current dir "."
66153 "__pycache__/" ,
67154 "*.pyc" ,
68- # "*.md",
69- # "log/",
70- # "data/",
71155 ],
72156 )
73157 # Add .debugreadonly so tests are readonly.
@@ -76,7 +160,7 @@ def load_dataset(self, problems: str | list[str] | None = None):
76160 )
77161
78162 dataset [task_name ] = {
79- "base_directory " : directory ,
163+ "codebase " : directory ,
80164 "instructions" : instructions ,
81165 "filename" : task_name + ".py" ,
82166 }
0 commit comments