feat(evaluation): Added INSTRUCTION_TEMPLATE_NAME to run_infer.py in swe_bench by KevinMusgrave · Pull Request #10270 · All-Hands-AI/OpenHands
Merged
Changes from 4 commits
3 changes: 3 additions & 0 deletions evaluation/benchmarks/swe_bench/README.md
@@ -93,6 +93,9 @@ export USE_HINT_TEXT=true # Ignore this if you are not sure.
 
 # Specify a condenser configuration for memory management (default: NoOpCondenser)
 export EVAL_CONDENSER=summarizer_for_eval # Name of the condenser config group in config.toml
+
+# Specify the instruction prompt template file name
+export INSTRUCTION_TEMPLATE_NAME=swe_custom.j2 # Name of the file in the swe_bench/prompts folder.
 ```
 
 Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
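To illustrate how the new variable is consumed, here is a minimal sketch of loading and rendering a custom template with Jinja2. The `prompts` path and the `problem_statement` variable are assumptions for illustration only; the real template context is built inside `get_instruction` in `run_infer.py`.

```python
import os

from jinja2 import Environment, FileSystemLoader

# Resolve the template name from the environment; fall back to the benchmark
# default when INSTRUCTION_TEMPLATE_NAME is unset.
template_name = os.environ.get('INSTRUCTION_TEMPLATE_NAME') or 'swe_default.j2'

# Custom templates are expected to live in the swe_bench/prompts folder.
prompts_dir = os.path.join('evaluation', 'benchmarks', 'swe_bench', 'prompts')
env = Environment(loader=FileSystemLoader(prompts_dir))
template = env.get_template(template_name)

# 'problem_statement' is an assumed template variable, used here only to show
# how a rendered instruction would be produced.
instruction = template.render(problem_statement='<SWE-bench issue text>')
```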
5 changes: 4 additions & 1 deletion evaluation/benchmarks/swe_bench/run_infer.py
@@ -108,7 +108,9 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
     llm_model = metadata.llm_config.model
 
     # Determine the template file based on mode and LLM
-    if mode.startswith('swt'):
+    if metadata.instruction_template_name:
+        template_name = metadata.instruction_template_name
+    elif mode.startswith('swt'):
         template_name = 'swt.j2'
     elif mode == 'swe':
         if 'gpt-4.1' in llm_model:
@@ -122,6 +124,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
         logger.error(f'Unexpected evaluation mode: {mode}. Falling back to default.')
         template_name = 'swe_default.j2'
 
+    logger.debug(f'Using instruction template file: {template_name}')
     # Set up Jinja2 environment
     # Assuming templates are in 'evaluation/benchmarks/swe_bench/prompts' relative to this script
     prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
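The precedence introduced by this change, written out as a standalone sketch. `pick_template_name` is a hypothetical helper, not a function in the PR, and the gpt-4.1-specific file name is omitted because that branch is truncated in this diff.

```python
def pick_template_name(
    instruction_template_name: str | None, mode: str, llm_model: str
) -> str:
    """Illustrative helper capturing the selection order in run_infer.py."""
    if instruction_template_name:  # explicit override wins
        return instruction_template_name
    if mode.startswith('swt'):
        return 'swt.j2'
    if mode == 'swe':
        # The gpt-4.1-specific branch is not visible in this diff, so the
        # sketch simply falls through to the default here.
        return 'swe_default.j2'
    # Unexpected mode: run_infer.py logs an error and uses the default.
    return 'swe_default.j2'


# Example: when the override is set, the mode/model checks are never reached.
assert pick_template_name('swe_custom.j2', 'swe', 'gpt-4.1') == 'swe_custom.j2'
```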
2 changes: 2 additions & 0 deletions evaluation/utils/shared.py
@@ -53,6 +53,7 @@ class EvalMetadata(BaseModel):
     data_split: str | None = None
     details: dict[str, Any] | None = None
     condenser_config: CondenserConfig | None = None
+    instruction_template_name: str | None = None
 
 
 class EvalOutput(BaseModel):
@@ -205,6 +206,7 @@ def make_metadata(
         condenser_config=condenser_config
         if condenser_config
         else NoOpCondenserConfig(),
+        instruction_template_name=os.environ.get('INSTRUCTION_TEMPLATE_NAME')
     )
     metadata_json = metadata.model_dump_json()
     logger.info(f'Metadata: {metadata_json}')
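A minimal sketch of the new field's behaviour, using a trimmed-down stand-in for `EvalMetadata` rather than the full class in `shared.py`: an unset `INSTRUCTION_TEMPLATE_NAME` yields `None`, so `get_instruction` falls back to its existing mode/model heuristics.

```python
import os

from pydantic import BaseModel


class MiniEvalMetadata(BaseModel):
    # Stand-in for EvalMetadata: the new field defaults to None.
    instruction_template_name: str | None = None


# os.environ.get returns None when the variable is unset, which the optional
# field accepts, leaving template selection to run_infer.py's defaults.
meta = MiniEvalMetadata(
    instruction_template_name=os.environ.get('INSTRUCTION_TEMPLATE_NAME')
)
print(meta.instruction_template_name)
```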