# recursive_swe_bench/models/base_model.py

from typing import Any, Dict, List, Optional, Union
import logging
import time
from abc import ABC, abstractmethod


class ModelInterface(ABC):
    """
    Base interface for models that can be evaluated using Recursive-SWE-bench.

    This abstract class defines the core functionality required for a model to
    be evaluated using the recursive evaluation framework. Concrete
    implementations must provide the actual model-specific logic.
    """

    def __init__(self, model_identifier: str, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the model interface.

        Args:
            model_identifier: Identifier for the model
            config: Configuration options; ``None`` is treated as an empty dict
        """
        self.model_identifier = model_identifier
        self.config = config or {}
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """
        Set up logging for the model.

        Returns:
            The logger named ``Model.<model_identifier>``, with its level taken
            from the ``log_level`` config entry (default ``logging.INFO``).
        """
        logger = logging.getLogger(f"Model.{self.model_identifier}")

        # logging.getLogger returns the same object for the same name, so
        # attaching a handler unconditionally would duplicate every log line
        # once per instance created with this identifier.
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            )
            logger.addHandler(handler)

        logger.setLevel(self.config.get("log_level", logging.INFO))
        return logger

    @abstractmethod
    def solve(
        self,
        problem: Dict[str, Any],
        history: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """
        Generate a solution for the given problem.

        Args:
            problem: The problem to solve
            history: Optional history of previous solution attempts

        Returns:
            The generated solution
        """

    @abstractmethod
    def get_meta_information(self) -> Dict[str, Any]:
        """
        Get meta information about the model.

        Returns:
            Dictionary containing model information
        """


# recursive_swe_bench/models/openai.py

import openai
import json
import re
import backoff
from typing import Any, Dict, List, Optional, Union

from recursive_swe_bench.models.base_model import ModelInterface


class OpenAIModel(ModelInterface):
    """
    Integration with OpenAI models (GPT-3.5, GPT-4, etc.).

    This class provides integration with OpenAI's API for evaluating
    models like GPT-3.5 and GPT-4 with Recursive-SWE-bench.

    NOTE: the implementation relies on the module-level
    ``openai.ChatCompletion`` and ``openai.error`` names.
    """

    # Prompts used when the config does not override them.
    DEFAULT_PROMPTS: Dict[str, str] = {
        "system": "You are an expert programmer tasked with fixing bugs in code. Fix the code based on the description and tests.",
        "user_template": "# Bug Fixing Task\n\n{description}\n\n# Code\n```python\n{code}\n```\n\n{tests_description}\n\n# Your task\nFix the bugs in the code above. Provide only the corrected code without any explanations.",
    }

    # Request parameters used when the config has no "api_params" entry.
    DEFAULT_API_PARAMS: Dict[str, Any] = {
        "temperature": 0.2,
        "max_tokens": 2000,
        "top_p": 0.95,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }

    # First fenced code block in a response. The optional group drops either
    # a language tag that sits alone on the opening fence line (``py``,
    # ``python3``, ...) or a bare ``python`` tag followed by inline code.
    _CODE_FENCE = re.compile(r"```(?:[\w+#.-]*[ \t]*\r?\n|python\b)?(.*?)```", re.DOTALL)

    def __init__(
        self,
        model_identifier: str,
        api_key: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize the OpenAI model interface.

        Args:
            model_identifier: OpenAI model identifier (e.g., "gpt-4", "gpt-3.5-turbo")
            api_key: OpenAI API key (optional if set in environment)
            config: Additional configuration options. Recognised keys read by
                this class: ``prompts``, ``api_params``, ``include_history``
                and ``log_level``.
        """
        super().__init__(model_identifier, config)

        # Set API key if provided (this assigns the module-wide openai.api_key)
        if api_key:
            openai.api_key = api_key

        # Merge config-provided prompts over the defaults so that a partial
        # override (e.g. only "system") does not fail later with a KeyError.
        self.prompts = {**self.DEFAULT_PROMPTS, **(self.config.get("prompts") or {})}

        # API parameters are deliberately NOT merged: a caller-supplied dict
        # is sent as-is. The defaults are copied so instances never share state.
        self.api_params = self.config.get("api_params", dict(self.DEFAULT_API_PARAMS))

        self.logger.info("Initialized OpenAI model: %s", model_identifier)

    @backoff.on_exception(
        backoff.expo,
        (openai.error.RateLimitError, openai.error.ServiceUnavailableError, openai.error.APIError),
        max_tries=5
    )
    def solve(
        self,
        problem: Dict[str, Any],
        history: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """
        Generate a solution using the OpenAI model.

        Args:
            problem: The problem to solve
            history: Optional history of previous solution attempts

        Returns:
            The generated solution, reduced to the first fenced code block of
            the response when one is present

        Raises:
            openai.error.RateLimitError, openai.error.ServiceUnavailableError,
            openai.error.APIError: re-raised once the backoff decorator has
                used up its 5 tries.
        """
        self.logger.info("Solving problem with OpenAI model: %s", self.model_identifier)
        started = time.perf_counter()

        # Format the problem for the model
        messages = self._format_messages(problem, history)

        # Make API call
        response = openai.ChatCompletion.create(
            model=self.model_identifier,
            messages=messages,
            **self.api_params
        )

        # Extract the solution from the response; content may be None, which
        # would otherwise raise AttributeError on .strip().
        solution = (response.choices[0].message.content or "").strip()

        elapsed = time.perf_counter() - started
        self.logger.info("Solution generated in %.2f seconds", elapsed)

        return self._extract_code(solution)

    def _format_messages(
        self,
        problem: Dict[str, Any],
        history: Optional[List[Dict[str, Any]]] = None
    ) -> List[Dict[str, str]]:
        """
        Format the problem and history into messages for the OpenAI API.

        Args:
            problem: The problem to solve. Reads ``problem["description"]``,
                ``problem["code_context"]["code"]`` and, when present,
                ``problem["code_context"]["tests"]``.
            history: Optional history of previous solution attempts. Each entry
                must have a ``"solution"`` key; its ``"feedback"`` may carry
                ``"issues"`` and ``"suggestions"`` lists.

        Returns:
            List of formatted messages: the system prompt, the user task, then
            one assistant/user pair per history entry (skipped when the
            ``include_history`` config entry is falsy).
        """
        code_context = problem["code_context"]

        # Create the user message using the template
        user_content = self.prompts["user_template"].format(
            description=problem["description"],
            code=code_context["code"],
            tests_description=self._describe_tests(code_context)
        )

        messages = [
            {"role": "system", "content": self.prompts["system"]},
            {"role": "user", "content": user_content},
        ]

        # Add history if available
        if history and self.config.get("include_history", True):
            for entry in history:
                # Previous attempt, followed by the feedback it received
                messages.append({"role": "assistant", "content": entry["solution"]})
                messages.append({
                    "role": "user",
                    "content": self._describe_feedback(entry.get("feedback") or {})
                })

        return messages

    @staticmethod
    def _describe_tests(code_context: Dict[str, Any]) -> str:
        """Render the "# Tests" section of the user prompt."""
        if "tests" not in code_context:
            return "# Tests\nThe code must work correctly according to its intended functionality."

        parts = ["# Tests\n", "The code must pass the following tests:\n\n"]
        parts.extend(
            f"## Test {number}: {test['name']}\n```python\n{test['content']}\n```\n\n"
            for number, test in enumerate(code_context["tests"], start=1)
        )
        return "".join(parts)

    @staticmethod
    def _describe_feedback(feedback: Dict[str, Any]) -> str:
        """Render the feedback on one previous attempt as a user message."""
        # Missing "issues"/"suggestions" lists are treated as empty.
        parts = ["Your solution has the following issues:\n"]
        parts.extend(f"- {issue['message']}\n" for issue in feedback.get("issues", []))
        parts.append("\nPlease try again with these improvements:\n")
        parts.extend(
            f"- {suggestion['message']}\n" for suggestion in feedback.get("suggestions", [])
        )
        return "".join(parts)

    def _extract_code(self, text: str) -> str:
        """
        Extract code from the model's response.

        Args:
            text: The model's response

        Returns:
            The stripped content of the first fenced code block, without its
            language tag; the stripped full text when there is no complete
            fenced block (it might be just code).
        """
        fenced = self._CODE_FENCE.search(text)
        if fenced:
            return fenced.group(1).strip()

        return text.strip()

    def get_meta_information(self) -> Dict[str, Any]:
        """
        Get meta information about the model.

        Returns:
            Dictionary containing model information
        """
        return {
            "model_name": self.model_identifier,
            "provider": "OpenAI",
            "type": "API",
            "parameters": self.api_params,
            "system_prompt": self.prompts["system"]
        }