Spaces:
Running
Running
""" | |
GAIA Answer Format Compliance System | |
This module ensures all GAIA answers meet exact format requirements by: | |
1. Extracting pure numbers from verbose responses | |
2. Formatting names correctly (last names only when specified) | |
3. Alphabetizing lists properly | |
4. Removing verbose explanations for concise answers | |
Critical fixes for GAIA benchmark compliance: | |
- "The video features 12 bird species" β "12" | |
- "Hirokazu Sawamura, Shintaro Fujinami" β "Sawamura, Fujinami" | |
- Unordered lists β alphabetized lists | |
- Verbose explanations β exact answers only | |
Author: GAIA Format Compliance Implementation | |
""" | |
import re | |
import logging | |
from typing import Dict, Any, Optional, List, Tuple, Union | |
from dataclasses import dataclass | |
from enum import Enum | |
from .intelligent_question_analyzer import ( | |
IntelligentQuestionAnalyzer, | |
QuestionAnalysis as IntelligentAnalysis, | |
AnswerFormat as IntelligentFormat | |
) | |
logger = logging.getLogger(__name__) | |
class AnswerType(Enum):
    """Answer categories recognised by the GAIA format-compliance pipeline."""

    # Bare numbers such as "12", "3.14" or "42".
    NUMERIC = "numeric"
    # Comma-separated enumerations, e.g. "apple, banana, cherry".
    LIST = "list"
    # Person names, e.g. "Smith, Johnson" or "John Smith".
    NAME = "name"
    # Free-form text answers.
    TEXT = "text"
    # Yes/No answers.
    BOOLEAN = "boolean"
    # Date-formatted answers.
    DATE = "date"
    # The answer could not be classified.
    UNKNOWN = "unknown"
@dataclass
class FormatRule:
    """Formatting switches applied to an answer of a given type.

    The class is built with keyword arguments elsewhere in this module, so it
    must be a dataclass: without the decorator there is no generated
    ``__init__`` and ``FormatRule(extract_numbers_only=True)`` raises
    ``TypeError``.
    """

    # Reduce the answer to a bare number.
    extract_numbers_only: bool = False
    # Sort list items alphabetically.
    alphabetize_lists: bool = False
    # Which part of a person's name to keep; mirrors ``name_format`` below.
    last_names_only: bool = False
    first_names_only: bool = False
    middle_names_only: bool = False
    full_names: bool = True
    # Strip explanatory prose around the answer.
    remove_explanations: bool = False
    # Maximum number of characters kept by the final clean-up step.
    max_length: int = 200
    case_sensitive: bool = False
    # One of 'first', 'last', 'middle', 'full', 'initials'.
    name_format: str = 'full'
@dataclass
class AnswerAnalysis:
    """Result of analysing a question: expected answer type plus format rule.

    Instances are created with keyword arguments by the formatter, which
    requires the dataclass-generated ``__init__``; the bare class only
    declared annotations and could not be instantiated that way.
    """

    # Expected category of the answer.
    answer_type: AnswerType
    # Classifier confidence, 0.0 to 1.0.
    confidence: float
    # Human-readable record of which patterns/modifiers fired.
    detected_patterns: List[str]
    # Formatting switches to apply to the answer.
    format_rule: FormatRule
    # Free-form diagnostic details attached by the analyser.
    metadata: Dict[str, Any]
class GAIAAnswerFormatter:
    """
    GAIA Answer Format Compliance System

    Ensures all answers meet exact GAIA format requirements through:
    - Question analysis to determine expected answer format
    - Answer type classification (NUMERIC, LIST, NAME, TEXT)
    - Format-specific post-processing rules
    - Validation before submission
    """

    # Patterns for detecting answer types from questions.
    # They are matched against the LOWER-CASED question, so every literal
    # here must itself be lower-case (an upper-case 'CEO' could never match).
    QUESTION_PATTERNS = {
        AnswerType.NUMERIC: [
            r'\bhow many\b', r'\bcount\b', r'\bnumber of\b', r'\bhow much\b',
            r'\bwhat is the\s+(?:total|sum|amount|quantity|number)\b',
            r'\bcalculate\b', r'\bcompute\b', r'\bfind the value\b',
            r'\bwhat percentage\b', r'\bhow old\b', r'\bwhat year\b',
            r'\bhow long\b', r'\bhow tall\b', r'\bhow wide\b', r'\bhow deep\b',
            r'\bat.?bats?\b', r'\bstudio albums?\b', r'\bspecies\b', r'\bhighest number\b'
        ],
        AnswerType.LIST: [
            r'\blist\b', r'\bname all\b', r'\bwhat are\b', r'\bwhich\b.*\band\b',
            r'\benumerate\b', r'\bidentify all\b', r'\bmention all\b',
            r'\bprovide.*list\b', r'\bgive.*examples\b', r'\bcomma.?separated\b'
        ],
        AnswerType.NAME: [
            r'\bwho\b', r'\bwho is\b', r'\bwho was\b', r'\bwho are\b', r'\bwho were\b',
            r'\bname of\b', r'\bnamed\b', r'\bcalled\b', r'\bauthor\b',
            r'\bdirector\b', r'\bactor\b', r'\bsinger\b', r'\bmusician\b',
            r'\bpresident\b', r'\bminister\b', r'\bceo\b', r'\bnominated\b'
        ],
        AnswerType.BOOLEAN: [
            r'\bis it\b', r'\bcan\b', r'\bdoes\b', r'\bdo\b', r'\bwill\b',
            r'\bwould\b', r'\bshould\b', r'\btrue or false\b', r'\byes or no\b'
        ],
        AnswerType.DATE: [
            r'\bwhen\b', r'\bwhat date\b', r'\bwhat time\b', r'\bwhat year\b',
            r'\bwhat month\b', r'\bwhat day\b', r'\bin which year\b'
        ]
    }

    # Patterns for detecting content in answers.
    ANSWER_PATTERNS = {
        'numbers': r'\b\d+(?:\.\d+)?\b',
        'list_separators': r'[,;]\s*',
        'names': r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b',
        'explanations': r'\b(?:because|since|therefore|however|the reason|this is|explanation)\b',
        'verbose_intro': r'^(?:the answer is|the result is|this shows|we can see|it appears|the video features|the document shows)\s*',
        # The degree sign was previously stored as mis-decoded bytes and could
        # not match real "°C" / "°F" text.
        'units': r'\b(?:meters?|feet|inches?|cm|mm|kg|lbs?|celsius|fahrenheit|°[CF]|years?|months?|days?)\b'
    }

    # Common list items that should be alphabetized.
    COMMON_LIST_ITEMS = {
        'vegetables': ['broccoli', 'celery', 'lettuce', 'carrot', 'onion', 'potato', 'tomato'],
        'fruits': ['apple', 'banana', 'cherry', 'grape', 'orange', 'strawberry'],
        'colors': ['red', 'blue', 'green', 'yellow', 'black', 'white', 'purple'],
        'countries': ['usa', 'canada', 'mexico', 'france', 'germany', 'italy', 'spain']
    }
def __init__(self):
    """Set up the formatter with its question analyser and log readiness."""
    analyzer = IntelligentQuestionAnalyzer()
    self.intelligent_analyzer = analyzer
    # NOTE(review): the first characters of this message look like a
    # mis-decoded emoji; confirm the intended glyph before changing it.
    logger.info("π― GAIA Answer Formatter initialized with intelligent question analysis")
def format_answer(self, question: str, answer: str) -> str:
    """
    Turn a raw answer into its GAIA-compliant form.

    Args:
        question: The original question; it drives the format analysis.
        answer: The raw answer text.

    Returns:
        The formatted answer, or an empty string when ``answer`` is empty
        or contains only whitespace.
    """
    # Nothing to format.
    if not (answer and answer.strip()):
        logger.warning("Empty answer provided")
        return ""

    # Analyse the question, then adapt the result to AnswerAnalysis.
    question_profile = self.intelligent_analyzer.analyze_question(question)
    analysis = self._convert_intelligent_analysis(question_profile)
    logger.info(f"Intelligent analysis: {analysis.answer_type.value} (confidence: {analysis.confidence:.2f})")

    # Pipeline: preprocess -> type-specific rules -> final clean-up.
    preprocessed = self._preprocess_answer(answer)
    shaped = self._apply_format_rules(preprocessed, analysis)
    final_answer = self._final_cleanup(shaped, analysis)

    # Record the transformation whenever the text changed.
    if final_answer != answer:
        logger.info(f"Answer transformed: '{answer[:50]}...' β '{final_answer}'")
    return final_answer
def _analyze_question(self, question: str) -> AnswerAnalysis:
    """Classify the expected answer type of ``question`` by pattern scoring.

    Every pattern in ``QUESTION_PATTERNS`` that matches adds one point to its
    answer type. The highest-scoring type wins (ties go to the type listed
    first); with no hits at all the result is TEXT with confidence 0.3.

    Matching is case-insensitive. The previous version lower-cased the
    question and matched case-sensitively, so a pattern containing capitals
    (e.g. ``CEO``) could never fire.
    """
    detected_patterns = []
    type_scores = {}
    for candidate, patterns in self.QUESTION_PATTERNS.items():
        hits = [p for p in patterns if re.search(p, question, re.IGNORECASE)]
        detected_patterns.extend(f"{candidate.value}:{p}" for p in hits)
        type_scores[candidate] = len(hits)

    best = max(type_scores, key=type_scores.get, default=None)
    if best is None or type_scores[best] == 0:
        answer_type = AnswerType.TEXT
        confidence = 0.3
    else:
        answer_type = best
        # Each hit is worth 0.3, capped at full confidence.
        confidence = min(1.0, type_scores[best] * 0.3)

    format_rule = self._create_format_rule(answer_type, question)
    metadata = {
        'question_length': len(question),
        'type_scores': {t.value: s for t, s in type_scores.items()},
        'question_keywords': self._extract_keywords(question)
    }
    return AnswerAnalysis(
        answer_type=answer_type,
        confidence=confidence,
        detected_patterns=detected_patterns,
        format_rule=format_rule,
        metadata=metadata
    )
def _convert_intelligent_analysis(self, intelligent_analysis: IntelligentAnalysis) -> AnswerAnalysis:
    """Convert an intelligent-analyser result into the legacy AnswerAnalysis.

    The name-related flags are all derived from a single ``name_format``
    value that defaults to ``'full'``. Previously the flags were read
    without a default while ``name_format`` itself defaulted to ``'full'``,
    so a missing key produced ``name_format='full'`` with
    ``full_names=False``.
    """
    # Map intelligent formats to legacy answer types; unknown formats are TEXT.
    format_to_type_map = {
        IntelligentFormat.NUMBER: AnswerType.NUMERIC,
        IntelligentFormat.PERCENTAGE: AnswerType.NUMERIC,
        IntelligentFormat.LIST_ALPHABETICAL: AnswerType.LIST,
        IntelligentFormat.LIST_CHRONOLOGICAL: AnswerType.LIST,
        IntelligentFormat.LIST_NUMERICAL: AnswerType.LIST,
        IntelligentFormat.NAME_FULL: AnswerType.NAME,
        IntelligentFormat.NAME_FIRST: AnswerType.NAME,
        IntelligentFormat.NAME_LAST: AnswerType.NAME,
        IntelligentFormat.NAME_INITIALS: AnswerType.NAME,
        IntelligentFormat.BOOLEAN: AnswerType.BOOLEAN,
        IntelligentFormat.DATE: AnswerType.DATE,
        IntelligentFormat.TEXT_CONCISE: AnswerType.TEXT,
        IntelligentFormat.TEXT_DETAILED: AnswerType.TEXT,
        IntelligentFormat.CURRENCY: AnswerType.NUMERIC
    }
    answer_type = format_to_type_map.get(intelligent_analysis.expected_format, AnswerType.TEXT)

    rules = intelligent_analysis.formatting_rules
    name_format = rules.get('name_format', 'full')
    format_rule = FormatRule(
        extract_numbers_only=rules.get('extract_numbers_only', False),
        alphabetize_lists=rules.get('alphabetize_lists', False),
        last_names_only=name_format == 'last',
        first_names_only=name_format == 'first',
        middle_names_only=name_format == 'middle',
        full_names=name_format == 'full',
        remove_explanations=rules.get('remove_explanations', False),
        max_length=rules.get('max_length', 200),
        case_sensitive=rules.get('case_sensitive', False),
        name_format=name_format
    )

    # One "<intent>:<modifier>" entry per modifier reported by the analyser.
    intent = intelligent_analysis.intent.value
    detected_patterns = [f"{intent}:{modifier}" for modifier in intelligent_analysis.modifiers]

    metadata = {
        'intelligent_intent': intent,
        'intelligent_format': intelligent_analysis.expected_format.value,
        'key_entities': intelligent_analysis.key_entities,
        'modifiers': intelligent_analysis.modifiers,
        'context_clues': intelligent_analysis.context_clues,
        'original_confidence': intelligent_analysis.confidence
    }
    return AnswerAnalysis(
        answer_type=answer_type,
        confidence=intelligent_analysis.confidence,
        detected_patterns=detected_patterns,
        format_rule=format_rule,
        metadata=metadata
    )
def _create_format_rule(self, answer_type: AnswerType, question: str) -> FormatRule:
    """Build the FormatRule for ``answer_type`` in the context of ``question``.

    NUMERIC and LIST get fixed rules. NAME rules depend on which part of the
    name the question asks for. Every other type gets a TEXT-style rule that
    is made concise when the question contains one of a few cue phrases.
    """
    q_lower = question.lower()

    if answer_type == AnswerType.NUMERIC:
        return FormatRule(
            extract_numbers_only=True,
            remove_explanations=True,
            max_length=50
        )

    if answer_type == AnswerType.LIST:
        return FormatRule(
            alphabetize_lists=True,
            remove_explanations=True,
            max_length=500
        )

    if answer_type == AnswerType.NAME:
        name_format = self._analyze_name_requirements(q_lower)
        return FormatRule(
            last_names_only=(name_format == 'last'),
            first_names_only=(name_format == 'first'),
            middle_names_only=(name_format == 'middle'),
            full_names=(name_format == 'full'),
            name_format=name_format,
            remove_explanations=True,
            max_length=200,
            case_sensitive=False
        )

    # Substring cues that call for a terse answer.
    concise_cues = (
        'chess', 'move', 'algebraic notation', 'best move', 'correct move',
        'final output', 'result', 'what is the', 'provide the'
    )
    needs_concise = any(cue in q_lower for cue in concise_cues)
    return FormatRule(
        remove_explanations=needs_concise,
        max_length=300 if not needs_concise else 100
    )

def _analyze_name_requirements(self, q_lower: str) -> str:
    """Return which part of a name the lower-cased question asks for.

    The result is one of 'initials', 'last', 'first', 'middle' or 'full'.

    NOTE(review): _create_format_rule called this method but the class never
    defined it, so every NAME question raised AttributeError. The cue phrases
    below are a reconstruction; confirm them against the intended behaviour.
    """
    cues = (
        ('initials', ('initials',)),
        ('last', ('last name', 'surname', 'family name')),
        ('first', ('first name', 'given name', 'forename')),
        ('middle', ('middle name',)),
    )
    for name_format, phrases in cues:
        if any(phrase in q_lower for phrase in phrases):
            return name_format
    return 'full'
def _preprocess_answer(self, answer: str) -> str: | |
"""Clean and preprocess the raw answer.""" | |
# Remove common verbose introductions | |
answer = re.sub(self.ANSWER_PATTERNS['verbose_intro'], '', answer, flags=re.IGNORECASE) | |
# Clean whitespace | |
answer = re.sub(r'\s+', ' ', answer).strip() | |
# Remove markdown formatting | |
answer = re.sub(r'\*\*(.*?)\*\*', r'\1', answer) # Bold | |
answer = re.sub(r'\*(.*?)\*', r'\1', answer) # Italic | |
answer = re.sub(r'`(.*?)`', r'\1', answer) # Code | |
return answer | |
def _apply_format_rules(self, answer: str, analysis: AnswerAnalysis) -> str:
    """Dispatch ``answer`` to the formatter matching the analysed answer type.

    The first applicable rule wins: number extraction, list formatting, name
    formatting, then explanation removal. If none applies, the answer is
    returned unchanged.
    """
    rule = analysis.format_rule
    kind = analysis.answer_type

    if kind == AnswerType.NUMERIC and rule.extract_numbers_only:
        return self._extract_number(answer)
    if kind == AnswerType.LIST and rule.alphabetize_lists:
        return self._format_list(answer)
    if kind == AnswerType.NAME:
        # Names are always formatted; the rule only selects surname-only mode.
        return self._format_names(answer, last_names_only=bool(rule.last_names_only))
    if rule.remove_explanations:
        return self._remove_explanations(answer)
    return answer
def _extract_number(self, answer: str) -> str: | |
"""Extract pure number from answer text following GAIA exact match rules.""" | |
# GAIA Rule: Numbers should have no commas, no units (unless specified) | |
# Enhanced patterns for different number formats - ORDER MATTERS! | |
patterns = [ | |
# Most specific patterns first | |
r'(?:released|published|has|have|had|features?|shows?|contains?|includes?)\s+(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:studio\s+albums?|albums?|species|items?|things?|at-bats?|at\s+bats?)', # "released 2 studio albums" | |
r'(?:is|are|was|were|exactly|total|sum|amount)\s+(\d+(?:,\d{3})*(?:\.\d+)?)\b', # "is 5", "were 480" | |
r'(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:studio\s+albums?|albums?|species|items?|things?|at-bats?|at\s+bats?)', # "2 studio albums" | |
r'(?:\$|USD\s*)?(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:USD|dollars?)?', # Currency amounts | |
r'(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:percent|%)', # Percentages (remove % unless specified) | |
r'(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:degrees?|Β°)', # Temperatures | |
r'(\d+(?:,\d{3})*(?:\.\d+)?)\s*(?:people|persons|individuals)', # People counts | |
# Population pattern specifically | |
r'population\s+is\s+(\d+(?:,\d{3})*(?:\.\d+)?)', | |
# Least specific - any isolated number (avoid years/dates) | |
r'(?<!19|20)\b(\d{1,7}(?:,\d{3})*(?:\.\d+)?)\b(?!\d)' # 1-7 digit numbers with commas, not part of years | |
] | |
# Try patterns in order of specificity | |
for pattern in patterns: | |
matches = re.findall(pattern, answer, re.IGNORECASE) | |
if matches: | |
number = matches[0] | |
# GAIA formatting: remove commas, clean decimal format | |
number = number.replace(',', '') | |
# Ensure proper decimal format (no trailing zeros unless needed) | |
if '.' in number: | |
# Keep trailing zeros for currency if specified in question | |
if 'decimal places' in answer.lower() or 'USD' in answer: | |
number = f"{float(number):.2f}" | |
else: | |
number = str(float(number)) | |
return number | |
# If no numbers found, return original answer | |
return answer | |
def _format_list(self, answer: str) -> str: | |
"""Format and alphabetize list items following GAIA exact match rules.""" | |
# GAIA Rule: Comma-separated list, no articles, alphabetical order | |
# Remove common prefixes first | |
clean_answer = re.sub(r'^.*?\s+(are|were|include|mentioned)\s+', '', answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^(The|These|Those)\s+.*?\s+(are|were|include|mentioned):\s*', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^.*?vegetables\s+(?:are|include):\s*', '', clean_answer, flags=re.IGNORECASE) | |
# Handle "and" at the end: "red, blue, green, and yellow" -> "red, blue, green, yellow" | |
clean_answer = re.sub(r',\s*and\s+([^,]+)$', r', \1', clean_answer) | |
clean_answer = re.sub(r'\s+and\s+([^,]+)$', r', \1', clean_answer) | |
# Try different separators | |
items = [] | |
if ',' in clean_answer: | |
items = [item.strip() for item in clean_answer.split(',')] | |
elif ' and ' in clean_answer: | |
items = [item.strip() for item in clean_answer.split(' and ')] | |
elif ';' in clean_answer: | |
items = [item.strip() for item in clean_answer.split(';')] | |
elif '\n' in clean_answer: | |
items = [item.strip() for item in clean_answer.split('\n')] | |
if not items: | |
# Try to extract items from natural language | |
items = self._extract_list_items(clean_answer) | |
if not items or len(items) < 2: | |
return answer | |
# Clean items according to GAIA rules | |
cleaned_items = [] | |
for item in items: | |
# Remove common prefixes/suffixes | |
item = re.sub(r'^(?:and\s+|or\s+|\d+\.\s*|-\s*|\*\s*)', '', item, flags=re.IGNORECASE) | |
item = re.sub(r'\s*(?:etc\.?|and so on)$', '', item, flags=re.IGNORECASE) | |
item = re.sub(r'\s*are\s+mentioned.*$', '', item, flags=re.IGNORECASE) | |
item = re.sub(r'\s*\(.*?\)$', '', item) # Remove parenthetical info | |
# GAIA Rule: Remove articles (the, a, an) | |
item = re.sub(r'^(?:the\s+|a\s+|an\s+)', '', item, flags=re.IGNORECASE) | |
# Clean whitespace and punctuation | |
item = item.strip(' .,;') | |
# Only include meaningful items | |
if item and len(item) > 1 and not item.lower() in ['not', 'to', 'be', 'removed']: | |
cleaned_items.append(item) | |
if len(cleaned_items) < 2: | |
return answer | |
# GAIA Rule: Alphabetize | |
cleaned_items.sort(key=str.lower) | |
# GAIA Rule: Comma-separated format | |
return ', '.join(cleaned_items) | |
def _extract_list_items(self, answer: str) -> List[str]: | |
"""Extract list items from natural language.""" | |
# Look for patterns like "A, B, and C" or "A and B" | |
and_pattern = r'\b(\w+(?:\s+\w+)*)\s+and\s+(\w+(?:\s+\w+)*)\b' | |
matches = re.findall(and_pattern, answer) | |
if matches: | |
items = [] | |
for match in matches: | |
items.extend(match) | |
return items | |
# Look for enumerated items | |
enum_pattern = r'\b(?:\d+\.|[a-z]\)|\*|\-)\s*([^.]+?)(?=\s*(?:\d+\.|[a-z]\)|\*|\-|$))' | |
enum_matches = re.findall(enum_pattern, answer, re.MULTILINE) | |
if enum_matches: | |
return [match.strip() for match in enum_matches] | |
return [] | |
def _format_names(self, answer: str, last_names_only: bool = False) -> str: | |
"""Format names according to requirements.""" | |
# Clean up the answer first | |
clean_answer = answer.strip() | |
# Remove common prefixes | |
clean_answer = re.sub(r'^.*?\s+(are|were|include|mentioned)\s+', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^(The|These|Those)\s+.*?\s+(are|were|include|mentioned):\s*', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^.*?\s+were\s+', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^.*?\s+actors\s+were\s+', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^.*?\s+written\s+by\s+', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^\s*The\s+players\s+are\s+', '', clean_answer, flags=re.IGNORECASE) | |
clean_answer = re.sub(r'^\s*The\s+main\s+actors\s+were\s+', '', clean_answer, flags=re.IGNORECASE) | |
# Remove trailing periods | |
clean_answer = re.sub(r'\.$', '', clean_answer) | |
# Enhanced name pattern to handle titles and prefixes | |
name_pattern = r'(?:Dr\.?\s+|Professor\s+|Mr\.?\s+|Ms\.?\s+|Mrs\.?\s+)?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)' | |
matches = re.findall(name_pattern, clean_answer) | |
if not matches: | |
# Fallback to simpler pattern | |
simple_pattern = r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b' | |
matches = re.findall(simple_pattern, clean_answer) | |
if not matches: | |
return clean_answer | |
if last_names_only: | |
# Extract last names only | |
last_names = [] | |
for name in matches: | |
# Remove titles and prefixes | |
clean_name = re.sub(r'^(?:Dr\.?\s+|Professor\s+|Mr\.?\s+|Ms\.?\s+|Mrs\.?\s+)', '', name).strip() | |
parts = clean_name.split() | |
if len(parts) >= 2: | |
last_names.append(parts[-1]) # Take the last part as surname | |
if last_names: | |
return ', '.join(last_names) | |
# Return formatted full names | |
return ', '.join(matches) | |
def _remove_explanations(self, answer: str) -> str: | |
"""Remove verbose explanations to get concise answers following GAIA exact match rules.""" | |
# GAIA Rule: Answer should be just the answer, nothing else | |
# Chess move extraction patterns (algebraic notation) | |
chess_patterns = [ | |
r'(?:move|best|winning|correct)\s+(?:is|for\s+black|move)\s+([a-h][1-8]|[NBRQK][a-h]?[1-8]?x?[a-h][1-8]|O-O(?:-O)?)', | |
r'(?:The\s+)?(?:winning\s+)?move\s+(?:for\s+black\s+)?is\s+([a-h][1-8]|[NBRQK][a-h]?[1-8]?x?[a-h][1-8]|O-O(?:-O)?)', | |
r'\b([a-h][1-8]|[NBRQK][a-h]?[1-8]?x?[a-h][1-8]|O-O(?:-O)?)\b' | |
] | |
# Try chess move extraction first | |
for pattern in chess_patterns: | |
match = re.search(pattern, answer, re.IGNORECASE) | |
if match: | |
move = match.group(1) | |
# Validate it looks like a chess move | |
if re.match(r'^[a-h][1-8]$|^[NBRQK][a-h]?[1-8]?x?[a-h][1-8]$|^O-O(-O)?$', move): | |
return move | |
# Name extraction patterns (remove articles, abbreviations) | |
name_patterns = [ | |
r'(?:nominated|written|created|directed)\s+by\s+(?:User:)?([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)', | |
r'(?:The\s+)?(?:first\s+name|name)\s+is\s+([A-Z][a-zA-Z]+)', | |
r'([A-Z][a-zA-Z]+)\s+(?:is\s+the\s+(?:first\s+name|name|author|director))', | |
] | |
for pattern in name_patterns: | |
match = re.search(pattern, answer, re.IGNORECASE) | |
if match: | |
name = match.group(1).strip() | |
# Remove common prefixes/suffixes | |
name = re.sub(r'^(?:User:|Dr\.?\s+|Professor\s+|Mr\.?\s+|Ms\.?\s+|Mrs\.?\s+)', '', name) | |
name = re.sub(r'\s*\([^)]*\)$', '', name) # Remove parenthetical info | |
return name | |
# General concise answer extraction patterns | |
concise_patterns = [ | |
# "The answer is X" -> "X" | |
r'(?:The\s+)?(?:answer|result|output|solution|total)\s+(?:is|was|were)\s+([^.!?]+)', | |
# "X is the answer" -> "X" | |
r'([^.!?]+)\s+is\s+the\s+(?:answer|result|output|solution)', | |
# "It is X" -> "X" | |
r'(?:It|This)\s+(?:is|was|were)\s+([^.!?]+)', | |
# Extract content after key phrases | |
r'(?:Here|The\s+answer|The\s+result):\s*([^.!?]+)', | |
# Extract last meaningful phrase | |
r'\.([^.!?]{1,50})\.?$' | |
] | |
for pattern in concise_patterns: | |
match = re.search(pattern, answer, re.IGNORECASE) | |
if match: | |
core_answer = match.group(1).strip() | |
# Clean up the extracted answer | |
core_answer = re.sub(r'^(?:The\s+|A\s+|An\s+)', '', core_answer, flags=re.IGNORECASE) # Remove articles | |
core_answer = re.sub(r'\s*\([^)]*\)$', '', core_answer) # Remove parenthetical info | |
core_answer = core_answer.strip(' .,;') | |
# Only return if significantly shorter than original and meaningful | |
if len(core_answer) < len(answer) * 0.4 and len(core_answer) > 0 and len(core_answer.split()) <= 5: | |
return core_answer | |
# If no specific patterns match, try to extract the shortest meaningful sentence | |
sentences = re.split(r'[.!?]+', answer) | |
# Find the shortest sentence that doesn't contain explanation keywords | |
explanation_keywords = [ | |
'because', 'since', 'therefore', 'however', 'the reason', 'this is', | |
'explanation', 'based on', 'after analyzing', 'research', 'found that', | |
'using', 'tool', 'engine', 'calculated' | |
] | |
shortest_sentence = None | |
min_length = float('inf') | |
for sentence in sentences: | |
sentence = sentence.strip() | |
if not sentence: | |
continue | |
# Skip sentences with explanation keywords | |
if any(keyword in sentence.lower() for keyword in explanation_keywords): | |
continue | |
# Prefer shorter sentences | |
if len(sentence) < min_length and len(sentence.split()) <= 10: | |
min_length = len(sentence) | |
shortest_sentence = sentence | |
if shortest_sentence and len(shortest_sentence) < len(answer) * 0.5: | |
# Clean up the sentence | |
shortest_sentence = re.sub(r'^(?:The\s+|A\s+|An\s+)', '', shortest_sentence, flags=re.IGNORECASE) | |
return shortest_sentence.strip(' .,;') | |
# Remove sentences that contain explanation keywords | |
sentences = re.split(r'[.!?]+', answer) | |
filtered_sentences = [] | |
for sentence in sentences: | |
sentence = sentence.strip() | |
if not sentence: | |
continue | |
# Skip sentences with explanation keywords | |
if re.search(self.ANSWER_PATTERNS['explanations'], sentence, re.IGNORECASE): | |
continue | |
# Skip very long explanatory sentences | |
if len(sentence) > 100 and any(word in sentence.lower() for word in [ | |
'because', 'therefore', 'explanation', 'reason', 'this shows', 'due to' | |
]): | |
continue | |
filtered_sentences.append(sentence) | |
if filtered_sentences: | |
# Take the shortest sentence as it's likely the core answer | |
shortest = min(filtered_sentences, key=len) | |
if len(shortest) < len(answer) * 0.5: | |
return shortest.strip() | |
result = '. '.join(filtered_sentences) | |
if not result.endswith('.'): | |
result += '.' | |
return result | |
# If all sentences were filtered out, return the first sentence | |
if sentences: | |
return sentences[0].strip() | |
return answer | |
def _final_cleanup(self, answer: str, analysis: AnswerAnalysis) -> str:
    """Apply the length limit and type-specific tidy-ups, then trim.

    An answer longer than ``format_rule.max_length`` is cut to that length.
    The trailing fragment is dropped only when the cut lands inside a word.
    Previously the last word was always removed, even when the cut fell
    exactly on a word boundary. Numeric answers lose trailing '.', ',' and
    ';'; name answers are re-capitalised.
    """
    limit = analysis.format_rule.max_length
    if len(answer) > limit:
        head = answer[:limit]
        # The cut splits a word only if non-space characters sit on both sides.
        splits_word = bool(head) and not head[-1].isspace() and not answer[limit].isspace()
        head = head.strip()
        if splits_word and ' ' in head:
            head = head.rsplit(' ', 1)[0]
        answer = head

    if analysis.answer_type == AnswerType.NUMERIC:
        answer = answer.rstrip('.,;')

    if analysis.answer_type == AnswerType.NAME:
        answer = self._capitalize_names(answer)

    return answer.strip()
def _capitalize_names(self, answer: str) -> str: | |
"""Ensure proper capitalization for names.""" | |
# Split by commas and capitalize each name | |
parts = [part.strip() for part in answer.split(',')] | |
capitalized_parts = [] | |
for part in parts: | |
# Capitalize each word in the name | |
words = part.split() | |
capitalized_words = [word.capitalize() for word in words] | |
capitalized_parts.append(' '.join(capitalized_words)) | |
return ', '.join(capitalized_parts) | |
def _extract_keywords(self, text: str) -> List[str]: | |
"""Extract keywords from text for analysis.""" | |
# Simple keyword extraction | |
words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower()) | |
# Filter out common words | |
stop_words = {'the', 'and', 'are', 'was', 'were', 'what', 'how', 'who', 'when', 'where', 'why'} | |
keywords = [word for word in words if word not in stop_words] | |
return keywords[:10] # Return top 10 keywords | |
def validate_format(self, question: str, answer: str) -> Tuple[bool, List[str], float]:
    """
    Check ``answer`` against the format expected for ``question``.

    Args:
        question: Original question
        answer: Formatted answer

    Returns:
        Tuple of (is_valid, issues, compliance_score). The score starts at
        1.0, loses a fixed penalty per issue, is forced to 0.0 for an empty
        answer and never goes below 0.0.
    """
    issues: List[str] = []
    score = 1.0
    expected = self._analyze_question(question).answer_type

    if expected == AnswerType.NUMERIC:
        if re.search(r'\b\d+(?:\.\d+)?\b', answer) is None:
            issues.append("Numeric answer expected but no numbers found")
            score -= 0.5
        # More than five words is too wordy for a numeric answer.
        if len(answer.split()) > 5:
            issues.append("Numeric answer too verbose")
            score -= 0.3
    elif expected == AnswerType.LIST:
        has_separator = ',' in answer or ' and ' in answer
        if not has_separator:
            issues.append("List format expected but no separators found")
            score -= 0.3
        # Alphabetical order is checked on comma-separated items only.
        entries = [entry.strip() for entry in answer.split(',')]
        if len(entries) > 1 and entries != sorted(entries, key=str.lower):
            issues.append("List items not alphabetized")
            score -= 0.2

    # Checks that apply to every answer type.
    if len(answer) > 300:
        issues.append("Answer too long")
        score -= 0.2
    if not answer.strip():
        issues.append("Empty answer")
        score = 0.0

    return len(issues) == 0, issues, max(0.0, score)
# Convenience wrapper for one-off formatting
def format_gaia_answer(question: str, answer: str) -> str:
    """
    Format a single answer for GAIA compliance.

    A new GAIAAnswerFormatter is created on every call.

    Args:
        question: The original question
        answer: The raw answer to format

    Returns:
        Formatted answer meeting GAIA requirements
    """
    return GAIAAnswerFormatter().format_answer(question, answer)
# Integration function for existing systems
def integrate_with_orchestrator(original_answer_func):
    """
    Decorator that passes an answer function's result through GAIA formatting.

    The wrapped function is called with the question and any extra arguments;
    its return value is then formatted against that question. The wrapper
    keeps the wrapped function's name, docstring and other metadata.

    Usage:
        @integrate_with_orchestrator
        def my_agent_function(question):
            return "raw answer"
    """
    # Local import keeps this block self-contained.
    import functools

    @functools.wraps(original_answer_func)
    def wrapper(question: str, *args, **kwargs) -> str:
        raw_answer = original_answer_func(question, *args, **kwargs)
        return format_gaia_answer(question, raw_answer)

    return wrapper