Spaces:
Sleeping
Sleeping
# Utility functions for GAIA Agent Evaluator
import re

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

from config import GAIA_KNOWLEDGE, ANSWER_PREFIXES_TO_REMOVE, LLM_RESPONSE_MARKERS, LLM_END_MARKERS
def create_knowledge_documents():
    """Split GAIA_KNOWLEDGE into chunks and wrap each chunk in a Document.

    Returns:
        A list with one ``Document`` per chunk produced by the splitter, in
        the order the splitter returned them.
    """
    # Separators are tried from coarsest (paragraph break) to finest (the
    # empty string), as listed here.
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 50,
        "separators": ["\n\n", "\n", ".", "!", "?", ",", " ", ""],
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)

    documents = []
    for piece in splitter.split_text(GAIA_KNOWLEDGE):
        documents.append(Document(page_content=piece))
    return documents
def clean_llm_response(response, prompt):
    """Clean up a raw LLM completion so that only the answer text remains.

    Steps, in order:
      1. If ``response`` starts with ``prompt``, that echoed prompt is removed.
      2. For each marker in ``LLM_RESPONSE_MARKERS`` found in the text, only
         the part after its first occurrence is kept.
      3. For each marker in ``LLM_END_MARKERS`` found in the text, only the
         part before its first occurrence is kept.

    Markers are matched case-insensitively, but the returned text keeps the
    capitalisation of the original response.

    Args:
        response: Raw text returned by the model.
        prompt: The prompt that was sent to the model.

    Returns:
        The cleaned answer with surrounding whitespace stripped.
    """
    # Remove the prompt from the beginning if it's included.
    if response.startswith(prompt):
        response = response[len(prompt):]

    # Try to find where the model's actual answer begins.
    for marker in LLM_RESPONSE_MARKERS:
        if not marker:
            # An empty marker would match everywhere; skip it.
            continue
        found = re.search(re.escape(marker), response, re.IGNORECASE)
        if found:
            # Slice the original string (not a lowercased copy) so the
            # answer's capitalisation survives.
            response = response[found.end():].strip()

    # Remove any closing tags if they exist.
    for marker in LLM_END_MARKERS:
        if not marker:
            continue
        found = re.search(re.escape(marker), response, re.IGNORECASE)
        if found:
            response = response[:found.start()].strip()

    return response.strip()
def extract_final_answer(answer):
    """Normalise a model answer so it can be compared by exact matching.

    The answer is stripped, every prefix in ``ANSWER_PREFIXES_TO_REMOVE`` that
    it starts with (compared case-insensitively, checked in list order) is
    removed, one pair of enclosing quotes is dropped, a trailing period is
    removed unless the text consists only of digits and periods, and runs of
    whitespace are collapsed to single spaces.

    Args:
        answer: The answer text to normalise.

    Returns:
        The normalised answer string.
    """
    result = answer.strip()

    # Strip known lead-in phrases, ignoring case.
    for prefix in ANSWER_PREFIXES_TO_REMOVE:
        lowered_prefix = prefix.lower()
        if result.lower().startswith(lowered_prefix):
            result = result[len(prefix):].strip()

    # Drop one pair of enclosing quotes; double quotes are checked first.
    for quote in ('"', "'"):
        if result.startswith(quote) and result.endswith(quote):
            result = result[1:-1]
            break

    # A trailing period is treated as extraneous unless the text is purely
    # numeric (digits and periods only), in which case it is left untouched.
    digits_only = result.replace('.', '').isdigit()
    if result.endswith('.') and not digits_only:
        result = result[:-1]

    # Collapse any run of whitespace into a single space.
    return ' '.join(result.split())
def format_prompt(question, context=""):
    """Format the question into a prompt for the LLM.

    Two templates exist: one that embeds ``context`` in a "Context
    Information" section, and one that asks the model to answer without any
    context. Both end with ``Direct Answer:`` so the completion starts at the
    answer.

    Args:
        question: The question to ask.
        context: Optional supporting text. An empty or whitespace-only value
            is treated as "no context", so the prompt never contains an empty
            context section.

    Returns:
        The fully formatted prompt string.
    """
    # str() keeps non-string but printable contexts working, as they did with
    # plain f-string interpolation.
    has_context = bool(context) and bool(str(context).strip())

    if has_context:
        return f"""You are a precise AI assistant that answers questions using available information. Your answer will be evaluated with exact string matching, so provide only the specific answer requested without additional text.
Context Information:
{context}
Question: {question}
Critical Instructions:
- Provide ONLY the exact answer requested, nothing else
- Do not include phrases like "The answer is", "Final answer", or "Based on the context"
- For numerical answers, use the exact format requested (integers, decimals, etc.)
- For lists, use the exact formatting specified in the question (commas, spaces, etc.)
- For names, use proper capitalization as would appear in official sources
- Be concise and precise - extra words will cause evaluation failure
- If the question asks for multiple items, provide them in the exact format requested
Direct Answer:"""

    return f"""You are a precise AI assistant that answers questions accurately. Your answer will be evaluated with exact string matching, so provide only the specific answer requested without additional text.
Question: {question}
Critical Instructions:
- Provide ONLY the exact answer requested, nothing else
- Do not include phrases like "The answer is", "Final answer", or explanations
- For numerical answers, use the exact format that would be expected
- For lists, use appropriate formatting (commas, spaces, etc.)
- For names, use proper capitalization
- Be concise and precise - extra words will cause evaluation failure
- Answer based on your knowledge and reasoning
Direct Answer:"""