Update agent.py
Browse files
agent.py
CHANGED
@@ -533,38 +533,9 @@ class EnhancedGAIAAgent:
|
|
533 |
1. THINK: Analyze the GAIA question thoroughly
|
534 |
2. ACT: Use your specialist tools IF RELEVANT
|
535 |
3. OBSERVE: Review results from specialist tools
|
536 |
-
4. REPEAT: Continue until you have the final answer.
|
537 |
|
538 |
-
CRITICAL
|
539 |
-
**Numbers (no commas, no units unless specified):**
|
540 |
-
Question: "How many research papers were published by the university between 2010 and 2020?"
|
541 |
-
CORRECT: 156
|
542 |
-
WRONG: "The answer is 156 papers" or "156 papers" or "one hundred fifty-six" or " 156 research papers were published by the university between 2010 and 2020"
|
543 |
-
|
544 |
-
**Strings (exact words, no articles, no explanations):**
|
545 |
-
Question: "What is the last name of the software engineer mentioned in the report?"
|
546 |
-
CORRECT: Martinez
|
547 |
-
WRONG: "The last name is Martinez" or "Dr. Martinez" or "martinez"
|
548 |
-
|
549 |
-
**Lists (comma-separated with spaces, alphabetized when requested):**
|
550 |
-
Question: "List the programming languages from this job description, alphabetized:"
|
551 |
-
CORRECT: C++, Java, JavaScript, Python, Ruby, TypeScript
|
552 |
-
WRONG: "C++,Java,JavaScript" or "1. C++ 2. Java" or "[C++, Java]"
|
553 |
-
|
554 |
-
**First/Last names only:**
|
555 |
-
Question: "Give only the first name of the developer who created the framework."
|
556 |
-
CORRECT: Sarah
|
557 |
-
WRONG: "Sarah Johnson" or "The first name is Sarah"
|
558 |
-
|
559 |
-
**Country codes:**
|
560 |
-
Question: "Give the ISO country code as your answer."
|
561 |
-
CORRECT: FRA
|
562 |
-
WRONG: "The ISO code is FRA" or "France (FRA)"
|
563 |
-
|
564 |
-
**Technical notation:**
|
565 |
-
Question: "Provide your response in standard notation."
|
566 |
-
CORRECT: 3.14e+8
|
567 |
-
WRONG: "The value is 3.14e+8" or "314 million"
|
568 |
|
569 |
ABSOLUTE RULES:
|
570 |
- NO explanations, NO additional text, NO units unless specifically requested
|
@@ -577,25 +548,81 @@ class EnhancedGAIAAgent:
|
|
577 |
max_steps = 10,
|
578 |
verbose = True
|
579 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
580 |
|
581 |
async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
|
582 |
question = question_data.get("Question", "")
|
583 |
task_id = question_data.get("task_id", "")
|
|
|
584 |
context_prompt = f"""
|
585 |
GAIA Task ID: {task_id}
|
586 |
Question: {question}
|
587 |
{f"Associated files: {question_data.get('file_name', '')}" if 'file_name' in question_data else 'No files provided'}
|
588 |
-
|
589 |
-
|
590 |
-
2. Use specialist tools ONLY when their specific expertise is needed
|
591 |
-
3. Provide a precise, exact answer in GAIA format
|
592 |
-
Begin your reasoning process:
|
593 |
"""
|
|
|
594 |
try:
|
595 |
from llama_index.core.workflow import Context
|
596 |
ctx = Context(self.coordinator)
|
597 |
-
|
598 |
-
|
599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
600 |
except Exception as e:
|
601 |
return f"Error processing question: {str(e)}"
|
|
|
533 |
1. THINK: Analyze the GAIA question thoroughly
|
534 |
2. ACT: Use your specialist tools IF RELEVANT
|
535 |
3. OBSERVE: Review results from specialist tools
|
536 |
+
4. REPEAT: Continue until you have the final answer.
|
537 |
|
538 |
+
CRITICAL: Your final answer must be EXACT and CONCISE as required by GAIA format:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
|
540 |
ABSOLUTE RULES:
|
541 |
- NO explanations, NO additional text, NO units unless specifically requested
|
|
|
548 |
max_steps = 10,
|
549 |
verbose = True
|
550 |
)
|
551 |
+
|
552 |
+
def format_gaia_answer(self, raw_response: str, original_question: str, *, llm=None) -> str:
    """Reduce a verbose agent response to the bare answer GAIA expects.

    A few-shot prompt (numbers, names, lists, country codes, notation) is
    sent to a completion LLM together with the original question and the
    agent's raw response; the stripped completion is the answer.

    Args:
        raw_response: Full text produced by the agent.
        original_question: The question the agent was asked.
        llm: Optional object exposing ``complete(prompt)``. When omitted,
            the ``proj_llm`` name used by the original code (defined
            outside this block) is used.

    Returns:
        The extracted answer. If the LLM call raises, or yields nothing
        but whitespace, the result of
        ``self._extract_fallback_answer(raw_response)`` is returned instead.
    """
    format_prompt = f"""Extract the exact answer from the response below. Follow GAIA formatting rules strictly.

Examples:

Question: "How many research papers were published by the university between 2010 and 2020?"
Response: "Based on my analysis of the data, I found that the university published 156 research papers between 2010 and 2020."
Answer: 156

Question: "What is the last name of the software engineer mentioned in the report?"
Response: "After reviewing the document, the software engineer mentioned is Dr. Martinez who developed the system."
Answer: Martinez

Question: "List the programming languages from this job description, alphabetized:"
Response: "The job description mentions several programming languages including Python, Java, C++, and JavaScript. When alphabetized, these are: C++, Java, JavaScript, Python"
Answer: C++, Java, JavaScript, Python

Question: "Give only the first name of the developer who created the framework."
Response: "The framework was created by Sarah Johnson, a senior developer at the company."
Answer: Sarah

Question: "Give the ISO country code as your answer."
Response: "The country in question is France, which has the ISO code FRA."
Answer: FRA

Question: "Provide your response in standard notation."
Response: "The calculated value is 314 million, which in standard notation is 3.14e+8"
Answer: 3.14e+8

Now extract the exact answer:

Question: {original_question}
Response: {raw_response}
Answer:"""

    try:
        # Use a simple, fast LLM for formatting. The lookup of `proj_llm`
        # stays inside the try so a missing/broken LLM also hits the fallback.
        formatter = proj_llm if llm is None else llm
        answer = str(formatter.complete(format_prompt)).strip()
    except Exception as e:
        print(f"Error in formatting: {e}")
        return self._extract_fallback_answer(raw_response)

    # The prompt already ends with "Answer:"; some models repeat the label.
    # Leaving it in would break exact-match scoring.
    label = "answer:"
    if answer[:len(label)].lower() == label:
        answer = answer[len(label):].strip()

    # An empty completion is never a valid answer; try the fallback instead.
    if not answer:
        print("Error in formatting: formatter returned an empty answer")
        return self._extract_fallback_answer(raw_response)

    return answer
|
600 |
+
|
601 |
|
602 |
async def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
    """Run the coordinator on one GAIA task and return the formatted answer.

    Args:
        question_data: Task record. The keys read here are ``"Question"``,
            ``"task_id"`` and ``"file_name"``; each may be absent.

    Returns:
        The answer produced by ``self.format_gaia_answer`` from the
        coordinator's raw response. Any exception raised while importing
        the workflow context, running the coordinator or formatting is
        caught and reported as ``"Error processing question: <message>"``.
    """
    question = question_data.get("Question", "")
    task_id = question_data.get("task_id", "")

    # A missing key and an empty value both mean "no attachment"; checking
    # only for key presence would announce an attachment with a blank name.
    file_name = question_data.get("file_name")
    files_line = f"Associated files: {file_name}" if file_name else "No files provided"

    context_prompt = f"""
GAIA Task ID: {task_id}
Question: {question}
{files_line}

Analyze this question and provide your reasoning and final answer.
"""

    try:
        # Imported lazily, inside the try, so an import failure is reported
        # through the same error string as any other failure.
        from llama_index.core.workflow import Context

        ctx = Context(self.coordinator)
        raw_response = await self.coordinator.run(ctx=ctx, user_msg=context_prompt)

        # Post-process to extract exact GAIA format
        formatted_answer = self.format_gaia_answer(str(raw_response), question)

        print(f"Raw response: {raw_response}")
        print(f"Formatted answer: {formatted_answer}")

        return formatted_answer

    except Exception as e:
        return f"Error processing question: {str(e)}"
|