Agents_Course_Final_Assignment_Evaluator

Paused

App Files Community

Michele De Stefano committed on May 25

Commit

b066853

1 Parent(s): 1b8aef5

Now it is possible to process questions incrementally

Browse files

Files changed (3) hide show

agent_factory.py +14 -19
app.py +72 -37
tools/video_sampling.py +0 -1

agent_factory.py CHANGED Viewed

@@ -9,6 +9,7 @@ from langchain_ollama import ChatOllama
 from langgraph.constants import START, END
 from langgraph.graph import MessagesState, StateGraph
 from langgraph.graph.graph import CompiledGraph
 from langgraph.prebuilt import ToolNode
 from pydantic import BaseModel
@@ -67,7 +68,6 @@ class AgentFactory:
         "follow the rules explained above.\n"
     )
-    __llm_for_decision: Runnable
     __llm: Runnable
     __tools: list[BaseTool]
@@ -115,30 +115,25 @@ class AgentFactory:
             web_page_info_retriever,
             youtube_video_to_frame_captions
         ]
-        self.__llm_for_decision = ChatOllama(
-            model=model,
-            temperature=1.0,
-            num_ctx=num_ctx
-        )
         self.__llm = ChatOllama(
             model=model,
             temperature=temperature,
             num_ctx=num_ctx
         ).bind_tools(tools=self.__tools)
-    def __decide_for_code_agent(self, state: MessagesState) -> str:
-        decision_messages = [
-            SystemMessage(
-                content="Answer only yes or no. "
-                        "If you think the question can be easily answered "
-                        "by writing Python code and executing it then answer "
-                        "yes. If you think you can answer by exploiting other "
-                        "resources then answer no."
-            ),
-            state["messages"][-1]
-        ]
-        answer = self.__llm_for_decision.invoke(decision_messages)
-        return answer.content
     def __run_llm(self, state: MessagesState) -> dict[str, Any]:
         answer = self.__llm.invoke(state["messages"])

 from langgraph.constants import START, END
 from langgraph.graph import MessagesState, StateGraph
 from langgraph.graph.graph import CompiledGraph
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 from langgraph.prebuilt import ToolNode
 from pydantic import BaseModel
         "follow the rules explained above.\n"
     )
     __llm: Runnable
     __tools: list[BaseTool]
             web_page_info_retriever,
             youtube_video_to_frame_captions
         ]
         self.__llm = ChatOllama(
             model=model,
             temperature=temperature,
             num_ctx=num_ctx
         ).bind_tools(tools=self.__tools)
+        # llm_endpoint = HuggingFaceEndpoint(
+        #     repo_id="Qwen/Qwen2.5-72B-Instruct",
+        #     task="text-generation",
+        #     max_new_tokens=num_ctx,
+        #     do_sample=False,
+        #     repetition_penalty=1.03,
+        #     temperature=temperature,
+        # )
+        #
+        # self.__llm = (
+        #     ChatHuggingFace(llm=llm_endpoint)
+        #         .bind_tools(tools=self.__tools)
+        # )
     def __run_llm(self, state: MessagesState) -> dict[str, Any]:
         answer = self.__llm.invoke(state["messages"])

app.py CHANGED Viewed

@@ -58,7 +58,11 @@ class BasicAgent:
         return answer
-def download_questions_and_files() -> dict[str, Any]:
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     files_base_url = f"{api_url}/files"
@@ -70,26 +74,26 @@ def download_questions_and_files() -> dict[str, Any]:
         questions_data = response.json()
         if not questions_data:
             print("Fetched questions list is empty.")
-            return {
                 "error": "Fetched questions list is empty or invalid format."
-            }
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
-        return {
             "error": f"Error fetching questions: {e}"
-        }
     except requests.exceptions.JSONDecodeError as e:
         print(f"Error decoding JSON response from questions endpoint: {e}")
         print(f"Response text: {response.text[:500]}")
-        return {
             "error": f"Error decoding server response for questions: {e}"
-        }
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
-        return {
             "error": f"An unexpected error occurred fetching questions: {e}"
-        }
     # Save input questions and related files into the data subdirectory
     try:
@@ -107,18 +111,39 @@ def download_questions_and_files() -> dict[str, Any]:
                         file.write(response.content)
     except requests.exceptions.RequestException as e:
         print(f"Error fetching question-related file: {e}")
-        return {
             "error": f"Error fetching question-related file: {e}"
-        }
     except Exception as e:
         print(f"An unexpected error occurred fetching question-related file: {e}")
-        return {
             "error": f"An unexpected error occurred fetching question-related file: {e}"
-        }
     return questions_data
 def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -145,34 +170,46 @@ def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
     print(agent_code)
     # 2. Fetch Questions and related files (they get saved into the data directory)
-    questions_data = download_questions_and_files()
     # 3. Run your Agent and save agent's answers for later review
     results_log = []
-    answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = json.dumps(item)
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
-            continue
-        try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    with open(AGENT_ANSWERS_FILE_PATH, mode="w") as f:
-        for cur_answer in answers_payload:
-            json.dump(cur_answer, f)
-            f.write("\n")
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -239,18 +276,16 @@ with gr.Blocks() as demo:
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
-    gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")

         return answer
+def retrieve_downloaded_questions() -> list[dict[str, Any]]:
+    with open(QUESTIONS_FILE_PATH, mode="r") as f:
+        return [json.loads(line) for line in f]
+def download_questions_and_files() -> list[dict[str, Any]]:
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     files_base_url = f"{api_url}/files"
         questions_data = response.json()
         if not questions_data:
             print("Fetched questions list is empty.")
+            return [{
                 "error": "Fetched questions list is empty or invalid format."
+            }]
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
+        return [{
             "error": f"Error fetching questions: {e}"
+        }]
     except requests.exceptions.JSONDecodeError as e:
         print(f"Error decoding JSON response from questions endpoint: {e}")
         print(f"Response text: {response.text[:500]}")
+        return [{
             "error": f"Error decoding server response for questions: {e}"
+        }]
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
+        return [{
             "error": f"An unexpected error occurred fetching questions: {e}"
+        }]
     # Save input questions and related files into the data subdirectory
     try:
                         file.write(response.content)
     except requests.exceptions.RequestException as e:
         print(f"Error fetching question-related file: {e}")
+        return [{
             "error": f"Error fetching question-related file: {e}"
+        }]
     except Exception as e:
         print(f"An unexpected error occurred fetching question-related file: {e}")
+        return [{
             "error": f"An unexpected error occurred fetching question-related file: {e}"
+        }]
     return questions_data
+def create_answers_file_if_not_exists() -> None:
+    if not os.path.exists(AGENT_ANSWERS_FILE_PATH):
+        with open(AGENT_ANSWERS_FILE_PATH, 'w'):
+            pass
+def get_answers_payload() -> list[dict[str, Any]]:
+    with open(AGENT_ANSWERS_FILE_PATH, mode="r") as f:
+        answers_payload = [json.loads(line) for line in f]
+    return answers_payload
+def get_task_ids_to_process() -> list[str]:
+    with open(QUESTIONS_FILE_PATH, mode="r") as f:
+        all_tasks = set([json.loads(line)["task_id"] for line in f])
+    answers = get_answers_payload()
+    answered_tasks = set([answer["task_id"] for answer in answers])
+    tasks_to_answer = all_tasks - answered_tasks
+    return list(tasks_to_answer)
 def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     print(agent_code)
     # 2. Fetch Questions and related files (they get saved into the data directory)
+    if os.path.exists(QUESTIONS_FILE_PATH):
+        questions_data = retrieve_downloaded_questions()
+    else:
+        questions_data = download_questions_and_files()
     # 3. Run your Agent and save agent's answers for later review
+    create_answers_file_if_not_exists()
+    task_ids_to_process = get_task_ids_to_process()
     results_log = []
     print(f"Running agent on {len(questions_data)} questions...")
+    with open(AGENT_ANSWERS_FILE_PATH, mode="a") as f:
+        for item in questions_data:
+            task_id = item.get("task_id")
+            if task_id not in task_ids_to_process:
+                print(f"Skipping already answered question: {item}")
+                continue
+            question_text = json.dumps(item)
+            if not task_id or question_text is None:
+                print(f"Skipping item with missing task_id or question: {item}")
+                continue
+            try:
+                answer_to_submit = agent(question_text)
+                answer_payload = {"task_id": task_id, "answer_to_submit": answer_to_submit}
+                json.dump(answer_payload, f)
+                f.write("\n")
+                f.flush()
+                results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer_to_submit})
+            except Exception as e:
+                 print(f"Error running agent on task {task_id}: {e}")
+                 results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+    answers_payload = get_answers_payload()
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    if len(answers_payload) < len(questions_data):
+        msg = "Still need to process all the questions. Rerun until all questions are answered."
+        print(msg)
+        return msg, pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
         """
         **Instructions:**
+        1.  Read the `README.md` file for configuration.
+        2.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
         """
     )
+    # gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")

tools/video_sampling.py CHANGED Viewed

@@ -86,7 +86,6 @@ def extract_frame_captions(
             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             inputs = captioning_processor(
                 frame,
-                text="Detailed image description:",
                 return_tensors="pt"
             )
             out = captioning_model.generate(**inputs)

             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             inputs = captioning_processor(
                 frame,
                 return_tensors="pt"
             )
             out = captioning_model.generate(**inputs)