Michele De Stefano committed on
Commit
b066853
·
1 Parent(s): 1b8aef5

Now it is possible to process questions incrementally

Browse files
Files changed (3) hide show
  1. agent_factory.py +14 -19
  2. app.py +72 -37
  3. tools/video_sampling.py +0 -1
agent_factory.py CHANGED
@@ -9,6 +9,7 @@ from langchain_ollama import ChatOllama
9
  from langgraph.constants import START, END
10
  from langgraph.graph import MessagesState, StateGraph
11
  from langgraph.graph.graph import CompiledGraph
 
12
  from langgraph.prebuilt import ToolNode
13
  from pydantic import BaseModel
14
 
@@ -67,7 +68,6 @@ class AgentFactory:
67
  "follow the rules explained above.\n"
68
  )
69
 
70
- __llm_for_decision: Runnable
71
  __llm: Runnable
72
  __tools: list[BaseTool]
73
 
@@ -115,30 +115,25 @@ class AgentFactory:
115
  web_page_info_retriever,
116
  youtube_video_to_frame_captions
117
  ]
118
- self.__llm_for_decision = ChatOllama(
119
- model=model,
120
- temperature=1.0,
121
- num_ctx=num_ctx
122
- )
123
  self.__llm = ChatOllama(
124
  model=model,
125
  temperature=temperature,
126
  num_ctx=num_ctx
127
  ).bind_tools(tools=self.__tools)
128
 
129
- def __decide_for_code_agent(self, state: MessagesState) -> str:
130
- decision_messages = [
131
- SystemMessage(
132
- content="Answer only yes or no. "
133
- "If you think the question can be easily answered "
134
- "by writing Python code and executing it then answer "
135
- "yes. If you think you can answer by exploiting other "
136
- "resources then answer no."
137
- ),
138
- state["messages"][-1]
139
- ]
140
- answer = self.__llm_for_decision.invoke(decision_messages)
141
- return answer.content
142
 
143
  def __run_llm(self, state: MessagesState) -> dict[str, Any]:
144
  answer = self.__llm.invoke(state["messages"])
 
9
  from langgraph.constants import START, END
10
  from langgraph.graph import MessagesState, StateGraph
11
  from langgraph.graph.graph import CompiledGraph
12
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
13
  from langgraph.prebuilt import ToolNode
14
  from pydantic import BaseModel
15
 
 
68
  "follow the rules explained above.\n"
69
  )
70
 
 
71
  __llm: Runnable
72
  __tools: list[BaseTool]
73
 
 
115
  web_page_info_retriever,
116
  youtube_video_to_frame_captions
117
  ]
 
 
 
 
 
118
  self.__llm = ChatOllama(
119
  model=model,
120
  temperature=temperature,
121
  num_ctx=num_ctx
122
  ).bind_tools(tools=self.__tools)
123
 
124
+ # llm_endpoint = HuggingFaceEndpoint(
125
+ # repo_id="Qwen/Qwen2.5-72B-Instruct",
126
+ # task="text-generation",
127
+ # max_new_tokens=num_ctx,
128
+ # do_sample=False,
129
+ # repetition_penalty=1.03,
130
+ # temperature=temperature,
131
+ # )
132
+ #
133
+ # self.__llm = (
134
+ # ChatHuggingFace(llm=llm_endpoint)
135
+ # .bind_tools(tools=self.__tools)
136
+ # )
137
 
138
  def __run_llm(self, state: MessagesState) -> dict[str, Any]:
139
  answer = self.__llm.invoke(state["messages"])
app.py CHANGED
@@ -58,7 +58,11 @@ class BasicAgent:
58
  return answer
59
 
60
 
61
- def download_questions_and_files() -> dict[str, Any]:
 
 
 
 
62
  api_url = DEFAULT_API_URL
63
  questions_url = f"{api_url}/questions"
64
  files_base_url = f"{api_url}/files"
@@ -70,26 +74,26 @@ def download_questions_and_files() -> dict[str, Any]:
70
  questions_data = response.json()
71
  if not questions_data:
72
  print("Fetched questions list is empty.")
73
- return {
74
  "error": "Fetched questions list is empty or invalid format."
75
- }
76
  print(f"Fetched {len(questions_data)} questions.")
77
  except requests.exceptions.RequestException as e:
78
  print(f"Error fetching questions: {e}")
79
- return {
80
  "error": f"Error fetching questions: {e}"
81
- }
82
  except requests.exceptions.JSONDecodeError as e:
83
  print(f"Error decoding JSON response from questions endpoint: {e}")
84
  print(f"Response text: {response.text[:500]}")
85
- return {
86
  "error": f"Error decoding server response for questions: {e}"
87
- }
88
  except Exception as e:
89
  print(f"An unexpected error occurred fetching questions: {e}")
90
- return {
91
  "error": f"An unexpected error occurred fetching questions: {e}"
92
- }
93
 
94
  # Save input questions and related files into the data subdirectory
95
  try:
@@ -107,18 +111,39 @@ def download_questions_and_files() -> dict[str, Any]:
107
  file.write(response.content)
108
  except requests.exceptions.RequestException as e:
109
  print(f"Error fetching question-related file: {e}")
110
- return {
111
  "error": f"Error fetching question-related file: {e}"
112
- }
113
  except Exception as e:
114
  print(f"An unexpected error occurred fetching question-related file: {e}")
115
- return {
116
  "error": f"An unexpected error occurred fetching question-related file: {e}"
117
- }
118
 
119
  return questions_data
120
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
123
  """
124
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -145,34 +170,46 @@ def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
145
  print(agent_code)
146
 
147
  # 2. Fetch Questions and related files (they get saved into the data directory)
148
- questions_data = download_questions_and_files()
 
 
 
149
 
150
  # 3. Run your Agent and save agent's answers for later review
 
 
151
  results_log = []
152
- answers_payload = []
153
  print(f"Running agent on {len(questions_data)} questions...")
154
- for item in questions_data:
155
- task_id = item.get("task_id")
156
- question_text = json.dumps(item)
157
- if not task_id or question_text is None:
158
- print(f"Skipping item with missing task_id or question: {item}")
159
- continue
160
- try:
161
- submitted_answer = agent(question_text)
162
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
163
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
164
- except Exception as e:
165
- print(f"Error running agent on task {task_id}: {e}")
166
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
167
 
 
168
  if not answers_payload:
169
  print("Agent did not produce any answers to submit.")
170
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
171
 
172
- with open(AGENT_ANSWERS_FILE_PATH, mode="w") as f:
173
- for cur_answer in answers_payload:
174
- json.dump(cur_answer, f)
175
- f.write("\n")
176
 
177
  # 4. Prepare Submission
178
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -239,18 +276,16 @@ with gr.Blocks() as demo:
239
  """
240
  **Instructions:**
241
 
242
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
243
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
244
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
245
 
246
  ---
247
  **Disclaimers:**
248
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
249
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
250
  """
251
  )
252
 
253
- gr.LoginButton()
254
 
255
  run_button = gr.Button("Run Evaluation & Submit All Answers")
256
 
 
58
  return answer
59
 
60
 
61
+ def retrieve_downloaded_questions() -> list[dict[str, Any]]:
62
+ with open(QUESTIONS_FILE_PATH, mode="r") as f:
63
+ return [json.loads(line) for line in f]
64
+
65
+ def download_questions_and_files() -> list[dict[str, Any]]:
66
  api_url = DEFAULT_API_URL
67
  questions_url = f"{api_url}/questions"
68
  files_base_url = f"{api_url}/files"
 
74
  questions_data = response.json()
75
  if not questions_data:
76
  print("Fetched questions list is empty.")
77
+ return [{
78
  "error": "Fetched questions list is empty or invalid format."
79
+ }]
80
  print(f"Fetched {len(questions_data)} questions.")
81
  except requests.exceptions.RequestException as e:
82
  print(f"Error fetching questions: {e}")
83
+ return [{
84
  "error": f"Error fetching questions: {e}"
85
+ }]
86
  except requests.exceptions.JSONDecodeError as e:
87
  print(f"Error decoding JSON response from questions endpoint: {e}")
88
  print(f"Response text: {response.text[:500]}")
89
+ return [{
90
  "error": f"Error decoding server response for questions: {e}"
91
+ }]
92
  except Exception as e:
93
  print(f"An unexpected error occurred fetching questions: {e}")
94
+ return [{
95
  "error": f"An unexpected error occurred fetching questions: {e}"
96
+ }]
97
 
98
  # Save input questions and related files into the data subdirectory
99
  try:
 
111
  file.write(response.content)
112
  except requests.exceptions.RequestException as e:
113
  print(f"Error fetching question-related file: {e}")
114
+ return [{
115
  "error": f"Error fetching question-related file: {e}"
116
+ }]
117
  except Exception as e:
118
  print(f"An unexpected error occurred fetching question-related file: {e}")
119
+ return [{
120
  "error": f"An unexpected error occurred fetching question-related file: {e}"
121
+ }]
122
 
123
  return questions_data
124
 
125
 
126
+ def create_answers_file_if_not_exists() -> None:
127
+ if not os.path.exists(AGENT_ANSWERS_FILE_PATH):
128
+ with open(AGENT_ANSWERS_FILE_PATH, 'w'):
129
+ pass
130
+
131
+
132
+ def get_answers_payload() -> list[dict[str, Any]]:
133
+ with open(AGENT_ANSWERS_FILE_PATH, mode="r") as f:
134
+ answers_payload = [json.loads(line) for line in f]
135
+ return answers_payload
136
+
137
+
138
+ def get_task_ids_to_process() -> list[str]:
139
+ with open(QUESTIONS_FILE_PATH, mode="r") as f:
140
+ all_tasks = set([json.loads(line)["task_id"] for line in f])
141
+ answers = get_answers_payload()
142
+ answered_tasks = set([answer["task_id"] for answer in answers])
143
+ tasks_to_answer = all_tasks - answered_tasks
144
+ return list(tasks_to_answer)
145
+
146
+
147
  def run_and_submit_all() -> tuple[str, pd.DataFrame | None]:
148
  """
149
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
170
  print(agent_code)
171
 
172
  # 2. Fetch Questions and related files (they get saved into the data directory)
173
+ if os.path.exists(QUESTIONS_FILE_PATH):
174
+ questions_data = retrieve_downloaded_questions()
175
+ else:
176
+ questions_data = download_questions_and_files()
177
 
178
  # 3. Run your Agent and save agent's answers for later review
179
+ create_answers_file_if_not_exists()
180
+ task_ids_to_process = get_task_ids_to_process()
181
  results_log = []
 
182
  print(f"Running agent on {len(questions_data)} questions...")
183
+ with open(AGENT_ANSWERS_FILE_PATH, mode="a") as f:
184
+ for item in questions_data:
185
+ task_id = item.get("task_id")
186
+ if task_id not in task_ids_to_process:
187
+ print(f"Skipping already answered question: {item}")
188
+ continue
189
+ question_text = json.dumps(item)
190
+ if not task_id or question_text is None:
191
+ print(f"Skipping item with missing task_id or question: {item}")
192
+ continue
193
+ try:
194
+ answer_to_submit = agent(question_text)
195
+ answer_payload = {"task_id": task_id, "answer_to_submit": answer_to_submit}
196
+ json.dump(answer_payload, f)
197
+ f.write("\n")
198
+ f.flush()
199
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer_to_submit})
200
+ except Exception as e:
201
+ print(f"Error running agent on task {task_id}: {e}")
202
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
203
 
204
+ answers_payload = get_answers_payload()
205
  if not answers_payload:
206
  print("Agent did not produce any answers to submit.")
207
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
208
 
209
+ if len(answers_payload) < len(questions_data):
210
+ msg = "Still need to process all the questions. Rerun until all questions are answered."
211
+ print(msg)
212
+ return msg, pd.DataFrame(results_log)
213
 
214
  # 4. Prepare Submission
215
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
276
  """
277
  **Instructions:**
278
 
279
+ 1. Read the `README.md` file for configuration.
280
+ 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
281
 
282
  ---
283
  **Disclaimers:**
284
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
 
285
  """
286
  )
287
 
288
+ # gr.LoginButton()
289
 
290
  run_button = gr.Button("Run Evaluation & Submit All Answers")
291
 
tools/video_sampling.py CHANGED
@@ -86,7 +86,6 @@ def extract_frame_captions(
86
  frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
87
  inputs = captioning_processor(
88
  frame,
89
- text="Detailed image description:",
90
  return_tensors="pt"
91
  )
92
  out = captioning_model.generate(**inputs)
 
86
  frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
87
  inputs = captioning_processor(
88
  frame,
 
89
  return_tensors="pt"
90
  )
91
  out = captioning_model.generate(**inputs)