Agents_Course_Final_Project

Sleeping

App Files Community

razvanfischer committed 16 days ago

Commit

8609cfe

1 Parent(s): ef1b18f

Working demo

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +106 -52
requirements.txt +205 -9

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv

app.py CHANGED Viewed

@@ -1,14 +1,16 @@
 import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
 import asyncio
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.core import SummaryIndex
 from llama_index.readers.web import SimpleWebPageReader
 from llama_index.core.agent.workflow import (
     AgentInput,
     AgentOutput,
@@ -17,52 +19,103 @@ from llama_index.core.agent.workflow import (
     AgentStream,
 )
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
         system_prompt = """
-            You are a helpful assistant that answers questions. If you don't know the answer, you can search the web for information.
-            If you are searching the web, keep the query very concise in order to get good results.
         """
-        self.agent = AgentWorkflow.from_tools_or_functions([search_web], llm=self.llm,
                                                            system_prompt=system_prompt)
         print("BasicAgent initialized.")
     async def __call__(self, question: str) -> str:
         handler = self.agent.run(user_msg=question)
         async for event in handler.stream_events():
             if isinstance(event, AgentStream):
                 print(event.delta, end="", flush=True)
             elif isinstance(event, ToolCallResult):
-               print(event.tool_name)  # the tool name
-               print(event.tool_kwargs)  # the tool kwargs
-               print(event.tool_output)  # the tool output
         response = await handler
         return str(response)
-async def search_web(query: str) -> str:
-    """Useful for using the web to answer questions. Keep the query very concise in order to get good results."""
-    client = DuckDuckGoSearchToolSpec()
-    search_res = client.duckduckgo_instant_search(query)
-    return str(search_res)
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -89,16 +142,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -118,14 +171,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -206,28 +259,29 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    # agent = BasicAgent()
-    # answ = asyncio.run(agent("Who is Michael Jackson?"))
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import pandas as pd
+from huggingface_hub import InferenceClient
 import asyncio
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.readers.web import SimpleWebPageReader
+import requests
+from llama_index.readers.wikipedia import WikipediaReader
 from llama_index.core.agent.workflow import (
     AgentInput,
     AgentOutput,
     AgentStream,
 )
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         self.llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
+        self.vision_llm = HuggingFaceInferenceAPI(model_name="CohereLabs/aya-vision-32b")
+        self.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+        self.search_client = DuckDuckGoSearchToolSpec()
+        self.wiki_reader = WikipediaReader()
         system_prompt = """
+            You are a helpful assistant that answers questions. If you don't know the answer, you can search the web.
         """
+        self.agent = AgentWorkflow.from_tools_or_functions([self.search_web, self.search_wiki, self.webpage_reader],
+                                                           llm=self.llm,
                                                            system_prompt=system_prompt)
         print("BasicAgent initialized.")
     async def __call__(self, question: str) -> str:
         handler = self.agent.run(user_msg=question)
         async for event in handler.stream_events():
             if isinstance(event, AgentStream):
                 print(event.delta, end="", flush=True)
             elif isinstance(event, ToolCallResult):
+                print(event.tool_name)  # the tool name
+                print(event.tool_kwargs)  # the tool kwargs
+                print(event.tool_output)  # the tool output
         response = await handler
         return str(response)
+    async def describe_images(self, webpage_url: str, query: str) -> str:
+        """Extracts and describes images from an input webpage url based on a query."""
+        client = InferenceClient(
+            provider="novita",
+            api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
+        )
+        completion = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "Describe this image in one sentence."
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                            }
+                        }
+                    ]
+                }
+            ],
+        )
+        print(completion.choices[0].message)
+    async def search_wiki(self, query: str) -> str:
+        """Useful for browsing Wikipedia to look up specific info."""
+        reader = self.wiki_reader
+        documents = reader.load_data(pages=[query])
+        index = VectorStoreIndex.from_documents(documents, embed_model=self.embed_model)
+        search_res = index.as_query_engine(llm=self.llm).query(query)
+        return str(search_res)
+    async def search_web(self, query: str) -> str:
+        """Useful for using the web to answer questions. Keep the query very concise in order to get good results."""
+        client = self.search_client
+        search_res = client.duckduckgo_full_search(query)
+        return str(search_res)
+    async def webpage_reader(self, webpage_url: str) -> str:
+        """Useful for when you want to read and extract information from a specific webpage."""
+        documents = SimpleWebPageReader(html_to_text=True).load_data(
+            [webpage_url]
+        )
+        return str(documents)
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     )
 if __name__ == "__main__":
+    agent = BasicAgent()
+    # print(asyncio.run(agent.search_wiki("Michael Jackson (entertainer)")))
+    answ = asyncio.run(agent("Search Wikipedia for info on Michael Jackson"))
+    # asyncio.run(agent.describe_images("", ""))
+    # print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # # Check for SPACE_HOST and SPACE_ID at startup for information
+    # space_host_startup = os.getenv("SPACE_HOST")
+    # space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    #
+    # if space_host_startup:
+    #     print(f"✅ SPACE_HOST found: {space_host_startup}")
+    #     print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    # else:
+    #     print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    #
+    # if space_id_startup: # Print repo URLs if SPACE_ID is found
+    #     print(f"✅ SPACE_ID found: {space_id_startup}")
+    #     print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+    #     print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    # else:
+    #     print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    #
+    # print("-"*(60 + len(" App Starting ")) + "\n")
+    #
+    # print("Launching Gradio Interface for Basic Agent Evaluation...")
+    # demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,9 +1,205 @@
-gradio
-requests
-llama-index
-llama-index-vector-stores-chroma
-llama-index-llms-huggingface-api
-llama-index-embeddings-huggingface
-llama-index-tools-duckduckgo
-llama-index-readers-web
-syncio

+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.2
+aiosignal==1.3.2
+aiosqlite==0.21.0
+annotated-types==0.7.0
+anyio==4.9.0
+asgiref==3.8.1
+attrs==25.3.0
+Authlib==1.6.0
+backoff==2.2.1
+banks==2.1.2
+bcrypt==4.3.0
+beautifulsoup4==4.13.4
+build==1.2.2.post1
+cachetools==5.5.2
+certifi==2025.4.26
+cffi==1.17.1
+charset-normalizer==3.4.2
+chromadb==1.0.10
+chromedriver-autoinstaller==0.6.4
+click==8.2.1
+colorama==0.4.6
+coloredlogs==15.0.1
+cryptography==45.0.3
+cssselect==1.3.0
+dataclasses-json==0.6.7
+defusedxml==0.7.1
+Deprecated==1.2.18
+dirtyjson==1.0.8
+distro==1.9.0
+duckduckgo_search==6.4.2
+durationpy==0.10
+fastapi==0.115.9
+feedfinder2==0.0.4
+feedparser==6.0.11
+ffmpy==0.5.0
+filelock==3.18.0
+filetype==1.2.0
+flatbuffers==25.2.10
+frozenlist==1.6.0
+fsspec==2025.5.1
+google-auth==2.40.2
+googleapis-common-protos==1.70.0
+gradio==5.31.0
+gradio_client==1.10.1
+greenlet==3.2.2
+griffe==1.7.3
+groovy==0.1.2
+grpcio==1.71.0
+h11==0.16.0
+hf-xet==1.1.2
+html2text==2024.2.26
+httpcore==1.0.9
+httptools==0.6.4
+httpx==0.28.1
+huggingface-hub==0.32.2
+humanfriendly==10.0
+idna==3.10
+importlib_metadata==8.6.1
+importlib_resources==6.5.2
+itsdangerous==2.2.0
+jieba3k==0.35.1
+Jinja2==3.1.6
+jiter==0.10.0
+joblib==1.5.1
+jsonschema==4.24.0
+jsonschema-specifications==2025.4.1
+kubernetes==32.0.1
+llama-cloud==0.1.22
+llama-cloud-services==0.6.23
+llama-index==0.12.37
+llama-index-agent-openai==0.4.8
+llama-index-cli==0.4.1
+llama-index-core==0.12.37
+llama-index-embeddings-huggingface==0.5.4
+llama-index-embeddings-openai==0.3.1
+llama-index-indices-managed-llama-cloud==0.6.11
+llama-index-llms-huggingface-api==0.4.3
+llama-index-llms-openai==0.3.44
+llama-index-multi-modal-llms-openai==0.4.3
+llama-index-program-openai==0.3.1
+llama-index-question-gen-openai==0.3.0
+llama-index-readers-file==0.4.8
+llama-index-readers-llama-parse==0.4.0
+llama-index-readers-web==0.4.1
+llama-index-readers-wikipedia==0.3.0
+llama-index-tools-duckduckgo==0.3.0
+llama-index-vector-stores-chroma==0.4.1
+llama-parse==0.6.23
+lxml==5.4.0
+lxml_html_clean==0.4.2
+markdown-it-py==3.0.0
+markdownify==1.1.0
+MarkupSafe==3.0.2
+marshmallow==3.26.1
+mdurl==0.1.2
+mmh3==5.1.0
+mpmath==1.3.0
+multidict==6.4.4
+mypy_extensions==1.1.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+newspaper3k==0.2.8
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.2.2
+onnxruntime==1.16.3
+openai==1.82.0
+opentelemetry-api==1.33.1
+opentelemetry-exporter-otlp-proto-common==1.33.1
+opentelemetry-exporter-otlp-proto-grpc==1.33.1
+opentelemetry-instrumentation==0.54b1
+opentelemetry-instrumentation-asgi==0.54b1
+opentelemetry-instrumentation-fastapi==0.54b1
+opentelemetry-proto==1.33.1
+opentelemetry-sdk==1.33.1
+opentelemetry-semantic-conventions==0.54b1
+opentelemetry-util-http==0.54b1
+orjson==3.10.18
+outcome==1.3.0.post0
+overrides==7.7.0
+oxylabs==2.0.0
+packaging==25.0
+pandas==2.2.3
+pillow==11.2.1
+platformdirs==4.3.8
+playwright==1.52.0
+posthog==4.2.0
+primp==0.15.0
+propcache==0.3.1
+protobuf==5.29.4
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.22
+pydantic==2.11.5
+pydantic_core==2.33.2
+pydub==0.25.1
+pyee==13.0.0
+Pygments==2.19.1
+pypdf==5.5.0
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-file==2.1.0
+requests-oauthlib==2.0.0
+rich==14.0.0
+rpds-py==0.25.1
+rsa==4.9.1
+ruff==0.11.11
+safehttpx==0.1.6
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.3
+selenium==4.33.0
+semantic-version==2.10.0
+sentence-transformers==4.1.0
+sgmllib3k==1.0.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+soupsieve==2.7
+spider-client==0.0.27
+SQLAlchemy==2.0.41
+starlette==0.45.3
+striprtf==0.0.26
+sympy==1.14.0
+syncio==0.0.4
+tenacity==9.1.2
+threadpoolctl==3.6.0
+tiktoken==0.9.0
+tinysegmenter==0.3
+tldextract==5.3.0
+tokenizers==0.15.2
+tomlkit==0.13.2
+torch==2.2.2
+tqdm==4.67.1
+transformers==4.36.2
+trio==0.30.0
+trio-websocket==0.12.2
+typer==0.16.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.4.0
+uvicorn==0.34.2
+uvloop==0.21.0
+watchfiles==1.0.5
+websocket-client==1.8.0
+websockets==15.0.1
+wikipedia==1.4.0
+wrapt==1.17.2
+wsproto==1.2.0
+yarl==1.20.0
+zipp==3.22.0