Spaces:

Neurocognitive
/

agentic-RAG

Sleeping

App Files Community

APrmn8 committed on Jun 4

Commit

96d679e

verified ·

1 Parent(s): 9705142

thinkless

Browse files

Files changed (2) hide show

app.py +206 -465
requirements.txt +4 -4

app.py CHANGED Viewed

@@ -1,488 +1,229 @@
 import gradio as gr
-import re
-import os # Used for environment variables if you switch to a real LLM
-import datetime # For timestamping entries in the knowledge base
-import arxiv # Python library for interacting with the arXiv API
-import requests # For making HTTP requests to download PDF files
-import fitz # PyMuPDF library for extracting text from PDF documents
-# --- Agent Core Logic ---
-# CURRENT_PAPER_CONTEXT: A global dictionary to help the mock_llm maintain state
-# about the paper currently being processed within a single agent run.
-# In a real agent with a proper LLM, state management would be more sophisticated,
-# possibly integrated into the agent's memory or passed explicitly.
-# This is reset for each new user query processed by the agent.
-CURRENT_PAPER_CONTEXT = {}
-# KNOWLEDGE_BASE: An in-memory list to store dictionaries of scraped paper information.
-# This acts as a simple knowledge base for the duration of the Gradio session.
-# For persistence, you would use a database or file storage.
-KNOWLEDGE_BASE = []
-def mock_llm(prompt: str, tools_description: str) -> str:
     """
-    A mock Large Language Model (LLM) for the arXiv scraping agent.
-    This function simulates LLM behavior using simplistic keyword-based logic
-    and the global CURRENT_PAPER_CONTEXT to make decisions.
-    A real LLM would use its trained knowledge and reasoning capabilities.
     """
-    global CURRENT_PAPER_CONTEXT
-    # Print the prompt for debugging (visible in Hugging Face Space logs or local console)
-    print(f"\n----- Mock LLM Input -----\nPrompt:\n{prompt}\nTools Available:\n{tools_description}\n--------------------------\n")
-    lower_prompt = prompt.lower() # Normalize for case-insensitive matching
-    # Scenario 1: Observation received from ArxivSearchTool
-    if "observation from last action (arxivsearchtool):" in lower_prompt:
-        # Attempt to parse paper details from the search tool's observation
-        match = re.search(r"top result:\s*'(.*?)'\s*\(id:\s*([\d\.]+),\s*url:\s*(https?://[^\s]+)\)", lower_prompt, re.IGNORECASE)
-        if match:
-            title, paper_id, url = match.groups()
-            # Update context: we've found a paper to process
-            CURRENT_PAPER_CONTEXT = {'id': paper_id, 'title': title, 'url': url, 'status': 'found_paper'}
-            # LLM decides the next action is to scrape this paper
-            return f"""Thought: I have found a paper titled '{title}' with ID {paper_id}. I should now scrape its content to extract information using the PaperScraperTool.
-Action: PaperScraperTool
-Action Input: {url}""" # Use the arXiv page URL as input for the scraper
-        else:
-            # If parsing fails, update context and conclude
-            CURRENT_PAPER_CONTEXT = {'status': 'search_failed_to_parse'}
-            return f"""Thought: I received search results from ArxivSearchTool, but I couldn't parse the top paper details from the observation. I cannot proceed with scraping.
-Final Answer: I found some papers but had trouble extracting specific details for scraping. Please check the raw search results if they were logged, or try a different query."""
-    # Scenario 2: Observation received from PaperScraperTool
-    elif "observation from last action (paperscrapertool):" in lower_prompt:
-        if CURRENT_PAPER_CONTEXT.get('status') == 'found_paper': # Check if we were expecting scraped content
-            # Simulate extracting abstract and snippet from the observation
-            # A real LLM would parse this more intelligently from the tool's output string.
-            abstract_match = re.search(r"abstract:\s*(.*?)(full text snippet:|$)", lower_prompt, re.IGNORECASE | re.DOTALL)
-            text_snippet_match = re.search(r"full text snippet:\s*(.*)", lower_prompt, re.IGNORECASE | re.DOTALL)
-            abstract = abstract_match.group(1).strip() if abstract_match else "Could not extract abstract from observation."
-            text_snippet = text_snippet_match.group(1).strip() if text_snippet_match else "Could not extract text snippet from observation."
-            # Prepare data for the knowledge base
-            paper_data_for_kb = {
-                "id": CURRENT_PAPER_CONTEXT.get('id', 'unknown_id'),
-                "title": CURRENT_PAPER_CONTEXT.get('title', 'Unknown Title'),
-                "url": CURRENT_PAPER_CONTEXT.get('url', 'unknown_url'),
-                "abstract": abstract,
-                "text_snippet": text_snippet, # In a real case, this might be more structured or the full text
-                "scraped_at": datetime.datetime.now().isoformat()
-            }
-            CURRENT_PAPER_CONTEXT['status'] = 'scraped_paper' # Update context
-            # LLM decides the next action is to store this data
-            return f"""Thought: I have the scraped content for '{CURRENT_PAPER_CONTEXT.get('title')}'. I should now store this information in the knowledge base using the KnowledgeBaseStorageTool.
-Action: KnowledgeBaseStorageTool
-Action Input: {str(paper_data_for_kb)}""" # Pass data as a string (mock LLM limitation)
-        else:
-            return f"""Thought: I received scraped content, but I don't have the correct prior context (e.g., which paper was being scraped). This is unexpected.
-Final Answer: Error processing scraped content due to missing or incorrect context. The scraping might have occurred without a preceding successful search and paper identification."""
-    # Scenario 3: Observation received from KnowledgeBaseStorageTool
-    elif "observation from last action (knowledgebasestoragetool):" in lower_prompt:
-        if CURRENT_PAPER_CONTEXT.get('status') == 'scraped_paper': # Check if we were expecting storage confirmation
-            paper_title = CURRENT_PAPER_CONTEXT.get('title', 'the paper')
-            CURRENT_PAPER_CONTEXT = {} # Reset context as this task is complete
-            # LLM concludes the process
-            return f"""Thought: The paper '{paper_title}' has been successfully processed (found, scraped, and stored) in the knowledge base. The task is complete.
-Final Answer: Successfully found, scraped, and stored information for '{paper_title}'."""
-        else:
-            CURRENT_PAPER_CONTEXT = {} # Reset context
-            return f"""Thought: I received a storage confirmation, but the context was unclear or didn't match the expected 'scraped_paper' status.
-Final Answer: A storage action was observed, but there might have been issues in the preceding steps. The overall process integrity is uncertain."""
-    # Scenario 4: Initial query processing (likely a search request)
-    if "find papers on" in lower_prompt or "search arxiv for" in lower_prompt:
-        query_match = re.search(r"(?:find papers on|search arxiv for)\s*(.+)", lower_prompt)
-        search_query = query_match.group(1).strip() if query_match else "default search: quantum computing"
-        CURRENT_PAPER_CONTEXT = {'query': search_query, 'status': 'searching'} # Set initial context
-        # LLM decides to use the search tool
-        return f"""Thought: The user wants to find papers about '{search_query}'. I should use the ArxivSearchTool to find relevant papers.
-Action: ArxivSearchTool
-Action Input: {search_query}"""
-    # Fallback Scenario: Query not understood by the mock LLM's simple logic
-    else:
-        CURRENT_PAPER_CONTEXT = {} # Reset context
-        original_query = prompt.split("User query:", 1)[-1].split("\n", 1)[0].strip() if "User query:" in prompt else "the user's query"
-        return f"""Thought: I'm not sure how to handle this query: '{original_query}'. My current mocked abilities are limited to searching arXiv based on keywords like 'find papers on' or 'search arxiv for', then scraping and storing the first result.
-Final Answer: I can only search arXiv for papers and process them if the query starts with 'find papers on' or 'search arxiv for'. Please rephrase your query (e.g., 'find papers on artificial intelligence')."""
-class Tool:
-    """A simple class to represent a tool that the agent can use."""
-    def __init__(self, name: str, description: str, func):
-        self.name = name
-        self.description = description # Crucial for the LLM to understand the tool's purpose
-        self.func = func # The actual Python function to execute
-    def run(self, action_input: str) -> str:
-        """Executes the tool's function with the given input."""
-        print(f"TOOL EXECUTING: {self.name} with input: '{action_input}'")
-        try:
-            result = self.func(action_input)
-            print(f"TOOL RESULT ({self.name}): {result}")
-            return result
-        except Exception as e:
-            error_message = f"Error executing tool {self.name}: {str(e)}"
-            print(error_message)
-            return error_message # Return error message as observation
-def arxiv_search_func(query: str, max_results=1) -> str:
-    """Tool function: Searches arXiv for papers matching the query."""
     try:
-        search = arxiv.Search(
-            query=query,
-            max_results=max_results, # Limiting to 1 for faster demo and simpler mock LLM logic
-            sort_by=arxiv.SortCriterion.Relevance
         )
-        results_data = []
-        for r in search.results(): # arxiv.Client().results(search) is an alternative
-            results_data.append({
-                "id": r.entry_id.split('/')[-1], # Get the versionless ID (e.g., "1703.03400")
-                "title": r.title,
-                "authors": [author.name for author in r.authors],
-                "summary": r.summary,
-                "published": r.published.isoformat(),
-                "pdf_url": r.pdf_url, # Direct PDF link
-                "arxiv_url": r.entry_id # Link to the abstract page (e.g., "http://arxiv.org/abs/1703.03400v5")
-            })
-        if not results_data:
-            return f"No papers found on arXiv for query: '{query}'."
-        # For the mock LLM, provide a clear summary of the top result
-        top_result = results_data[0]
-        return (f"Found {len(results_data)} papers. "
-                f"Top result: '{top_result['title']}' (ID: {top_result['id']}, URL: {top_result['arxiv_url']})")
-    except Exception as e:
-        return f"Error searching arXiv: {str(e)}"
-def paper_scraper_func(pdf_url_or_id: str) -> str:
-    """Tool function: Downloads an arXiv PDF and extracts its text content."""
-    try:
-        # Determine the direct PDF URL from various input formats
-        if "arxiv.org/abs/" in pdf_url_or_id: # e.g., http://arxiv.org/abs/1703.03400
-            paper_id_match = re.search(r'abs/([\d\.]+)', pdf_url_or_id)
-            if not paper_id_match: raise ValueError("Could not extract paper ID from abs URL.")
-            paper_id = paper_id_match.group(1)
-            pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
-        elif "arxiv.org/pdf/" in pdf_url_or_id: # e.g., http://arxiv.org/pdf/1703.03400.pdf
-             pdf_url = pdf_url_or_id
-        elif re.match(r'^[\d\.]+(v\d+)?$', pdf_url_or_id): # e.g., 1703.03400 or 1703.03400v5
-            pdf_url = f"https://arxiv.org/pdf/{pdf_url_or_id}.pdf"
         else:
-            raise ValueError(f"Invalid input format for PaperScraperTool: '{pdf_url_or_id}'. Expected arXiv URL or ID.")
-        print(f"Attempting to download PDF from: {pdf_url}")
-        response = requests.get(pdf_url, timeout=30) # Added timeout for network robustness
-        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
-        global CURRENT_PAPER_CONTEXT # Use context set by LLM/previous steps
-        paper_title = CURRENT_PAPER_CONTEXT.get('title', f"paper from {pdf_url}")
-        paper_id_context = CURRENT_PAPER_CONTEXT.get('id', 'unknown_id_from_context')
-        full_text = ""
-        abstract = "Could not reliably extract abstract from PDF text."
-        # Use fitz (PyMuPDF) to open PDF from downloaded bytes
-        with fitz.open(stream=response.content, filetype="pdf") as doc:
-            for page_num, page in enumerate(doc):
-                full_text += page.get_text("text") # "text" preserves some layout
-                if page_num == 0: # Attempt to extract abstract from the first page
-                    first_page_text = page.get_text("text")
-                    # Heuristic for abstract extraction (can be improved)
-                    abstract_match = re.search(r"Abstract\s*([\s\S]*?)(?:1\.|Introduction|Keywords|I\.|\n\s*\n\s*\n)", first_page_text, re.IGNORECASE | re.DOTALL)
-                    if abstract_match:
-                        abstract = abstract_match.group(1).strip().replace('\n', ' ')
-        # Provide a snippet for the observation (full text can be very long)
-        text_snippet = (full_text[:500] + "...") if len(full_text) > 500 else full_text
-        return (f"Scraped content for '{paper_title}' (ID: {paper_id_context}). "
-                f"Abstract: {abstract} Full text snippet: {text_snippet}")
-    except requests.exceptions.RequestException as e:
-        return f"Error downloading PDF from '{pdf_url_or_id}': {str(e)}"
-    except Exception as e:
-        return f"Error scraping paper '{pdf_url_or_id}': {str(e)}"
-def knowledge_base_storage_func(paper_data_str: str) -> str:
-    """Tool function: Stores the extracted paper information into the KNOWLEDGE_BASE."""
-    global KNOWLEDGE_BASE, CURRENT_PAPER_CONTEXT
-    try:
-        # The mock LLM provides paper_data_str as a string representation of a dictionary.
-        # WARNING: eval() is risky if the input string is not strictly controlled.
-        # A real LLM should be prompted to return JSON, and then use json.loads().
-        # For this demo, we assume the mock LLM's output is "safe" for eval.
-        if isinstance(paper_data_str, str):
-            try:
-                paper_data = eval(paper_data_str) # Convert string to dict
-                if not isinstance(paper_data, dict):
-                    raise ValueError("Parsed data from string is not a dictionary.")
-            except Exception as e:
-                return f"Error parsing paper data string for storage: {str(e)}. Input data string was: '{paper_data_str}'"
-        elif isinstance(paper_data_str, dict): # If a dict is somehow passed directly
-             paper_data = paper_data_str
-        else:
-            return f"Invalid data type received for storage: {type(paper_data_str)}. Expected string (evaluable to dict) or dict."
-        # Validate essential keys
-        required_keys = ["id", "title", "url", "abstract"]
-        if not all(key in paper_data for key in required_keys):
-            missing_keys = [key for key in required_keys if key not in paper_data]
-            return f"Error: Missing required keys for storage: {missing_keys}. Received data: {paper_data}"
-        # Avoid adding duplicate papers by ID
-        if any(p["id"] == paper_data["id"] for p in KNOWLEDGE_BASE):
-            return f"Paper with ID '{paper_data['id']}' is already in the knowledge base. Not adding again."
-        KNOWLEDGE_BASE.append(paper_data)
-        return (f"Successfully stored paper '{paper_data.get('id', 'N/A')}' (Title: '{paper_data.get('title', 'N/A')}') in the knowledge base. "
-                f"Knowledge base now contains {len(KNOWLEDGE_BASE)} papers.")
     except Exception as e:
-        # If storage fails critically, reset context to prevent loops with bad data
-        CURRENT_PAPER_CONTEXT = {}
-        return f"Critical error storing paper in knowledge base: {str(e)}. Input was: '{paper_data_str}'"
-# Define the list of tools available to the agent
-tools_list = [
-    Tool(
-        name="ArxivSearchTool",
-        description="Searches the arXiv repository for research papers based on a query. Input should be the search query (e.g., 'machine learning for climate change'). Returns a summary of search results, highlighting the top paper found.",
-        func=arxiv_search_func
-    ),
-    Tool(
-        name="PaperScraperTool",
-        description="Downloads an arXiv paper PDF given its arXiv abstract page URL (e.g., 'http://arxiv.org/abs/1234.5678') or just its ID (e.g., '1234.5678') and extracts its text content, including the abstract and a snippet of the full text.",
-        func=paper_scraper_func
     ),
-    Tool(
-        name="KnowledgeBaseStorageTool",
-        description="Stores extracted information about a paper (such as its ID, title, URL, abstract, and text snippet) into the system's knowledge base. Input should be a string representation of a Python dictionary containing these paper details.",
-        func=knowledge_base_storage_func
-    )
-]
-def get_tools_description_for_prompt(tool_list_arg):
-    """Formats tool descriptions for the LLM prompt to help it choose tools."""
-    return "\n".join([f"- {tool.name}: {tool.description}" for tool in tool_list_arg])
-def parse_llm_react_output(llm_response: str):
-    """
-    Parses the LLM's ReAct-formatted response to extract Thought, Action, Action Input, or Final Answer.
-    """
-    thought_match = re.search(r"Thought:\s*(.*)", llm_response, re.IGNORECASE | re.DOTALL)
-    action_match = re.search(r"Action:\s*([\w_]+)", llm_response, re.IGNORECASE | re.DOTALL) # Tool names are usually alphanumeric with underscores
-    action_input_match = re.search(r"Action Input:\s*(.*)", llm_response, re.IGNORECASE | re.DOTALL)
-    final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.IGNORECASE | re.DOTALL)
-    thought = thought_match.group(1).strip() if thought_match else None
-    action = action_match.group(1).strip() if action_match else None
-    action_input_str = "" # Default to empty string if no input
-    if action_input_match:
-        action_input_str = action_input_match.group(1).strip()
-    elif action: # If there's an action but "Action Input:" line is missing, assume empty input
-        action_input_str = ""
-    final_answer = final_answer_match.group(1).strip() if final_answer_match else None
-    return thought, action, action_input_str, final_answer
-class ReActAgent:
-    """A simple ReAct agent that uses an LLM to reason and act."""
-    def __init__(self, llm_function, tool_list_arg, max_iterations=7): # Max iterations for the ReAct loop
-        self.llm_function = llm_function
-        self.tools = {tool.name: tool for tool in tool_list_arg} # Store tools in a dict for easy lookup
-        self.tools_description = get_tools_description_for_prompt(tool_list_arg)
-        self.max_iterations = max_iterations
-        self.agent_log = [] # Stores the step-by-step log for display in Gradio
-    def run(self, user_query: str):
-        """Runs the ReAct loop for a given user query."""
-        global CURRENT_PAPER_CONTEXT
-        CURRENT_PAPER_CONTEXT = {} # Ensure context is fresh for each new query
-        self.agent_log = [f"User Query: {user_query}\n"] # Start log with the user query
-        # Construct the initial part of the prompt for the LLM
-        prompt_history = f"User query: {user_query}\n"
-        prompt_history += "You are an AI assistant that processes arXiv papers. You must use the ReAct format: Thought, Action, Action Input, Observation, and finally Final Answer.\n"
-        prompt_history += "Based on the user query, decide on a thought, then an action to take using one of the available tools.\n"
-        prompt_history += "After an action, you will receive an observation. Reason about the observation to decide the next step.\n"
-        prompt_history += "If you have enough information from an observation to answer the user query, or if a multi-step task (like search, scrape, store) is complete, respond with 'Final Answer:'.\n"
-        prompt_history += "When using PaperScraperTool, the input is the arXiv URL or ID. When using KnowledgeBaseStorageTool, the input is a string representation of a dictionary with paper details.\n"
-        for i in range(self.max_iterations):
-            log_entry = f"\n--- Iteration {i + 1} ---\n"
-            # Call the LLM with the current prompt history and tool descriptions
-            llm_response_str = self.llm_function(prompt_history, self.tools_description)
-            log_entry += f"LLM Raw Response (Mocked):\n{llm_response_str}\n" # Clearly label as mocked
-            thought, action_name, action_input, final_answer = parse_llm_react_output(llm_response_str)
-            if thought:
-                log_entry += f"Thought: {thought}\n"
-                prompt_history += f"Thought: {thought}\n" # Add thought to history for next LLM call
-            else:
-                log_entry += "Warning: No thought found in LLM response for this iteration.\n"
-            if final_answer:
-                log_entry += f"\nFinal Answer from Agent: {final_answer}\n"
-                self.agent_log.append(log_entry)
-                CURRENT_PAPER_CONTEXT = {} # Clear context as task is finished
-                return final_answer, "\n".join(self.agent_log)
-            if action_name:
-                log_entry += f"Action: {action_name}\nAction Input: '{action_input}'\n"
-                prompt_history += f"Action: {action_name}\nAction Input: {action_input}\n"
-                if action_name in self.tools:
-                    tool_to_use = self.tools[action_name]
-                    observation = tool_to_use.run(action_input) # Execute the tool
-                    log_entry += f"Observation: {observation}\n"
-                    prompt_history += f"Observation: {observation}\n" # Add observation to history
-                else:
-                    observation = f"Error: Tool '{action_name}' not found. Please choose from the available tools."
-                    log_entry += f"{observation}\n"
-                    prompt_history += f"Observation: {observation}\n" # Feed error back to LLM
-            else:
-                # If LLM provides no action and no final answer, it might be stuck
-                log_entry += "LLM did not specify an action or a final answer. The agent might be stuck or the task is implicitly complete based on LLM's internal state (which is hard for a mock to determine).\n"
-                self.agent_log.append(log_entry)
-                CURRENT_PAPER_CONTEXT = {} # Clear context
-                # Attempt to give a more informative "stuck" message
-                last_thought_or_obs = thought if thought else "No clear thought before stopping."
-                return f"Agent concluded: No further action or final answer provided by LLM. Last thought: {last_thought_or_obs}", "\n".join(self.agent_log)
-            self.agent_log.append(log_entry) # Append current iteration's log
-        # If max_iterations is reached without a final answer
-        self.agent_log.append("\nMax iterations reached. Stopping.\n")
-        CURRENT_PAPER_CONTEXT = {} # Clear context
-        return "Agent stopped: Maximum iterations reached without a final answer.", "\n".join(self.agent_log)
-# --- Gradio App Definition ---
-# Instantiate the agent globally. This ensures that the agent (and its KNOWLEDGE_BASE)
-# persists across multiple interactions within the same Gradio session.
-# For a deployed app with multiple users, this global KNOWLEDGE_BASE would be shared,
-# which might not be desired. Consider session state or a proper database for such scenarios.
-arxiv_agent_instance = ReActAgent(llm_function=mock_llm, tool_list_arg=tools_list)
-def process_gradio_query(user_query_text: str):
-    """
-    This function is called by the Gradio interface when the user submits a query.
-    It runs the ReAct agent and formats the outputs for display in the UI.
-    """
-    if not user_query_text or not user_query_text.strip():
-        # Handle empty input gracefully
-        empty_kb_message = "Knowledge Base is currently empty." if not KNOWLEDGE_BASE else KNOWLEDGE_BASE
-        return "Please enter a query.", empty_kb_message, "No agent activity to log for an empty query."
-    # Run the agent with the user's query
-    final_answer, agent_log_str = arxiv_agent_instance.run(user_query_text)
-    # Prepare the Knowledge Base for display in Gradio (as JSON)
-    # If KNOWLEDGE_BASE is empty, gr.JSON will handle it gracefully.
-    # If it has content, it will be a list of dictionaries.
-    kb_display_data = KNOWLEDGE_BASE if KNOWLEDGE_BASE else "Knowledge Base is currently empty."
-    return final_answer, kb_display_data, agent_log_str
-# Define the Gradio interface using gr.Blocks for more layout control
-with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
-    gr.Markdown(
-        """
-        # 📄🤖 arXiv Research Paper Agent (Demo)
-        This agent uses a **mocked LLM** to simulate searching arXiv, scraping paper content,
-        and adding it to an in-memory knowledge base.
-        Enter a query like: `Find papers on 'topic X' and add the first one to the knowledge base.`
-        The agent will show its thought process (as if from an LLM) and tool interactions.
-        """
-    )
-    with gr.Row():
-        query_input_textbox = gr.Textbox(
-            label="Your Query for the arXiv Agent",
-            placeholder="e.g., Find papers on 'transformer models' and add the first one to the knowledge base.",
-            lines=2
-        )
-    submit_query_button = gr.Button("Run Agent �", variant="primary")
-    with gr.Accordion("Agent's Final Answer & Step-by-Step Log", open=True):
-        agent_final_answer_output_textbox = gr.Textbox(
-            label="Agent's Final Answer",
-            lines=3,
-            interactive=False,
-            placeholder="Agent's final conclusion will appear here..."
-        )
-        agent_log_output_textbox = gr.Textbox(
-            label="Agent's Step-by-Step Log (Simulated LLM Thoughts & Tool Use)",
-            lines=15,
-            interactive=False,
-            placeholder="Detailed agent activity log..."
-        )
-    with gr.Accordion("In-Memory Knowledge Base Contents", open=True):
-        knowledge_base_output_json = gr.JSON(
-            label="Current Knowledge Base (Papers stored in this session)"
-        )
-        # For a more tabular view, if KNOWLEDGE_BASE items are consistent dictionaries:
-        # knowledge_base_output_df = gr.DataFrame(
-        #     label="Current Knowledge Base (Table View)",
-        #     headers=["ID", "Title", "URL", "Abstract Snippet", "Scraped At"], # Adjust headers as needed
-        #     # You'd need to transform KNOWLEDGE_BASE into a list of lists for gr.DataFrame
-        # )
-    # Connect the button click to the processing function
-    submit_query_button.click(
-        fn=process_gradio_query,
-        inputs=[query_input_textbox],
-        outputs=[agent_final_answer_output_textbox, knowledge_base_output_json, agent_log_output_textbox]
-    )
-    gr.Examples(
-        examples=[
-            ["Find papers on 'reinforcement learning for robotics' and add the first one to the knowledge base."],
-            ["Search arxiv for 'quantum machine learning' and process the top result."],
-            ["Find papers on 'explainable AI in healthcare' and add the first one to the knowledge base."],
-            ["Find papers on 'graph neural networks for drug discovery' and add the top one to the knowledge base."],
         ],
-        inputs=[query_input_textbox],
-        # Optional: Define outputs and function for examples if they should pre-fill or behave differently
-        # outputs=[agent_final_answer_output_textbox, knowledge_base_output_json, agent_log_output_textbox],
-        # fn=process_gradio_query
-    )
-    gr.Markdown(
-        """
-        ---
-        *Powered by a Mock LLM & Gradio. For a real application, replace `mock_llm` with an actual LLM integration.*
-        *PDF scraping uses PyMuPDF. arXiv interaction uses the `arxiv` library.*
-        *Knowledge Base is in-memory and resets if the Gradio app restarts.*
-        """
-    )
 if __name__ == "__main__":
-    # Instructions to run this Gradio app locally:
-    # 1. Ensure all dependencies are installed:
-    #    pip install gradio arxiv PyMuPDF requests
-    # 2. Save this code as a Python file (e.g., app.py).
-    # 3. Run the file from your terminal:
-    #    python app.py
-    # This will launch a local web server, and Gradio will provide a URL (usually http://127.0.0.1:7860)
-    # that you can open in your web browser to interact with the app.
-    #
-    # For deployment on Hugging Face Spaces:
-    # - Name this file `app.py`.
-    # - Create a `requirements.txt` file in the same directory with the content:
-    #   gradio
-    #   arxiv
-    #   PyMuPDF
-    #   requests
-    # - Create a new Space on Hugging Face, select "Gradio" as the SDK, and upload these files.
-    demo.launch() # debug=True can be helpful for local development

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# --- Model and Tokenizer Loading ---
+# It's recommended to load the model and tokenizer once globally
+# so they are not reloaded on every prediction.
+try:
+    MODEL_NAME = "Vinnnf/Thinkless-1.5B-Warmup"
+    print(f"Loading model: {MODEL_NAME}...")
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype="auto",  # Use "auto" or torch.float16 if GPU is available and supports it
+        device_map="auto"    # Automatically maps to GPU if available, otherwise CPU
+    )
+    print("Model loaded successfully.")
+    print(f"Loading tokenizer for: {MODEL_NAME}...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    print("Tokenizer loaded successfully.")
+except Exception as e:
+    print(f"Error loading model or tokenizer: {e}")
+    # Fallback or error handling if model loading fails
+    # For a Gradio app, you might want to display this error in the UI
+    # For now, we'll let it raise if essential components fail to load.
+    raise
+# --- Prediction Function ---
def _error_outputs(message, model_input=""):
    """Build the 4-tuple returned when generation cannot proceed.

    The tuple uses the same slot order as the success path of
    generate_response: (model input text, response text, generated-token
    count, original prompt). The error message is placed in the response
    slot; ``model_input`` carries whatever input text had already been built
    before the failure (empty string if none).
    """
    return model_input, message, "N/A", "N/A"


def generate_response(instruction_text, prompt_question, think_mode_active, max_tokens):
    """
    Generates a response from the language model based on the input.

    Relies on the module-level ``tokenizer`` and ``model`` objects, which are
    defined outside this function.

    Args:
        instruction_text: Overall instruction; joined with the question by a newline.
        prompt_question: The specific question or task.
        think_mode_active: If truthy, ``<think>`` is appended after the chat
            template; otherwise ``<short>`` is appended.
        max_tokens: Value convertible with ``int()``; must be positive. Passed
            to ``model.generate`` as ``max_new_tokens``.

    Returns:
        A 4-tuple ``(final_input_text, response_text, num_generated_tokens,
        full_prompt_content)``. On any failure the tuple is
        ``(input text built so far or "", error message, "N/A", "N/A")`` so the
        message appears in the response slot rather than the model-input slot.
    """
    if not instruction_text or not prompt_question:
        return _error_outputs("Error: Instruction and Prompt Question cannot be empty.")

    # Validate the token budget up front so bad input fails before any
    # tokenizer/model work is attempted.
    try:
        max_new_tokens_int = int(max_tokens)
    except (TypeError, ValueError, OverflowError):
        return _error_outputs("Error: Max new tokens must be an integer.")
    if max_new_tokens_int <= 0:
        return _error_outputs("Error: Max new tokens must be a positive integer.")

    # Kept outside the try so the except branch can still report the input
    # text if the failure happens after it was built.
    final_input_text = ""
    try:
        # 1. Combine instruction and question into a single user message.
        full_prompt_content = f"{instruction_text}\n{prompt_question}"
        messages = [{"role": "user", "content": full_prompt_content}]

        # 2. Apply the chat template. tokenize=False because the
        #    <think>/<short> tag is appended to the text afterwards.
        text_from_template = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # 3. Append the mode tag.
        mode_tag = "<think>" if think_mode_active else "<short>"
        final_input_text = f"{text_from_template}{mode_tag}"

        # 4. Tokenize and move the tensors to the model's device.
        model_inputs = tokenizer([final_input_text], return_tensors="pt").to(model.device)

        # 5. Generate. Sampling options (temperature, top_k, top_p, ...) can
        #    be added to this call if needed.
        print(f"Generating with max_new_tokens: {max_new_tokens_int}")
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens_int,
        )

        # 6. generated_ids include the prompt tokens; slice them off so only
        #    the newly generated part is counted and decoded.
        input_ids_length = model_inputs.input_ids.shape[1]
        output_only_ids = generated_ids[:, input_ids_length:]
        num_generated_tokens = len(output_only_ids[0])
        response_text = tokenizer.batch_decode(output_only_ids, skip_special_tokens=True)[0]

        return final_input_text, response_text, num_generated_tokens, full_prompt_content
    except Exception as e:
        # UI boundary: log the failure and surface it in the response slot
        # instead of letting the exception propagate.
        print(f"Error during generation: {e}")
        return _error_outputs(f"An error occurred: {str(e)}", final_input_text)
# --- Gradio Interface Definition ---
# Values pre-filled in the UI (defaults carried over from the original script).
DEFAULT_INSTRUCTION = "Please reason step by step, and put your final answer within \\boxed{}."
DEFAULT_PROMPT_QUESTION = "The arithmetic mean of 7, 2, $x$ and 10 is 9. What is the value of $x$?"
DEFAULT_THINK_MODE = True
DEFAULT_MAX_TOKENS = 512  # Initial value of the max_new_tokens slider


def _readonly_textbox(label, **extra):
    """Create a non-interactive gr.Textbox used to display a result."""
    return gr.Textbox(label=label, interactive=False, **extra)


# Input widgets, in the same order as generate_response's parameters.
instruction_input = gr.Textbox(
    label="Instruction",
    info="The overall instruction for the model (e.g., reasoning style).",
    value=DEFAULT_INSTRUCTION,
    lines=3,
)
prompt_question_input = gr.Textbox(
    label="Prompt Question",
    info="The specific question or task for the model.",
    value=DEFAULT_PROMPT_QUESTION,
    lines=3,
)
think_mode_checkbox = gr.Checkbox(
    label="Enable Think Mode (<think> tag)",
    info="If checked, adds '<think>' for detailed reasoning. If unchecked, adds '<short>' for concise answers.",
    value=DEFAULT_THINK_MODE,
)
max_tokens_slider = gr.Slider(
    label="Max New Tokens",
    info="Maximum number of tokens to generate for the response.",
    minimum=32,
    maximum=4096,  # As per original script's max_new_tokens
    step=32,
    value=DEFAULT_MAX_TOKENS,
)

# Output widgets, in the same order as generate_response's return tuple.
full_prompt_output = _readonly_textbox(
    "Actual Input to Model (with template and tag)",
    lines=5,
    show_copy_button=True,
)
response_output = _readonly_textbox(
    "Model Response",
    lines=10,
    show_copy_button=True,
)
num_tokens_output = _readonly_textbox("Number of Generated Tokens")
original_prompt_output = _readonly_textbox(
    "Original User Prompt (Instruction + Question)",
    lines=3,
    show_copy_button=True,
)

_INTERFACE_DESCRIPTION = (
    "Interact with the Vinnnf/Thinkless-1.5B-Warmup model. "
    "Provide an instruction and a prompt, choose a thinking mode, and set max tokens. "
    "The model will generate a response based on your input. "
    "Note: Model loading might take a few moments when the app starts."
)

# Each example row is [instruction, question, think mode, max new tokens].
_INTERFACE_EXAMPLES = [
    [
        DEFAULT_INSTRUCTION,
        "Sarah has 5 apples. She gives 2 apples to John and then buys 3 more apples. How many apples does Sarah have now?",
        True,
        256,
    ],
    ["Provide a concise answer.", "What is the capital of France?", False, 64],
    [
        "Explain the concept of photosynthesis in simple terms.",
        "What is photosynthesis?",
        True,
        512,
    ],
]

# Wire everything together: the inputs list maps positionally onto the
# arguments of generate_response, and the outputs list onto its return values.
app_interface = gr.Interface(
    fn=generate_response,
    inputs=[
        instruction_input,
        prompt_question_input,
        think_mode_checkbox,
        max_tokens_slider,
    ],
    outputs=[
        full_prompt_output,
        response_output,
        num_tokens_output,
        original_prompt_output,  # Shows the combined instruction + question
    ],
    title="Thinkless Model Interface",
    description=_INTERFACE_DESCRIPTION,
    allow_flagging='never',  # or 'auto' if you want to enable flagging
    examples=_INTERFACE_EXAMPLES,
)
# --- Launch the App ---
# Deployment note: to share on Hugging Face Spaces, this file is typically
# saved as app.py, with requirements.txt listing:
#   gradio
#   transformers
#   torch
#   sentencepiece (often a dependency for tokenizers)
#   accelerate (if using device_map="auto" effectively with multiple GPUs/CPU offload)
if __name__ == "__main__":
    print("Starting Gradio app...")
    # On Hugging Face Spaces, Gradio handles the server automatically;
    # when run locally, this call starts a local server.
    app_interface.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-huggingface_hub==0.25.2
 gradio
-arxiv
-PyMuPDF
-requests

 gradio
+transformers
+torch
+sentencepiece
+accelerate