# Spaces: Sleeping
import os | |
from smolagents import CodeAgent, ToolCallingAgent | |
from smolagents import OpenAIServerModel | |
from tools.fetch import fetch_webpage | |
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description | |
import myprompts | |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
import torch | |
# --- Basic Agent Definition ---
class BasicAgent:
    """Answer a question by routing it to the code agent or the model agent.

    The module-level ``reviewer_agent`` is asked first; its verdict selects
    ``gaia_agent`` (verdict ``"code"``) or ``model_agent`` (verdict
    ``"model"``).
    """

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the reviewer, then the agent it selects, on ``question``.

        Args:
            question: The question text.

        Returns:
            The selected agent's answer. If anything raises while the agents
            run, the error is printed and returned as a message string
            instead of propagating.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            # Use the reviewer agent to determine if the question can be
            # answered by a model or requires code.
            print("Calling reviewer agent...")
            reviewer_answer = reviewer_agent.run(
                myprompts.review_prompt + "\nThe question is:\n" + question
            )
            print(f"Reviewer agent answer: {reviewer_answer}")

            # The verdict is free-form LLM output: tolerate case differences,
            # surrounding whitespace, quotes and a trailing period rather
            # than requiring an exact "code"/"model" match.
            route = str(reviewer_answer).strip().strip("\"'`.").lower()

            question = question + '\n' + myprompts.output_format

            if route == "model":
                # The reviewer suggests the model agent can answer directly.
                print("Using model agent to answer the question.")
                fixed_answer = model_agent.run(
                    myprompts.model_prompt + "\nThe question is:\n" + question
                )
                print(f"Model agent answer: {fixed_answer}")
            else:
                if route != "code":
                    # Previously an unrecognized verdict skipped both agents
                    # and returned an empty string; use the code agent so the
                    # question is still attempted.
                    print(
                        f"Unrecognized reviewer answer {reviewer_answer!r}; "
                        "falling back to the code agent."
                    )
                fixed_answer = gaia_agent.run(question)
                print(f"Code agent answer: {fixed_answer}")
            return fixed_answer
        except Exception as e:
            # Boundary handler: callers receive a message, never an exception.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error
# Create a wrapper class that matches the expected interface
class LocalLlamaModel:
    """Callable text-generation wrapper around a causal LM and its tokenizer.

    NOTE(review): the "expected interface" is not visible in this file.
    Confirm that the agents this object is passed to accept a plain
    ``prompt -> str`` callable with the installed smolagents version.
    """

    def __init__(self, model, tokenizer):
        """Store ``model`` and ``tokenizer`` and record the model's device.

        Args:
            model: Object exposing ``generate(input_ids, **kwargs)``.
            tokenizer: Callable tokenizer exposing ``eos_token_id`` and
                ``decode``.
        """
        self.model = model
        self.tokenizer = tokenizer
        # Fall back to CPU when the model does not expose a ``device``.
        self.device = getattr(model, 'device', 'cpu')

    def generate(self, prompt: str, max_new_tokens=512, **kwargs):
        """Generate text using the local model.

        Args:
            prompt: Text to tokenize and feed to the model.
            max_new_tokens: Upper bound on the number of generated tokens.
            **kwargs: Extra arguments forwarded to ``model.generate``. They
                override the sampling defaults used here (``do_sample``,
                ``temperature``, ``pad_token_id``).

        Returns:
            The decoded continuation only, without the prompt tokens.
        """
        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)

        # Defaults first, caller overrides last. Passing the defaults as
        # explicit keywords next to **kwargs raised "got multiple values for
        # keyword argument" whenever a caller tried to override one of them.
        generation_args = {
            "do_sample": True,
            "temperature": 0.7,
            "pad_token_id": self.tokenizer.eos_token_id,
            **kwargs,
        }

        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids,
                max_new_tokens=max_new_tokens,
                **generation_args,
            )

        # Decode only the new tokens (excluding the input)
        new_tokens = output_ids[0][input_ids.shape[1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
        """Make the model callable like a function; delegates to ``generate``."""
        return self.generate(prompt, max_new_tokens, **kwargs)
# Wrap the underlying model and tokenizer in the callable adapter.
# NOTE(review): `model_init` and `tokenizer` are not defined anywhere in the
# visible part of this file; confirm they are created before this line.
model = LocalLlamaModel(model_init, tokenizer)

# All three agents share the same wrapped model.
reviewer_agent = ToolCallingAgent(tools=[], model=model)
model_agent = ToolCallingAgent(tools=[fetch_webpage], model=model)
gaia_agent = CodeAgent(
    model=model,
    tools=[
        fetch_webpage,
        get_youtube_title_description,
        get_youtube_transcript,
    ],
)
if __name__ == "__main__":
    # Manual smoke run: build the agent and print its answer to one
    # sample question.
    question = "What was the actual enrollment of the Malko competition in 2023?"
    answer = BasicAgent()(question)
    print(f"Answer: {answer}")