import os
import torch

from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts


# --- Basic Agent Definition ---
class BasicAgent:
    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            # Use the reviewer agent to decide whether the question can be answered
            # directly by the model or requires code/tool execution.
            print("Calling reviewer agent...")
            reviewer_answer = reviewer_agent.run(myprompts.review_prompt + "\nThe question is:\n" + question)
            print(f"Reviewer agent answer: {reviewer_answer}")

            question = question + '\n' + myprompts.output_format
            fixed_answer = ""

            if reviewer_answer == "code":
                fixed_answer = gaia_agent.run(question)
                print(f"Code agent answer: {fixed_answer}")
            elif reviewer_answer == "model":
                # The reviewer agent suggests the model agent can answer directly.
                print("Using model agent to answer the question.")
                fixed_answer = model_agent.run(myprompts.model_prompt + "\nThe question is:\n" + question)
                print(f"Model agent answer: {fixed_answer}")

            return fixed_answer
        except Exception as e:
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error


# Load model and tokenizer
model_id = "LiquidAI/LFM2-1.2B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,  # Fixed: was a string, should be a torch dtype
    trust_remote_code=True,
    # attn_implementation="flash_attention_2"  # <- uncomment on compatible GPU
)
tokenizer = AutoTokenizer.from_pretrained(model_id)


# Create a wrapper class that matches the expected interface
class LocalLlamaModel:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, 'device') else 'cpu'

    def _extract_text_from_messages(self, messages):
        """Extract text content from ChatMessage objects or handle string input."""
        if isinstance(messages, str):
            return messages
        elif isinstance(messages, list):
            # Handle a list of ChatMessage objects
            text_parts = []
            for msg in messages:
                if hasattr(msg, 'content'):
                    # ChatMessage with a content attribute
                    if isinstance(msg.content, list):
                        # Content is a list of content items
                        for content_item in msg.content:
                            if isinstance(content_item, dict) and 'text' in content_item:
                                text_parts.append(content_item['text'])
                            elif hasattr(content_item, 'text'):
                                text_parts.append(content_item.text)
                    elif isinstance(msg.content, str):
                        text_parts.append(msg.content)
                elif isinstance(msg, dict) and 'content' in msg:
                    # Dictionary format
                    text_parts.append(str(msg['content']))
                else:
                    # Fallback: convert to string
                    text_parts.append(str(msg))
            return '\n'.join(text_parts)
        else:
            return str(messages)

    def generate(self, prompt, max_new_tokens=512 * 5, **kwargs):
        try:
            print("Prompt: ", prompt)
            print("Prompt type: ", type(prompt))

            # Extract text from the prompt (which might be ChatMessage objects)
            text_prompt = self._extract_text_from_messages(prompt)
            print("Extracted text prompt:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)

            # Tokenize the text prompt
            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.model.device)
            input_ids = inputs['input_ids']

            # Generate output
            with torch.no_grad():
                output = self.model.generate(
                    input_ids,
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,  # Handle padding
                )

            # Decode only the new tokens (exclude the input)
            new_tokens = output[0][len(input_ids[0]):]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            return response.strip()
        except Exception as e:
            print(f"Error in model generation: {e}")
            return f"Error generating response: {str(e)}"

    def __call__(self, prompt, max_new_tokens=512, **kwargs):
        """Make the model callable like a function."""
        return self.generate(prompt, max_new_tokens, **kwargs)


# Create the model instance
wrapped_model = LocalLlamaModel(model, tokenizer)

# Now create the agents - these should work with the wrapped model
reviewer_agent = ToolCallingAgent(model=wrapped_model, tools=[])
model_agent = ToolCallingAgent(model=wrapped_model, tools=[fetch_webpage])
gaia_agent = CodeAgent(
    tools=[fetch_webpage, get_youtube_title_description, get_youtube_transcript],
    model=wrapped_model
)


if __name__ == "__main__":
    # Example usage
    question = "What was the actual enrollment of the Malko competition in 2023?"
    agent = BasicAgent()
    answer = agent(question)
    print(f"Answer: {answer}")
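

# --- Optional alternative (not part of the original script) ---
# A minimal, commented-out sketch: instead of the hand-rolled LocalLlamaModel wrapper,
# smolagents ships a TransformersModel class that loads a Hugging Face model and already
# implements the message interface the agents expect. The exact constructor arguments
# shown here (model_id, device_map, torch_dtype, trust_remote_code, max_new_tokens) are
# assumptions and may differ between smolagents versions; verify against the version
# you have installed before switching over.
#
# from smolagents import TransformersModel
#
# hf_model = TransformersModel(
#     model_id="LiquidAI/LFM2-1.2B",
#     device_map="auto",
#     torch_dtype="bfloat16",
#     trust_remote_code=True,
#     max_new_tokens=2048,
# )
#
# # The agents above would then be constructed with model=hf_model instead of wrapped_model,
# # e.g. reviewer_agent = ToolCallingAgent(model=hf_model, tools=[])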