import os, io, re

import pandas as pd
from sklearn.metrics import accuracy_score
from bert_score import score as bert_score
import google.generativeai as genai

from modtran_gemini import (
    handle_user_query,
    initialize_chatbot_agent,
    get_uploaded_text,
    get_text_chunks,
    get_vectorstore,
    set_global_vectorstore,
    self_reasoning,
    faiss_search_with_keywords,
    faiss_search_with_reasoning,
)
from langchain_openai import ChatOpenAI


class GeminiLLM:
    """Thin wrapper around the Gemini API used as the answer generator."""

    def __init__(self, model_name="models/gemini-1.5-pro-latest", api_key=None):
        api_key = api_key or os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("Missing GOOGLE_API_KEY")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def predict(self, prompt: str) -> str:
        response = self.model.generate_content(prompt)
        return response.text.strip()


# Load the CSV dataset (columns must be 'question' and 'answer', with no extra spaces).
df = pd.read_csv("modtran_dataset.csv")
df.columns = df.columns.str.strip()  # Strip whitespace from column names

# Load the MODTRAN user manual as an in-memory file object.
with open("MODTRAN 6 User's Manual.pdf", "rb") as f:
    file_obj = io.BytesIO(f.read())
    file_obj.name = "MODTRAN 6 User's Manual.pdf"
uploaded_files = [file_obj]

# Document processing: extract text, chunk it, and build the vectorstore.
raw_text = get_uploaded_text(uploaded_files)
text_chunks = get_text_chunks(raw_text)
vectorstore = get_vectorstore(text_chunks)
set_global_vectorstore(vectorstore)

llm = GeminiLLM()


# Direct retrieval + answer generation (no agent routing).
def direct_llm_rag_response(question):
    from modtran_gemini import vectorstore_global

    if vectorstore_global is None:
        raise ValueError("Vectorstore is not initialized.")

    # Retrieve relevant documents.
    retriever = vectorstore_global.as_retriever(search_kwargs={"k": 20})
    docs = retriever.get_relevant_documents(question)

    # Build a simple prompt with the raw retrieved context.
    context = "\n\n".join([doc.page_content for doc in docs])
    prompt = f"""
You are an AI assistant that analyzes the provided context to answer the user's query comprehensively and clearly.
Answer in a concise, factual way using the terminology from the context. Avoid extra explanation unless explicitly asked.
If asked for the page number, you MUST mention the page number.

### Example 1:
**Question:** What is the purpose of the MODTRAN GUI?
**Context:** [Page 10 of the document] The MODTRAN GUI helps users set parameters and visualize the model's output.
**Answer:** The MODTRAN GUI assists users in parameter setup and output visualization. You can find the answer at Page 10 of the document provided.

### Example 2:
**Question:** How do you run MODTRAN on Linux? Answer with page number.
**Context:** [Page 15 of the document] On Linux systems, MODTRAN can be run using the `mod6c` binary via terminal.
**Answer:** Use the `mod6c` binary via terminal. (Page 15)

### Now answer:
**Question:** {question}
**Context:** {context}
**Answer:**
"""
    return llm.predict(prompt)


# Predict answers with the direct RAG pipeline.
df["predicted"] = df["question"].apply(direct_llm_rag_response)

# Clean up answers.
true_answers = df["answer"].str.lower().str.strip()
pred_answers = df["predicted"].str.lower().str.strip()


# Normalize answers (lowercase, drop articles and punctuation).
def normalize_text(s):
    s = s.lower()
    s = re.sub(r'\b(a|an|the)\b', ' ', s)
    s = re.sub(r'[^a-z0-9]', ' ', s)
    return ' '.join(s.split())


normalized_preds = [normalize_text(p) for p in pred_answers]
normalized_refs = [normalize_text(r) for r in true_answers]


# Token-level F1 between a prediction and a reference.
def compute_f1(pred, ref):
    pred_tokens = pred.split()
    ref_tokens = ref.split()
    common = set(pred_tokens) & set(ref_tokens)
    if not common:
        return 0.0
    precision = len(common) / len(pred_tokens)
    recall = len(common) / len(ref_tokens)
    return 2 * precision * recall / (precision + recall)


# Route "how" questions to reasoning-based search, everything else to keyword search.
def manual_tool_routing(question):
    if "how" in question.lower():
        context = faiss_search_with_reasoning(question)
    else:
        context = faiss_search_with_keywords(question)
    return self_reasoning(question, context)


# Create predictions using the different retrieval strategies.
df["agent_predicted"] = df["question"].apply(manual_tool_routing)
df["keyword_predicted"] = df["question"].apply(faiss_search_with_keywords)
df["reasoning_predicted"] = df["question"].apply(faiss_search_with_reasoning)

refs = df["answer"].str.lower().str.strip()

# Score each strategy with Exact Match, token-level F1, and BERTScore F1.
for col in ["agent_predicted", "keyword_predicted", "reasoning_predicted"]:
    preds = df[col].str.lower().str.strip()
    normalized_preds = [normalize_text(p) for p in preds]
    normalized_refs = [normalize_text(r) for r in refs]

    em = sum([int(p == r) for p, r in zip(normalized_preds, normalized_refs)]) / len(refs)
    f1 = sum([compute_f1(p, r) for p, r in zip(normalized_preds, normalized_refs)]) / len(refs)

    P, R, F1_bert = bert_score(preds.tolist(), refs.tolist(), lang="en", verbose=True)
    bert_f1 = F1_bert.mean().item()

    print(f"\n🔹 Evaluation for: {col}")
    print(f" - Exact Match: {em:.3f}")
    print(f" - F1 Score: {f1:.3f}")
    print(f" - BERTScore F1: {bert_f1:.3f}")

    df[f"{col}_bert_f1"] = F1_bert.numpy()
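
# Optional sketch: the direct RAG baseline in df["predicted"] can be scored with the
# same Exact Match / F1 / BERTScore metrics, so it is comparable with the three
# strategies above. Variable names here (direct_*) are illustrative.
direct_preds = df["predicted"].str.lower().str.strip()
direct_norm_preds = [normalize_text(p) for p in direct_preds]
direct_norm_refs = [normalize_text(r) for r in refs]

em_direct = sum(int(p == r) for p, r in zip(direct_norm_preds, direct_norm_refs)) / len(refs)
f1_direct = sum(compute_f1(p, r) for p, r in zip(direct_norm_preds, direct_norm_refs)) / len(refs)
P_d, R_d, F1_bert_direct = bert_score(direct_preds.tolist(), refs.tolist(), lang="en", verbose=True)

print("\n🔹 Evaluation for: predicted (direct RAG baseline)")
print(f" - Exact Match: {em_direct:.3f}")
print(f" - F1 Score: {f1_direct:.3f}")
print(f" - BERTScore F1: {F1_bert_direct.mean().item():.3f}")

df["predicted_bert_f1"] = F1_bert_direct.numpy()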