import gradio as gr
import random
import torch
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from ctransformers import AutoModelForCausalLM
import time
import pickle

# Load the 4-bit quantized (Q4_K_M GGUF) Zephyr model through ctransformers.
#
# The context window is set explicitly. Each prompt built by generate_turn
# carries the system guidance, the message being answered and every retrieved
# excerpt, and up to 150 tokens are generated on top of that. Left at the
# library default (documented as small for llama-family models -- confirm for
# the installed ctransformers version) such prompts overflow and get truncated.
LLM_CONTEXT_LENGTH = 4096

start_load = time.time()
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/zephyr-7B-alpha-GGUF",
    model_file="zephyr-7b-alpha.Q4_K_M.gguf",
    model_type="mistral",
    context_length=LLM_CONTEXT_LENGTH,
)
print(f"Model loaded in {time.time() - start_load:.2f} seconds")

# Pool of sentences that random_query picks an opening message from.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source (never from user uploads).
SENTENCES_PATH = "bc sentences.pkl"
with open(SENTENCES_PATH, "rb") as sentence_file:
    bs_sen = pickle.load(sentence_file)

# Embedding model shared by both vector stores.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)


def _load_faiss_index(index_dir):
    """Load the FAISS vector store saved under ``index_dir``."""
    # allow_dangerous_deserialization opts in to loading pickled index data,
    # so only point this at index folders that were created by a trusted party.
    return FAISS.load_local(
        index_dir,
        embeddings=embedding_model,
        allow_dangerous_deserialization=True,
    )


# One vector store per speaker: index_a backs Speaker A, index_b backs Speaker B.
faiss_a = _load_faiss_index("faiss/index_a")
faiss_b = _load_faiss_index("faiss/index_b")

# Each retriever hands back the top 10 matches for a query.
retriever_a = faiss_a.as_retriever(search_kwargs={"k": 10})
retriever_b = faiss_b.as_retriever(search_kwargs={"k": 10})

# System prompts.
# Both speakers get the same instructions; only the decade they speak from
# differs, so the shared wording lives in one template.
_GUIDANCE_TEMPLATE = """You are a {era} trade union representative.
Use the retrieved sentences as your knowledge base.
Speak persuasively as if you are arguing with a fellow trade unionist.
Do not use your own knowledge.
Vary your sentence structures, do not repeat phrases.
Respond with 2 sentences maximum.
Do not use the names of any trade union members or name any geographic locations."""

guidance_a = _GUIDANCE_TEMPLATE.format(era="1920s")  # Speaker A
guidance_b = _GUIDANCE_TEMPLATE.format(era="2020s")  # Speaker B

def generate_text(prompt):
    """Run the module-level ``llm`` on ``prompt`` and return the trimmed completion.

    Args:
        prompt: The complete prompt string to send to the model.

    Returns:
        The generated text with surrounding whitespace removed. If the model
        call raises, a bracketed ``[Error generating response: ...]`` string is
        returned instead so the caller can display the failure.
    """
    try:
        # No ``do_sample`` here: ctransformers' call signature does not accept
        # that keyword, so passing it raised TypeError on every call and the
        # handler below turned each reply into an error string. Sampling is
        # already controlled through ``temperature``.
        output = llm(prompt, max_new_tokens=150, temperature=0.7)
        return output.strip()
    except Exception as e:
        # Deliberately broad: this is the boundary between the model and the UI.
        return f"[Error generating response: {e}]"

# Dialogue turn generator
def generate_turn(query, retriever, prompt_text):
    """Produce one speaker's reply to ``query`` using retrieved excerpts.

    Args:
        query: The message the speaker is responding to; also used as the
            retrieval query.
        retriever: A LangChain retriever whose results expose ``page_content``.
        prompt_text: The system guidance placed at the top of the prompt.

    Returns:
        Whatever ``generate_text`` returns for the assembled prompt.
    """
    # invoke() is the supported retriever entry point; get_relevant_documents()
    # is deprecated in LangChain.
    docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in docs)
    # State the real number of excerpts: the prompt used to claim 5 regardless
    # of how many the retriever was configured to return.
    prompt = (
        f"{prompt_text}\n\n"
        f'You just heard the following message:\n"{query}"\n\n'
        f"Here are {len(docs)} excerpts from your documents that may help you reply:\n"
        f"{context}\n\n"
        "Respond to the message above based ONLY on this information, "
        "and speak as if you were in a real conversation."
    )
    return generate_text(prompt)

# Conversation function
def simulate_conversation(query, history):
    """Run one exchange: Speaker A answers ``query``, then Speaker B answers A.

    Args:
        query: The user's message. ``None`` or whitespace-only input is treated
            as empty and produces no exchange.
        history: Previous chat entries as a list of (user, reply) pairs, or
            ``None`` when there is no history yet. It is never mutated.

    Returns:
        A ``(textbox_value, history)`` tuple: an empty string to clear the
        input box, and the history with the new exchange appended (or an
        unchanged copy of the history when the query was empty).
    """
    # Normalise both inputs so a missing value behaves like an empty one
    # instead of raising AttributeError / TypeError.
    history = list(history) if history else []
    query = (query or "").strip()
    if not query:
        return "", history

    a_reply = generate_turn(query, retriever_a, guidance_a)
    # Speaker B responds to what Speaker A just said, not to the user.
    b_reply = generate_turn(a_reply, retriever_b, guidance_b)

    exchange = (f"User: {query}", f"Speaker A: {a_reply}\nSpeaker B: {b_reply}")
    return "", history + [exchange]

# Random prompt from bs_sen
def random_query(history):
    """Start an exchange from a sentence drawn at random from ``bs_sen``.

    Args:
        history: The chat history to extend; forwarded unchanged.

    Returns:
        The ``(textbox_value, history)`` tuple produced by
        ``simulate_conversation`` for the drawn sentence.
    """
    return simulate_conversation(random.choice(bs_sen), history)

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Dialogue Simulator between Two Union Eras")
    # Start from an explicit empty list so the handlers always receive a list.
    chatbot = gr.Chatbot(value=[])
    txt = gr.Textbox(label="Ask a question")
    ask_btn = gr.Button("Ask")
    rand_btn = gr.Button("Random Conversation")

    # The chatbot is both the history input and the output of each handler.
    # Previously a separate gr.State was read as the history but never written
    # back, so every click started from an empty history and wiped the
    # transcript instead of appending to it.
    ask_btn.click(fn=simulate_conversation, inputs=[txt, chatbot], outputs=[txt, chatbot])
    rand_btn.click(fn=random_query, inputs=chatbot, outputs=[txt, chatbot])

# Startup smoke tests: make one cheap call against each resource loaded above
# and print the outcome. Failures are reported, not raised, so the app still
# starts.

# FAISS indexes: a one-result similarity search against each store.
for label, index in (("A", faiss_a), ("B", faiss_b)):
    try:
        test_doc = index.similarity_search("test", k=1)
        print(f"FAISS {label} loaded successfully:", test_doc)
    except Exception as err:
        print(f"[ERROR] FAISS {label} failed to load:", str(err))

# Sentence pool: report its size and show one random entry.
# NOTE(review): these messages say "bc_sentences.pkl" while the file opened at
# load time is "bc sentences.pkl" (with a space) -- confirm which name is right.
try:
    sentence_total = len(bs_sen)
    print(f"Loaded {sentence_total} sentences from bc_sentences.pkl")
    sample_sentence = random.choice(bs_sen)
    print("Sample sentence:", sample_sentence)
except Exception as err:
    print("[ERROR] bc_sentences.pkl failed to load:", str(err))

# LLM: a short generation to confirm the model responds.
try:
    warmup_response = llm("Hello", max_new_tokens=10)
    print("LLM warmup response:", warmup_response)
except Exception as err:
    print("[ERROR] LLM failed on warmup:", str(err))

# Start the web server only when this file is run as a script, so importing
# the module (for tooling, tests or Gradio's reload mode) does not block on a
# running server.
if __name__ == "__main__":
    demo.launch()