import os

import faiss
import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# 🔹 Hugging Face credentials
HF_REPO = "Futuresony/my_model"  # Ensure this repo ID is correct
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Must be set in your environment

# 🔹 FAISS index and dataset paths
FAISS_PATH = "asa_faiss.index"
DATASET_PATH = "responses.txt"  # One indexed response per line

# ✅ Download the FAISS index from Hugging Face if it is not available locally
if not os.path.exists(FAISS_PATH):
    print("🔄 Downloading FAISS index...")
    FAISS_PATH = hf_hub_download(HF_REPO, "asa_faiss.index", token=HF_TOKEN)

print(f"📂 Loading FAISS index from {FAISS_PATH}...")
faiss_index = faiss.read_index(FAISS_PATH)
print("✅ FAISS index loaded successfully!")

# ✅ Load the responses dataset (row i of the index maps to line i of this file)
if os.path.exists(DATASET_PATH):
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        dataset = f.readlines()
    print("✅ Responses dataset loaded!")
else:
    print(f"⚠️ Warning: {DATASET_PATH} not found!")
    dataset = []

# ✅ Load model & tokenizer
print("🔄 Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)  # ✅ Fix: AutoTokenizer instead of GemmaTokenizer
model = AutoModelForCausalLM.from_pretrained(HF_REPO, token=HF_TOKEN)
print("✅ Model and tokenizer loaded!")

# 🔹 FAISS distance threshold: a match is accepted only if its L2 distance
# is below this value, so lower values mean stricter matching.
THRESHOLD = 80


def embed(text):
    """Convert text to a vector matching the FAISS index dimension.

    Note: this is a crude embedding (raw token IDs, zero-padded or
    truncated to faiss_index.d). It only yields meaningful distances if
    the index was built with the same scheme.
    """
    tokens = tokenizer.encode(text, add_special_tokens=True)
    vector = np.zeros(faiss_index.d, dtype=np.float32)
    n = min(len(tokens), faiss_index.d)
    vector[:n] = tokens[:n]
    return vector.reshape(1, -1)


def chatbot_response(user_query):
    """Return the nearest FAISS response, or fall back to model generation."""
    query_vector = embed(user_query)
    D, I = faiss_index.search(query_vector, k=1)
    print(f"🔍 Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}")

    if D[0][0] < THRESHOLD and 0 <= I[0][0] < len(dataset):
        response = dataset[I[0][0]].strip()
        print("✅ FAISS response used!")
    else:
        print("⚠️ FAISS match too weak, generating response with the model.")
        inputs = tokenizer(user_query, return_tensors="pt")
        with torch.no_grad():  # inference only; no gradients needed
            outputs = model.generate(**inputs, max_new_tokens=150)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# 🔹 Gradio UI
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="ASA Microfinance Chatbot",
    description="A chatbot that answers using FAISS retrieval with a language-model fallback.",
)

if __name__ == "__main__":
    iface.launch()
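

# ── Hedged sketch: building a compatible index (assumption, offline use) ──
# The retrieval above only works if asa_faiss.index was built with the same
# token-ID embedding that embed() produces. The helper below shows one
# plausible way to build such an index from responses.txt. The IndexFlatL2
# type, the default dimension, and the name build_faiss_index are all
# assumptions for illustration; the original build script is not shown.
# Run it once, offline, in a separate script rather than from this app.
def build_faiss_index(lines, dim=128, out_path="asa_faiss.index"):
    """Hypothetical helper: index each response line by its token IDs."""
    vectors = np.zeros((len(lines), dim), dtype=np.float32)
    for i, line in enumerate(lines):
        tokens = tokenizer.encode(line.strip(), add_special_tokens=True)
        n = min(len(tokens), dim)  # zero-pad or truncate, exactly as embed() does
        vectors[i, :n] = tokens[:n]
    index = faiss.IndexFlatL2(dim)  # L2 distances, matching the THRESHOLD check
    index.add(vectors)
    faiss.write_index(index, out_path)
    return index

# Example (offline, one-time):
#     with open(DATASET_PATH, encoding="utf-8") as f:
#         build_faiss_index(f.readlines())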