import os

import faiss
import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# 🔹 Hugging Face credentials
HF_REPO = "Futuresony/my_model"  # Ensure this repo ID is correct
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Must be set in your environment

# 🔹 FAISS index and dataset paths
FAISS_PATH = "asa_faiss.index"
DATASET_PATH = "responses.txt"  # One indexed response per line

# ✅ Download the FAISS index from Hugging Face if it is not available locally
if not os.path.exists(FAISS_PATH):
    print("🔄 Downloading FAISS index...")
    FAISS_PATH = hf_hub_download(HF_REPO, "asa_faiss.index", token=HF_TOKEN)

print(f"📂 Loading FAISS index from {FAISS_PATH}...")
faiss_index = faiss.read_index(FAISS_PATH)
print("✅ FAISS index loaded successfully!")

# ✅ Load the responses dataset (row i of the index maps to line i of this file)
if os.path.exists(DATASET_PATH):
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        dataset = f.readlines()
    print("✅ Responses dataset loaded!")
else:
    print(f"⚠️ Warning: {DATASET_PATH} not found!")
    dataset = []

# ✅ Load model & tokenizer
print("🔄 Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)  # ✅ Fix: AutoTokenizer instead of GemmaTokenizer
model = AutoModelForCausalLM.from_pretrained(HF_REPO, token=HF_TOKEN)
print("✅ Model and tokenizer loaded!")

# 🔹 FAISS distance threshold: a match is accepted only if its L2 distance
# is below this value, so lower values mean stricter matching.
THRESHOLD = 80


def embed(text):
    """Convert text to a vector matching the FAISS index dimension.

    Note: this is a crude embedding (raw token IDs, zero-padded or
    truncated to faiss_index.d). It only yields meaningful distances if
    the index was built with the same scheme.
    """
    tokens = tokenizer.encode(text, add_special_tokens=True)
    vector = np.zeros(faiss_index.d, dtype=np.float32)
    n = min(len(tokens), faiss_index.d)
    vector[:n] = tokens[:n]
    return vector.reshape(1, -1)


def chatbot_response(user_query):
    """Return the nearest FAISS response, or fall back to model generation."""
    query_vector = embed(user_query)
    D, I = faiss_index.search(query_vector, k=1)
    print(f"🔍 Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}")

    if D[0][0] < THRESHOLD and 0 <= I[0][0] < len(dataset):
        response = dataset[I[0][0]].strip()
        print("✅ FAISS response used!")
    else:
        print("⚠️ FAISS match too weak, generating response with the model.")
        inputs = tokenizer(user_query, return_tensors="pt")
        with torch.no_grad():  # inference only; no gradients needed
            outputs = model.generate(**inputs, max_new_tokens=150)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# 🔹 Gradio UI
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="ASA Microfinance Chatbot",
    description="A chatbot that answers using FAISS retrieval with a language-model fallback.",
)

if __name__ == "__main__":
    iface.launch()
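

# ── Hedged sketch: building a compatible index (assumption, offline use) ──
# The retrieval above only works if asa_faiss.index was built with the same
# token-ID embedding that embed() produces. The helper below shows one
# plausible way to build such an index from responses.txt. The IndexFlatL2
# type, the default dimension, and the name build_faiss_index are all
# assumptions for illustration; the original build script is not shown.
# Run it once, offline, in a separate script rather than from this app.
def build_faiss_index(lines, dim=128, out_path="asa_faiss.index"):
    """Hypothetical helper: index each response line by its token IDs."""
    vectors = np.zeros((len(lines), dim), dtype=np.float32)
    for i, line in enumerate(lines):
        tokens = tokenizer.encode(line.strip(), add_special_tokens=True)
        n = min(len(tokens), dim)  # zero-pad or truncate, exactly as embed() does
        vectors[i, :n] = tokens[:n]
    index = faiss.IndexFlatL2(dim)  # L2 distances, matching the THRESHOLD check
    index.add(vectors)
    faiss.write_index(index, out_path)
    return index

# Example (offline, one-time):
#     with open(DATASET_PATH, encoding="utf-8") as f:
#         build_faiss_index(f.readlines())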