# FuturesonyAi / app.py
import os
import faiss
import numpy as np
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import hf_hub_download
# 🔹 Hugging Face credentials
HF_REPO = "Futuresony/my_model"  # Model repo on the Hugging Face Hub
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Must be set in the environment
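# Hedged safeguard (added): if the token is missing, downloads from private or
# gated repos will fail later with a 401; a loud early warning is easier to debug.
if not HF_TOKEN:
    print("⚠️ HUGGINGFACEHUB_API_TOKEN is not set; private repo access may fail.")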
# 🔹 FAISS index and response dataset paths
FAISS_PATH = "asa_faiss.index"
DATASET_PATH = "responses.txt"  # One response per line, in the same order as the index vectors
# ✅ Download the FAISS index from Hugging Face if it is not available locally
if not os.path.exists(FAISS_PATH):
    print("🔄 Downloading FAISS index...")
    FAISS_PATH = hf_hub_download(HF_REPO, "asa_faiss.index", token=HF_TOKEN)

print(f"📂 Loading FAISS index from {FAISS_PATH}...")
faiss_index = faiss.read_index(FAISS_PATH)
print("✅ FAISS index loaded successfully!")
# ✅ Load the responses dataset
if os.path.exists(DATASET_PATH):
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        dataset = f.readlines()
    print("✅ Responses dataset loaded!")
else:
    print(f"⚠️ Warning: {DATASET_PATH} not found!")
    dataset = []
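# Consistency check (assumption: vector i in the index corresponds to line i of
# responses.txt); a size mismatch usually means the two files are out of sync.
if dataset and faiss_index.ntotal != len(dataset):
    print(f"⚠️ Size mismatch: {faiss_index.ntotal} vectors vs {len(dataset)} responses.")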
# ✅ Load model & tokenizer (AutoTokenizer resolves the correct tokenizer class,
# so the Gemma-specific class is not needed)
print("🔄 Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(HF_REPO, token=HF_TOKEN)
print("✅ Model and tokenizer loaded!")
# 🔹 FAISS distance threshold (distances below this count as a match; lower = stricter)
THRESHOLD = 80  # Tuned empirically for this index
def embed(text):
    """Convert text to a FAISS-compatible vector of raw token IDs, padded or
    truncated to the index dimension (assumes the index was built the same way)."""
    tokens = tokenizer.encode(text, add_special_tokens=True)
    vector = np.zeros(faiss_index.d, dtype=np.float32)
    vector[: min(len(tokens), faiss_index.d)] = tokens[: faiss_index.d]
    return vector.reshape(1, -1)
def chatbot_response(user_query):
    """Fetch a response from FAISS, falling back to model generation."""
    query_vector = embed(user_query)
    D, I = faiss_index.search(query_vector, k=1)
    print(f"🔍 Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}")

    if D[0][0] < THRESHOLD and 0 <= I[0][0] < len(dataset):
        response = dataset[I[0][0]].strip()
        print("✅ FAISS response used!")
    else:
        print("⚠️ FAISS match too weak, generating response with the model.")
        inputs = tokenizer(user_query, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=150)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
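# Quick smoke test outside Gradio (hypothetical query; uncomment when debugging):
# print(chatbot_response("What loan products does ASA offer?"))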
# 🔹 Gradio UI
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="ASA Microfinance Chatbot",
    description="A chatbot that provides information using FAISS and a language model.",
)
if __name__ == "__main__":
    iface.launch()