import os
import faiss
import numpy as np
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import hf_hub_download

# 🔹 Hugging Face Credentials
HF_REPO = "Futuresony/my_model"  # Ensure this is correct
HF_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')  # Ensure this is set in your environment

# 🔹 FAISS Index Path
FAISS_PATH = "asa_faiss.index"
DATASET_PATH = "responses.txt"  # Ensure this file contains indexed responses

# ✅ Load FAISS index from Hugging Face if not available locally
if not os.path.exists(FAISS_PATH):
    print("🔄 Downloading FAISS index...")
    FAISS_PATH = hf_hub_download(HF_REPO, "asa_faiss.index", token=HF_TOKEN)
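    # hf_hub_download returns the local cache path, so FAISS_PATH now points to the downloaded file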

print(f"πŸ“‚ Loading FAISS index from {FAISS_PATH}...")
faiss_index = faiss.read_index(FAISS_PATH)
print("βœ… FAISS index loaded successfully!")

# ✅ Load responses dataset
if os.path.exists(DATASET_PATH):
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        dataset = f.readlines()
    print("βœ… Responses dataset loaded!")
else:
    print(f"⚠️ Warning: {DATASET_PATH} not found!")
    dataset = []
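# Line i of responses.txt is assumed to correspond to vector i in the FAISS index.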

# ✅ Load model & tokenizer
print("🔄 Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)  # AutoTokenizer resolves the correct tokenizer class for the checkpoint
model = AutoModelForCausalLM.from_pretrained(HF_REPO, token=HF_TOKEN)
print("✅ Model and tokenizer loaded!")

# 🔹 FAISS distance threshold: a match is accepted only if its distance is below this (lower = stricter)
THRESHOLD = 80  # Tune empirically; the right value depends on the vector scale and the data

def embed(text):
    """Convert text to a fixed-size float32 vector matching the FAISS index dimension."""
    # Raw token IDs are used as features, so the index must have been built the
    # same way; pad/truncate to faiss_index.d so search() accepts the vector.
    tokens = tokenizer.encode(text, add_special_tokens=True)[: faiss_index.d]
    vector = np.zeros(faiss_index.d, dtype=np.float32)
    vector[: len(tokens)] = tokens
    return vector.reshape(1, -1)
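
# Offline helper (a sketch, not called by this app): one way a compatible index
# could be built from responses.txt. The raw-token-ID scheme must match embed(),
# `dim` (an assumed parameter) must equal the dimension of the served index, and
# IndexFlatL2 is likewise an assumption consistent with the distance threshold above.
def build_faiss_index(lines, dim, path="asa_faiss.index"):
    index = faiss.IndexFlatL2(dim)  # exact L2 search
    vectors = np.zeros((len(lines), dim), dtype=np.float32)
    for i, line in enumerate(lines):
        tokens = tokenizer.encode(line.strip(), add_special_tokens=True)[:dim]
        vectors[i, : len(tokens)] = tokens  # row i pairs with dataset[i]
    index.add(vectors)
    faiss.write_index(index, path)
    return index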

def chatbot_response(user_query):
    """Fetches response from FAISS or falls back to the model."""
    query_vector = embed(user_query)
    D, I = faiss_index.search(query_vector, k=1)

    print(f"πŸ” Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}")

    if D[0][0] < THRESHOLD and 0 <= I[0][0] < len(dataset):
        response = dataset[I[0][0]].strip()
        print("βœ… FAISS response used!")
    else:
        print("⚠️ FAISS match too weak, generating response using model.")
        inputs = tokenizer(user_query, return_tensors="pt")
        outputs = model.generate(**inputs, max_new_tokens=150)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return response
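
# Example (hypothetical query): chatbot_response("What loan products do you offer?")
# returns either a cached line from responses.txt (close FAISS match) or model-generated text.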

# 🔹 Gradio UI
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="ASA Microfinance Chatbot",
    description="A chatbot that provides information using FAISS and a language model."
)

if __name__ == "__main__":
    iface.launch()