Futuresony committed on
Commit 1c211ea · verified · 1 Parent(s): e7d841d

Update app.py

Files changed (1)
  1. app.py +42 -57
app.py CHANGED
@@ -1,58 +1,43 @@
- import faiss
- import numpy as np
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import gradio as gr
-
- # Set paths
- FAISS_PATH = "asa_faiss.index"
- DATASET_PATH = "responses.txt" # Ensure this file contains indexed responses
-
- # Load FAISS index
- print(f"Loading FAISS index from {FAISS_PATH}...")
- faiss_index = faiss.read_index(FAISS_PATH)
- print("✅ FAISS index loaded successfully!")
-
- # Load dataset responses
- with open(DATASET_PATH, "r", encoding="utf-8") as f:
-     dataset = f.readlines()
- print("✅ Responses dataset loaded!")
-
- # Load model & tokenizer (Ensure model path is correct)
- MODEL_NAME = "Futuresony/future_ai_12_10_2024.gguf" # Change this if using a local model
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-
- # Set FAISS threshold (Adjust based on FAISS distance values)
- THRESHOLD = 100 # Lower threshold to improve response accuracy
-
-
- def embed(text):
-     """Convert text to FAISS-compatible vector (Ensure same embeddings as FAISS training)."""
-     tokens = tokenizer.encode(text, add_special_tokens=True)
-     return np.array(tokens, dtype=np.float32).reshape(1, -1)
-
-
- def chatbot_response(user_query):
-     """Fetches response from FAISS or falls back to the model."""
-     query_vector = embed(user_query) # Convert input to vector
-     D, I = faiss_index.search(query_vector, k=1) # Search FAISS
-
-     print(f"Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}") # Debugging info
-
-     if D[0][0] < THRESHOLD: # Check if FAISS result is relevant
-         response = dataset[I[0][0]].strip() # Fetch matched response
-         print("✅ FAISS response used!")
-     else:
-         # Fallback to model-generated response
-         print("⚠️ FAISS match too weak, using model instead.")
-         inputs = tokenizer(user_query, return_tensors="pt")
-         outputs = model.generate(**inputs, max_new_tokens=150)
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     return response
-
-
- # Gradio UI
- iface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text", title="ASA Microfinance Chatbot")
- iface.launch()
 
 
 
 
 
  import gradio as gr
+ from huggingface_hub import InferenceClient
+
+ client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
+
+ def format_alpaca_prompt(user_input, system_prompt):
+     """Formats input in Alpaca/LLaMA style"""
+     prompt = f"""{system_prompt}
+
+ ### Instruction:
+ {user_input}
+
+ ### Response:
+ """
+     return prompt
+
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     formatted_prompt = format_alpaca_prompt(message, system_message)
+
+     response = client.text_generation(
+         formatted_prompt,
+         max_new_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+     )
+
+     # Extract only the response
+     cleaned_response = response.split("### Response:")[-1].strip()
+
+     yield cleaned_response # ✅ Output only the answer
+
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=250, value=128, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
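
For reference, a minimal local smoke test of the updated app.py (a sketch, not part of the commit). It assumes app.py is importable from the working directory and that the Futuresony/future_ai_12_10_2024.gguf repo is reachable through the Hugging Face Inference API; the example question and parameter values below are illustrative assumptions only.

```python
# Sketch only: exercises format_alpaca_prompt() and respond() from the updated app.py.
# Requires network access and a served text-generation endpoint for the model (assumed).
from app import format_alpaca_prompt, respond

# Build and inspect the Alpaca-style prompt that respond() sends to the endpoint.
prompt = format_alpaca_prompt("What services does ASA offer?", "You are a friendly Chatbot.")
print(prompt)

# respond() is a generator (it yields the cleaned answer once), so iterate it.
# The history argument is unused by the current implementation; an empty list is fine.
for answer in respond(
    message="What services does ASA offer?",
    history=[],
    system_message="You are a friendly Chatbot.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
):
    print(answer)
```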