Futuresony committed on
Commit 9c7848d · verified · 1 Parent(s): f65fcfc

Update app.py

Files changed (1):
  1. app.py +43 -77

app.py CHANGED
@@ -1,81 +1,47 @@
- import os
- import faiss
- import numpy as np
- import torch
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- # ✅ Fix: Use AutoTokenizer instead of GemmaTokenizer
- tokenizer = AutoTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)
-
- from huggingface_hub import hf_hub_download
-
- # 🔹 Hugging Face Credentials
- HF_REPO = "Futuresony/my_model"  # Ensure this is correct
- HF_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')  # Ensure this is set in your environment
-
- # 🔹 FAISS Index Path
- FAISS_PATH = "asa_faiss.index"
- DATASET_PATH = "responses.txt"  # Ensure this file contains indexed responses
-
- # ✅ Load FAISS index from Hugging Face if not available locally
- if not os.path.exists(FAISS_PATH):
-     print("🔄 Downloading FAISS index...")
-     FAISS_PATH = hf_hub_download(HF_REPO, "asa_faiss.index", token=HF_TOKEN)
-
- print(f"📂 Loading FAISS index from {FAISS_PATH}...")
- faiss_index = faiss.read_index(FAISS_PATH)
- print("✅ FAISS index loaded successfully!")
-
- # ✅ Load responses dataset
- if os.path.exists(DATASET_PATH):
-     with open(DATASET_PATH, "r", encoding="utf-8") as f:
-         dataset = f.readlines()
-     print("✅ Responses dataset loaded!")
- else:
-     print(f"⚠️ Warning: {DATASET_PATH} not found!")
-     dataset = []
-
- # ✅ Load model & tokenizer
- print("🔄 Loading tokenizer and model...")
- tokenizer = GemmaTokenizer.from_pretrained(HF_REPO, token=HF_TOKEN)
- model = AutoModelForCausalLM.from_pretrained(HF_REPO, token=HF_TOKEN)
- print("✅ Model and tokenizer loaded!")
-
- # 🔹 Set FAISS distance threshold (lower values = more strict matches)
- THRESHOLD = 80  # Adjusted threshold for better accuracy
-
- def embed(text):
-     """Convert text to FAISS-compatible vector."""
-     tokens = tokenizer.encode(text, add_special_tokens=True)
-     return np.array(tokens, dtype=np.float32).reshape(1, -1)
-
- def chatbot_response(user_query):
-     """Fetches response from FAISS or falls back to the model."""
-     query_vector = embed(user_query)
-     D, I = faiss_index.search(query_vector, k=1)
-
-     print(f"🔍 Closest FAISS match index: {I[0][0]}, Distance: {D[0][0]}")
-
-     if D[0][0] < THRESHOLD and 0 <= I[0][0] < len(dataset):
-         response = dataset[I[0][0]].strip()
-         print("✅ FAISS response used!")
-     else:
-         print("⚠️ FAISS match too weak, generating response using model.")
-         inputs = tokenizer(user_query, return_tensors="pt")
-         outputs = model.generate(**inputs, max_new_tokens=150)
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     return response
-
- # 🔹 Gradio UI
- iface = gr.Interface(
-     fn=chatbot_response,
-     inputs="text",
-     outputs="text",
-     title="ASA Microfinance Chatbot",
-     description="A chatbot that provides information using FAISS and a language model."
  )

  if __name__ == "__main__":
-     iface.launch()
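
Note that the removed embed() helper queries FAISS with a vector of raw token IDs, which only makes sense if the index was built from identically shaped token-ID vectors. For comparison only, here is a minimal sketch of the more common dense-embedding lookup; the sentence-transformers model name and the sample query are assumptions for illustration, not part of this repository:

# Illustrative sketch only: query a FAISS index with dense embeddings rather than
# raw token IDs. Assumes the index was built with the same embedding model.
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer  # assumed dependency

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # hypothetical model choice

def embed(text: str) -> np.ndarray:
    # Encode the query into a fixed-size float32 vector shaped (1, dim), as FAISS expects.
    return embedder.encode([text], convert_to_numpy=True).astype(np.float32)

index = faiss.read_index("asa_faiss.index")
distances, indices = index.search(embed("How do I open an account?"), k=1)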
  import gradio as gr
+ from huggingface_hub import InferenceClient
+
+ client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
+
+ def format_alpaca_prompt(user_input, system_prompt, history):
+     """Formats input in Alpaca/LLaMA style."""
+     history_str = "\n".join([f"### Instruction:\n{h[0]}\n### Response:\n{h[1]}" for h in history])
+     prompt = f"""{system_prompt}
+ {history_str}
+
+ ### Instruction:
+ {user_input}
+
+ ### Response:
+ """
+     return prompt
+
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     formatted_prompt = format_alpaca_prompt(message, system_message, history)
+
+     response = client.text_generation(
+         formatted_prompt,
+         max_new_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+     )
+
+     # ✅ Extract only the response
+     cleaned_response = response.split("### Response:")[-1].strip()
+
+     history.append((message, cleaned_response))  # ✅ Update history with the new message and response
+
+     yield cleaned_response  # ✅ Output only the answer
+
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=250, value=128, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.9, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.99, step=0.01, label="Top-p (nucleus sampling)"),
+     ],
  )

  if __name__ == "__main__":
+     demo.launch()
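
For reference, format_alpaca_prompt builds the prompt that client.text_generation receives. A quick illustration with made-up text (the history pair and the question below are hypothetical, chosen only to show the layout):

# Illustrative only: what format_alpaca_prompt produces for a one-turn history.
history = [("What is ASA?", "ASA is a microfinance institution.")]  # hypothetical turn
print(format_alpaca_prompt("How do I apply for a loan?", "You are a friendly Chatbot.", history))
# Expected output:
# You are a friendly Chatbot.
# ### Instruction:
# What is ASA?
# ### Response:
# ASA is a microfinance institution.
#
# ### Instruction:
# How do I apply for a loan?
#
# ### Response:

gr.ChatInterface passes the values of additional_inputs to respond as extra arguments after message and history, in the order listed, so the textbox and sliders map to system_message, max_tokens, temperature, and top_p. text_generation normally returns only newly generated text, so the split on "### Response:" mainly guards against the model echoing the prompt.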