ftkd99 committed on
Commit
027da24
·
verified ·
1 Parent(s): 7e55752

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +25 -0
  2. utils.py +20 -0
app.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from utils import build_faiss_index, retrieve

# Load the knowledge base: one passage per non-blank line.
# encoding is given explicitly so the decode does not depend on the host
# locale (the original relied on the platform default encoding).
with open("documents/1mg_rag.txt", encoding="utf-8") as f:
    docs = [line.strip() for line in f if line.strip()]

# Build the FAISS vector index over the documents. The embeddings returned
# alongside the index are not needed at query time, so they are discarded.
index, _ = build_faiss_index(docs)

# Load the GPTQ-quantized Mistral 7B Instruct model.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repo — acceptable here only because the repo is explicitly pinned.
model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
def answer_question(query):
    """Answer *query* using retrieved context and the Mistral generator.

    Retrieves the top documents for the query, packs them into an [INST]
    prompt, and returns only the newly generated answer text.
    """
    context = "\n".join(retrieve(query, index, docs))
    prompt = f"[INST] Use the following context to answer the question.\n\nContext:\n{context}\n\nQuestion: {query} [/INST]"
    # return_full_text=False: without it the text-generation pipeline prepends
    # the entire prompt (instruction template + context) to `generated_text`,
    # so the UI would display the prompt before the actual answer.
    result = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return result[0]['generated_text'].strip()
24
+
25
+ gr.Interface(fn=answer_question, inputs="text", outputs="text", title="Mistral RAG").launch()
utils.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Shared sentence embedder, loaded once at import time and reused by every
# function below. Loading the model is the expensive step; encode calls are
# comparatively cheap.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
def embed_texts(texts):
    """Encode a list of strings into plain (non-tensor) embedding vectors."""
    vectors = embedder.encode(texts, convert_to_tensor=False)
    return vectors
def build_faiss_index(texts):
    """Embed *texts* and index them in a flat (exact) L2 FAISS index.

    Returns ``(index, embeddings)`` where ``embeddings`` is a float32
    2-D array of shape (len(texts), dim).

    Raises ValueError on empty input — the original crashed with an
    opaque IndexError on ``embeddings[0]`` in that case.
    """
    if not texts:
        raise ValueError("build_faiss_index() requires at least one text")
    # FAISS expects a contiguous float32 matrix; convert once up front and
    # reuse the array for both the dimensionality lookup and the add() call
    # (the original converted implicitly and read the dim off element 0).
    embeddings = np.asarray(embed_texts(texts), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
def retrieve(query, index, docs, k=3):
    """Return up to *k* documents most similar to *query* (L2 distance).

    FAISS pads the result with id -1 when fewer than *k* vectors are
    indexed; the original passed those through, so ``docs[-1]`` silently
    returned the last document. Placeholder ids are filtered out here.
    """
    # float32 to match the index's storage dtype.
    query_embedding = np.asarray(embed_texts([query]), dtype="float32")
    _, indices = index.search(query_embedding, k)
    return [docs[i] for i in indices[0] if i != -1]