Spaces:

Rohit1412
/

gemma3-27b-RAG

Sleeping

App Files Files Community

Rohit1412 committed on Mar 16

Commit

d7f5ad7

verified ·

1 Parent(s): 959cfe7

Create app.py

Browse files

Files changed (1) hide show

app.py +161 -0

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import os
+import gradio as gr
+import faiss
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from sentence_transformers import SentenceTransformer
+# ---------------------------
+# Load Models (cached on first run)
+# ---------------------------
+def load_models():
+    hf_token = os.getenv("HF_TOKEN")  # Set this secret in your HF Space settings
+    embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  # For embeddings
+    tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-4b-it", use_auth_token=hf_token)
+    model = AutoModelForCausalLM.from_pretrained(
+        "google/gemma-3-4b-it",
+        device_map="auto",
+        low_cpu_mem_usage=True,
+        use_auth_token=hf_token
+    )
+    return embed_model, tokenizer, model
+embed_model, tokenizer, model = load_models()
+# ---------------------------
+# Global state for FAISS index and document chunks.
+# Using a dictionary to hold state.
+state = {
+    "faiss_index": None,
+    "doc_chunks": []
+}
+# ---------------------------
+# Document Processing Function
+# ---------------------------
+def process_document(file, chunk_size, chunk_overlap):
+    """
+    Reads the uploaded file (PDF or text), extracts text, splits into chunks,
+    computes embeddings, and builds a FAISS index.
+    """
+    if file is None:
+        return "No file uploaded."
+    file_bytes = file.read()
+    file_name = file.name
+    text = ""
+    if file_name.lower().endswith(".pdf"):
+        try:
+            from PyPDF2 import PdfReader
+        except ImportError:
+            return "Error: PyPDF2 is required for PDF extraction."
+        # Save file to temporary path
+        temp_path = os.path.join("temp", file_name)
+        os.makedirs("temp", exist_ok=True)
+        with open(temp_path, "wb") as f:
+            f.write(file_bytes)
+        reader = PdfReader(temp_path)
+        for page in reader.pages:
+            text += page.extract_text() or ""
+    else:
+        # Assume it's a text file
+        text = file_bytes.decode("utf-8", errors="ignore")
+    if text.strip() == "":
+        return "No text found in the document."
+    # Split text into overlapping chunks
+    chunks = []
+    for start in range(0, len(text), chunk_size - chunk_overlap):
+        chunk_text = text[start: start + chunk_size]
+        chunks.append(chunk_text)
+    # Compute embeddings for each chunk using the embedding model.
+    embeddings = embed_model.encode(chunks, normalize_embeddings=True).astype('float32')
+    dim = embeddings.shape[1]
+    # Build FAISS index using cosine similarity (normalized vectors -> inner product)
+    index = faiss.IndexFlatIP(dim)
+    index.add(embeddings)
+    # Update global state
+    state["faiss_index"] = index
+    state["doc_chunks"] = chunks
+    # Return a preview (first 500 characters of the first chunk) and status.
+    preview = chunks[0][:500] if chunks else "No content"
+    return f"Indexed {len(chunks)} chunks.\n\n**Document Preview:**\n{preview}"
+# ---------------------------
+# Question Answering Function
+# ---------------------------
+def answer_question(query, top_k):
+    """
+    Retrieves the top_k chunks most relevant to the query using the FAISS index,
+    builds a prompt with the retrieved context, and generates an answer using the Gemma model.
+    """
+    index = state.get("faiss_index")
+    chunks = state.get("doc_chunks")
+    if index is None or len(chunks) == 0:
+        return "No document processed. Please upload a document first."
+    # Encode query using the same embedding model
+    query_vec = embed_model.encode([query], normalize_embeddings=True).astype('float32')
+    D, I = index.search(query_vec, top_k)
+    # Concatenate retrieved chunks as context
+    retrieved_text = ""
+    for idx in I[0]:
+        retrieved_text += chunks[idx] + "\n"
+    # Formulate the prompt for the generative model
+    prompt = f"Context:\n{retrieved_text}\nQuestion: {query}\nAnswer:"
+    # Tokenize and generate answer
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+    output_ids = model.generate(input_ids, max_new_tokens=200, temperature=0.2)
+    answer = tokenizer.decode(output_ids[0][input_ids.size(1):], skip_special_tokens=True)
+    return answer.strip()
+# ---------------------------
+# Gradio Interface
+# ---------------------------
+with gr.Blocks(title="RAG System with Gemma‑3‑4B‑it") as demo:
+    gr.Markdown(
+        """
+        # RAG System with Gemma‑3‑4B‑it
+        Upload a document (PDF or TXT) below. The system will extract text, split it into chunks,
+        build a vector index using FAISS, and then allow you to ask questions based on the document.
+        """
+    )
+    with gr.Tab("Document Upload & Processing"):
+        with gr.Row():
+            file_input = gr.File(label="Upload Document (PDF or TXT)", file_count="single")
+        with gr.Row():
+            chunk_size_input = gr.Number(label="Chunk Size (characters)", value=1000, precision=0)
+            chunk_overlap_input = gr.Number(label="Chunk Overlap (characters)", value=100, precision=0)
+        process_btn = gr.Button("Process Document")
+        process_output = gr.Markdown()
+    with gr.Tab("Ask a Question"):
+        query_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
+        top_k_input = gr.Number(label="Number of Chunks to Retrieve", value=3, precision=0)
+        answer_btn = gr.Button("Get Answer")
+        answer_output = gr.Markdown(label="Answer")
+    # Set up actions
+    process_btn.click(
+        fn=process_document,
+        inputs=[file_input, chunk_size_input, chunk_overlap_input],
+        outputs=process_output
+    )
+    answer_btn.click(
+        fn=answer_question,
+        inputs=[query_input, top_k_input],
+        outputs=answer_output
+    )
+if __name__ == "__main__":
+    demo.launch()