Spaces:

Jesudian
/

RAG-Chatbot

Sleeping

App Files Files Community

Jesudian committed on May 5

Commit

bc4be0a

verified ·

1 Parent(s): 0fa1096

Upload 3 files

Browse files

Required files are added to the repo

Files changed (3) hide show

README.md +14 -5
app.py +63 -0
requirements.txt +7 -0

README.md CHANGED Viewed

@@ -1,13 +1,22 @@
 ---
-title: RAG Chatbot
-emoji: 📚
 colorFrom: blue
-colorTo: gray
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
-short_description: Retrieval-Augmented Generation with MMR and PDF Upload
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: RAG with MMR + PDF Upload
+emoji: 📄
 colorFrom: blue
+colorTo: indigo
 sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
+# 🧠 Retrieval-Augmented Generation with MMR and PDF Upload
+This Gradio demo allows you to:
+- Upload a PDF document
+- Chunk the content and embed using `MiniLM`
+- Store and search chunks using FAISS with **Maximal Marginal Relevance (MMR)**
+- Answer questions using `FLAN-T5`
+> Powered by LangChain + HuggingFace + Gradio + FAISS

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import fitz
+import tempfile
+import gradio as gr
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain_community.llms import HuggingFacePipeline
+from transformers import pipeline
+# Load and chunk PDF
+def load_pdf_chunks(file_path, chunk_size=500, chunk_overlap=50):
+    """Extract the text of a PDF and split it into LangChain documents.
+
+    Args:
+        file_path: Path of the PDF file to read.
+        chunk_size: Chunk size handed to ``RecursiveCharacterTextSplitter``.
+        chunk_overlap: Chunk overlap handed to ``RecursiveCharacterTextSplitter``.
+
+    Returns:
+        A list of ``Document`` objects, one per non-blank chunk, each carrying
+        ``file_path`` as its ``source`` metadata.
+    """
+    # Open the document as a context manager so it is closed as soon as the
+    # page text has been read, instead of leaking the handle on every upload.
+    with fitz.open(file_path) as doc:
+        text = "\n".join(page.get_text() for page in doc)
+    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    chunks = splitter.split_text(text)
+    # Skip whitespace-only chunks; they carry nothing worth embedding.
+    return [Document(page_content=chunk, metadata={"source": file_path}) for chunk in chunks if chunk.strip()]
+# Setup RAG pipeline
+def setup_rag(documents):
+    """Build a ``RetrievalQA`` chain over ``documents``.
+
+    The documents are embedded with ``all-MiniLM-L6-v2`` and indexed in FAISS;
+    the index is queried with MMR search, and ``google/flan-t5-base`` generates
+    the answer. The returned chain also reports its source documents.
+    """
+    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    index = FAISS.from_documents(documents, embedder)
+    # Search parameters forwarded unchanged to the MMR retriever.
+    mmr_settings = {"k": 4, "fetch_k": 8, "lambda_mult": 0.5}
+    mmr_retriever = index.as_retriever(search_type="mmr", search_kwargs=mmr_settings)
+    generator = pipeline("text2text-generation", model="google/flan-t5-base", max_length=128)
+    return RetrievalQA.from_chain_type(
+        llm=HuggingFacePipeline(pipeline=generator),
+        retriever=mmr_retriever,
+        return_source_documents=True,
+    )
+# Global RAG chain (updated on upload)
+qa_chain = None
+def upload_pdf(file):
+    """Index the uploaded PDF and install it as the active RAG chain.
+
+    Args:
+        file: Value of the Gradio file component: ``None`` when nothing has
+            been uploaded, otherwise a path string or an object with ``.name``.
+
+    Returns:
+        A status message for the UI.
+    """
+    global qa_chain
+    # Clicking the button with no file selected passes None; report it rather
+    # than failing on ``file.name``.
+    if file is None:
+        return "Please select a PDF file first."
+    # Accept both upload objects exposing ``.name`` and plain path strings.
+    pdf_path = getattr(file, "name", file)
+    docs = load_pdf_chunks(pdf_path)
+    # A PDF with no extractable text (e.g. scanned images) yields no chunks,
+    # and a vector index cannot be built from nothing. Keep any existing chain.
+    if not docs:
+        return "No extractable text found in this PDF."
+    qa_chain = setup_rag(docs)
+    return "PDF uploaded and indexed!"
+def query_rag(question):
+    """Answer ``question`` using the chain built from the last uploaded PDF.
+
+    Args:
+        question: The user's question text.
+
+    Returns:
+        The generated answer, or a prompt asking the user to upload a PDF or
+        enter a question when either is missing.
+    """
+    if qa_chain is None:
+        return "Upload a PDF first!"
+    # Do not run retrieval and generation for an empty query.
+    if not question or not question.strip():
+        return "Please enter a question."
+    # Calling the chain object directly is deprecated in LangChain; ``invoke``
+    # is the supported entry point and returns the same result mapping.
+    result = qa_chain.invoke({"query": question})
+    return result["result"]
+# Gradio UI
+# Two rows: the first uploads and indexes a PDF, the second asks questions
+# against the indexed document.
+with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 RAG App with MMR + PDF Upload (Hugging Face Demo)")
+    # Upload row: file picker plus the button that triggers indexing.
+    with gr.Row():
+        file = gr.File(label="Upload a PDF", file_types=[".pdf"])
+        upload_btn = gr.Button("Upload and Index")
+    # Shows the message returned by upload_pdf.
+    status = gr.Textbox(label="Status")
+    upload_btn.click(upload_pdf, inputs=file, outputs=status)
+    # Question row: input box, answer box and the button that runs the query.
+    with gr.Row():
+        question = gr.Textbox(label="Enter your question")
+        answer = gr.Textbox(label="Answer")
+        answer_btn = gr.Button("Answer")
+    # Shows the string returned by query_rag in the answer box.
+    answer_btn.click(query_rag, inputs=question, outputs=answer)
+# Launch the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+langchain
+langchain-community
+sentence-transformers
+transformers
+faiss-cpu
+pymupdf