# app.py
from pypdf import PdfReader
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global state to persist embeddings and chunks
index = None
chunks = []

# Step 1: Extract text from uploaded PDFs
def extract_text_from_pdfs(files):
    all_text = ""
    for file in files:
        # gr.File may hand back a path string or a file object depending on the
        # Gradio version; PdfReader accepts either, so resolve to a path first
        path = file.name if hasattr(file, "name") else file
        reader = PdfReader(path)
        for page in reader.pages:
            text = page.extract_text()
            if text:
                all_text += text + "\n"
    return all_text

# Step 2: Chunk text
def chunk_text(text, chunk_size=500, overlap=50):
    words = text.split()
    result = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i:i + chunk_size])
        result.append(chunk)
    return result

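# A quick worked example of the windowing above, using the default arguments:
# with chunk_size=500 and overlap=50 the loop steps by 450 words, so a
# 1,000-word document yields chunks covering words 0-499, 450-949, and 900-999.
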
# Step 3: Embed and store chunks
def create_index(text_chunks):
    global index, chunks
    chunks = text_chunks
    embeddings = model.encode(chunks)
    index = faiss.IndexFlatL2(len(embeddings[0]))
    index.add(np.array(embeddings))

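# Note: IndexFlatL2 ranks chunks by Euclidean distance. If cosine similarity is
# preferred (a common choice for sentence embeddings), one sketch is to
# normalize the embeddings and switch to an inner-product index:
#
#   embeddings = model.encode(chunks, normalize_embeddings=True)
#   index = faiss.IndexFlatIP(embeddings.shape[1])
#   index.add(np.array(embeddings))
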
# Step 4: Retrieve top relevant chunks
def get_top_chunks(query, k=3):
    query_vec = model.encode([query])
    D, I = index.search(np.array(query_vec), k)
    # FAISS pads the result with -1 when fewer than k vectors are indexed
    return [chunks[i] for i in I[0] if i != -1]

# Step 5: Fake LLM response (replace with real API call if needed)
def call_llm(context, question):
    return f"Answer (simulated): Based on context:\n\n{context}\n\nQuestion: {question}"

# Step 6: Gradio main function
def rag_pipeline(files, question):
    text = extract_text_from_pdfs(files)
    text_chunks = chunk_text(text)
    if not text_chunks:
        return "No readable text could be extracted from the uploaded PDFs."
    create_index(text_chunks)
    top_chunks = get_top_chunks(question)
    context = "\n".join(top_chunks)
    answer = call_llm(context, question)
    return answer

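# Note: rag_pipeline re-embeds and re-indexes the PDFs on every question. For
# repeated questions over the same files, the index could instead be built once
# (for example, cached on the uploaded file paths or built by a separate
# "Index" button handler) so only get_top_chunks and call_llm run per query.
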
# Step 7: Gradio UI
demo = gr.Interface(
    fn=rag_pipeline,
    inputs=[
        gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDFs"),
        gr.Textbox(lines=2, label="Ask a question"),
    ],
    outputs="text",
    title="RAG PDF Chatbot",
    description="Upload PDFs and ask questions based on their content",
)

if __name__ == "__main__":
    demo.launch()