Spaces:

ankitv42
/

docs_talk

Running

App Files Files Community

docs_talk / app.py

ankitv42

Update app.py

af821c4 verified 3 months ago

raw

history blame

3.02 kB

	import os
	import tempfile
	import torch
	import gradio as gr
	from langchain_community.vectorstores import FAISS
	from langchain_groq import ChatGroq
	from langchain_community.embeddings import HuggingFaceBgeEmbeddings # Fixed import
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_core.runnables import RunnablePassthrough
	from langchain_community.document_loaders import PyPDFLoader # Fixed import
	from langchain import hub

	# Groq API key: respect a key that the environment already provides (for
	# example a deployment secret) and only fall back to the placeholder when
	# nothing is set. Assigning unconditionally would overwrite a real key.
	os.environ.setdefault("GROQ_API_KEY", "your_groq_api_key")

	# Device for the local embedding model: GPU when torch can see one, else CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Chat model accessed through Groq. `device` is not passed here; it is only
	# used by the embedding model below.
	llm = ChatGroq(model="llama3-8b-8192")

	# Embedding model used to vectorise PDF chunks for the FAISS index.
	model_name = "BAAI/bge-small-en"
	hf_embeddings = HuggingFaceBgeEmbeddings(
	    model_name=model_name,
	    model_kwargs={"device": device},
	    encode_kwargs={"normalize_embeddings": True},
	)

	# Function to process PDF
	def process_pdf(file):
	    """Index an uploaded PDF and (re)build the module-level RAG chain.

	    Args:
	        file: Raw bytes of the uploaded PDF (the upload widget is configured
	            with ``type="binary"``), or ``None`` when nothing was uploaded.

	    Returns:
	        A human-readable status string for the UI.

	    Side effects:
	        Rebinds the global ``rag_chain`` to a chain that retrieves chunks of
	        this PDF, fills the RAG prompt and calls ``llm``.
	    """
	    if file is None:
	        return "Please upload a PDF file."

	    # PyPDFLoader is given a filesystem path, so spill the bytes to a temp file.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
	        temp_file.write(file)
	        temp_file_path = temp_file.name

	    try:
	        docs = PyPDFLoader(temp_file_path).load()
	    finally:
	        # delete=False means nobody else removes the file; without this every
	        # upload would leave a stray PDF behind in the temp directory.
	        try:
	            os.remove(temp_file_path)
	        except OSError:
	            # Best-effort cleanup: a failed delete must not mask the result
	            # (or the real error) of loading the PDF.
	            pass

	    # Split text into overlapping chunks for retrieval
	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    splits = text_splitter.split_documents(docs)
	    if not splits:
	        # Nothing to index (e.g. a scanned, image-only PDF); report it instead
	        # of handing the vector store an empty document list.
	        return "No extractable text found in this PDF."

	    # Create FAISS vector store and expose it as a retriever
	    vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)
	    retriever = vectorstore.as_retriever()

	    # Load RAG prompt from the LangChain hub
	    prompt = hub.pull("rlm/rag-prompt")

	    def format_docs(docs):
	        """Join the retrieved chunks into one context string."""
	        return "\n\n".join(doc.page_content for doc in docs)

	    # RAG chain: retrieve context for the question, fill the prompt, call the LLM.
	    global rag_chain
	    rag_chain = (
	        {"context": retriever | format_docs, "question": RunnablePassthrough()}
	        | prompt
	        | llm
	    )

	    return "PDF processed successfully! Now ask questions."

	# Function to answer queries
	def ask_question(query):
	    """Answer a question with the RAG chain built by the PDF-processing step.

	    Args:
	        query: The user's question as typed into the UI.

	    Returns:
	        The answer text, or a short instruction string when no PDF has been
	        processed yet or the question is blank.
	    """
	    chain = globals().get("rag_chain")
	    if chain is None:
	        return "Please upload and process a PDF first."

	    if not query or not query.strip():
	        # Avoid spending an LLM call on an empty prompt.
	        return "Please enter a question."

	    response = chain.invoke(query)
	    if isinstance(response, str):
	        return response

	    # The chain ends in the chat model, so the result is a message object;
	    # return its text rather than the object, whose repr would otherwise be
	    # what the output textbox shows.
	    content = getattr(response, "content", None)
	    return content if isinstance(content, str) else str(response)

	# Gradio UI: one row to upload and process a PDF, one row to ask questions.
	with gr.Blocks() as demo:
	    gr.Markdown("# 📄 PDF Chatbot with RAG")
	    gr.Markdown("Upload a PDF and ask questions!")

	    with gr.Row():
	        # type="binary" makes the click handler receive the file's raw bytes.
	        pdf_input = gr.File(label="Upload PDF", type="binary")
	        process_button = gr.Button("Process PDF")

	    output_message = gr.Textbox(label="Status", interactive=False)

	    with gr.Row():
	        query_input = gr.Textbox(label="Ask a Question")
	        submit_button = gr.Button("Submit")

	    response_output = gr.Textbox(label="AI Response")

	    # Wire the buttons to the handlers defined in this file.
	    process_button.click(process_pdf, inputs=pdf_input, outputs=output_message)
	    submit_button.click(ask_question, inputs=query_input, outputs=response_output)

	# Launch only when run as a script, so importing this module (tests, reload
	# tooling) does not start a server. share=True is dropped: it opens a public
	# tunnel on local runs, and a hosted Space is already publicly reachable.
	if __name__ == "__main__":
	    demo.launch()