import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import RetrievalQA
import os
import shutil

# Hugging Face API key (store in your Space's secrets for security)
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
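# Optional guard (a minimal addition): fail fast with a clear message if the
# token is missing, rather than hitting an opaque auth error at query time
if not HUGGINGFACEHUB_API_TOKEN:
    raise RuntimeError("Set HUGGINGFACEHUB_API_TOKEN in your Space secrets.")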

# LLM served via the Hugging Face Inference API (here Mistral-7B-Instruct)
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.2,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
)
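
# Tip: HuggingFaceEndpoint also accepts generation parameters such as
# max_new_tokens and top_p, if answers come back truncated or too random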

# Embeddings (Hugging Face MiniLM: small and fast)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
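# Note: the embedding model (~80 MB) is downloaded on first run, so a fresh
# Space may take a minute to start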

# Temporary folder to store PDFs
UPLOAD_DIR = "pdf_uploads"
if not os.path.exists(UPLOAD_DIR):
    os.makedirs(UPLOAD_DIR)

def process_pdf(file_path):
    # With recent Gradio, gr.File passes the handler a temporary file path
    # by default, so copy it into the upload folder rather than calling
    # .read() on a file object
    dest_path = os.path.join(UPLOAD_DIR, os.path.basename(file_path))
    shutil.copy(file_path, dest_path)

    # Load the PDF and split it into page-level documents
    loader = PyPDFLoader(dest_path)
    pages = loader.load_and_split()
    
    # Create Chroma vector store (in-memory)
    vectordb = Chroma.from_documents(pages, embedding=embeddings)
    retriever = vectordb.as_retriever()
    
    # Create RetrievalQA chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    
    # Return the QA chain to use in the chat
    return qa_chain
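
# Quick smoke test outside the Gradio UI (hypothetical file name):
#
#   chain = process_pdf("sample.pdf")
#   print(chain.invoke({"query": "What is this document about?"})["result"])
#
# To keep the index across restarts, Chroma.from_documents also accepts a
# persist_directory argument in place of the in-memory default.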

# Global variable to hold QA chain for the session
qa_chain = None

def upload_pdf(file):
    global qa_chain
    if file is None:
        return [(None, "❌ Please choose a PDF file first.")]
    qa_chain = process_pdf(file)
    # The Chatbot output expects (user, bot) message pairs, not a bare string
    return [(None, "βœ… PDF uploaded and processed! Ask me anything about it.")]

def chatbot(user_message, history):
    if qa_chain is None:
        return "", history + [(user_message, "❌ Please upload a PDF first.")]

    # With return_source_documents=True the chain has two output keys, so
    # .run() would raise; invoke the chain and read the "result" key instead
    result = qa_chain.invoke({"query": user_message})
    history.append((user_message, result["result"]))
    return "", history

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1 style='text-align:center;'>πŸ“š PDF Chatbot Assistant</h1>")
    
    with gr.Row():
        pdf_upload = gr.File(label="Upload your PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")
    
    chatbot_ui = gr.Chatbot(height=400)
    user_input = gr.Textbox(label="Ask something about the PDF...", placeholder="Type your question here and hit Enter")

    upload_btn.click(upload_pdf, inputs=pdf_upload, outputs=chatbot_ui)
    user_input.submit(chatbot, [user_input, chatbot_ui], [user_input, chatbot_ui])
    
    gr.Markdown("<footer style='text-align:center; font-size:0.85rem; color:#64748b;'>Created by YourName - Powered by Hugging Face</footer>")
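
# On Spaces, launch() needs no extra arguments; running locally you can pass
# server_port=7860 or share=True for a temporary public link.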

if __name__ == "__main__":
    demo.launch()