habibahmad committed on
Commit 6e2c6a7 · verified · 1 Parent(s): d55d339

Update app.py

Files changed (1)
  1. app.py +81 -54
app.py CHANGED
@@ -1,58 +1,85 @@
  import gradio as gr
- from transformers import pipeline
- import PyPDF2
-
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
- def summarize_pdf(pdf_file):
-     if pdf_file is None:
-         return "❌ Please upload a PDF file first."
-
-     try:
-         reader = PyPDF2.PdfReader(pdf_file)
-         text = ""
-         for page in reader.pages:
-             text += page.extract_text() or ""
-
-         text = text[:2000]  # limit to avoid too long input
-         summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
-         return summary
-     except Exception as e:
-         return f"❌ Error reading PDF: {str(e)}"
-
- with gr.Blocks(css="""
-     body {background: #f0f4f8; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;}
-     #main_container { max-width: 720px; margin: auto; padding: 2rem; background: white;
-                       box-shadow: 0 8px 24px rgba(149, 157, 165, 0.2); border-radius: 12px; }
-     .btn-primary {background: #3b82f6; color: white; font-weight: 600; padding: 0.8rem 1.6rem;
-                   border-radius: 8px; border: none; transition: background 0.3s;}
-     .btn-primary:hover {background: #2563eb;}
-     .scroll-box {max-height: 220px; overflow-y: auto; padding: 1rem; border: 1px solid #ddd; border-radius: 8px; background: #fafafa;}
- """) as demo:
-
-     with gr.Column(elem_id="main_container"):
-         gr.Markdown(
-             """
-             <h1 style="text-align:center; color:#1e40af;">📄 PDF Summarizer</h1>
-             <p style="text-align:center; color:#475569;">
-                 Upload your PDF and get a crisp summary generated by Hugging Face's BART model.
-             </p>
-             """
-         )
-
-         pdf_input = gr.File(label="Upload PDF file", file_types=[".pdf"], interactive=True)
-         summary_output = gr.Textbox(label="Summary", interactive=False, elem_classes="scroll-box", lines=8)
-         summarize_btn = gr.Button("Generate Summary", elem_classes="btn-primary")
-
-         summarize_btn.click(fn=summarize_pdf, inputs=pdf_input, outputs=summary_output)
-
-         gr.Markdown(
-             """
-             <footer style="text-align:center; margin-top:2rem; font-size:0.9rem; color:#94a3b8;">
-                 Made with ❤️ by YourName | Powered by Hugging Face & Gradio
-             </footer>
-             """
-         )

  if __name__ == "__main__":
      demo.launch()
 
  import gradio as gr
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+ from langchain_community.llms import HuggingFaceEndpoint
+ from langchain.chains import RetrievalQA
+ import os
+ import shutil
+
+ # Hugging Face API key (store in your Space's secrets for security)
+ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+ # Load HF Inference Endpoint (like mistralai/Mistral-7B-Instruct)
+ llm = HuggingFaceEndpoint(
+     repo_id="mistralai/Mistral-7B-Instruct-v0.2",
+     temperature=0.2,
+     huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
+ )
+
+ # Embeddings (Hugging Face MiniLM for fast processing)
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+ # Temporary folder to store PDFs
+ UPLOAD_DIR = "pdf_uploads"
+ if not os.path.exists(UPLOAD_DIR):
+     os.makedirs(UPLOAD_DIR)
+
+ def process_pdf(file):
+     # Copy the uploaded PDF into the upload folder (gr.File supplies a temp file path in file.name)
+     file_path = os.path.join(UPLOAD_DIR, os.path.basename(file.name))
+     # Copy from the Gradio temp location; reading the already-closed temp handle would fail
+     shutil.copy(file.name, file_path)
+
+     # Load PDF text using langchain
+     loader = PyPDFLoader(file_path)
+     pages = loader.load_and_split()
+
+     # Create Chroma vector store (in-memory)
+     vectordb = Chroma.from_documents(pages, embedding=embeddings)
+     retriever = vectordb.as_retriever()
+
+     # Create RetrievalQA chain
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         return_source_documents=True
+     )
+
+     # Return the QA chain to use in the chat
+     return qa_chain
+
+ # Global variable to hold QA chain for the session
+ qa_chain = None
+
+ def upload_pdf(file):
+     global qa_chain
+     qa_chain = process_pdf(file)
+     return [(None, "✅ PDF uploaded and processed! Ask me anything about it.")]  # Chatbot output needs (user, bot) pairs
+
+ def chatbot(user_message, history):
+     if qa_chain is None:
+         return "", (history or []) + [(user_message, "❌ Please upload a PDF first.")]
+
+     response = qa_chain.invoke({"query": user_message})["result"]  # chain returns "result" and "source_documents"
+     history = (history or []) + [(user_message, response)]
+     return "", history
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("<h1 style='text-align:center;'>📚 PDF Chatbot Assistant</h1>")
+
+     with gr.Row():
+         pdf_upload = gr.File(label="Upload your PDF", file_types=[".pdf"])
+         upload_btn = gr.Button("Process PDF")
+
+     chatbot_ui = gr.Chatbot(height=400)
+     user_input = gr.Textbox(label="Ask something about the PDF...", placeholder="Type your question here and hit Enter")
+
+     upload_btn.click(upload_pdf, inputs=pdf_upload, outputs=chatbot_ui)
+     user_input.submit(chatbot, [user_input, chatbot_ui], [user_input, chatbot_ui])
+
+     gr.Markdown("<footer style='text-align:center; font-size:0.85rem; color:#64748b;'>Created by YourName - Powered by Hugging Face</footer>")

  if __name__ == "__main__":
      demo.launch()
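
The updated app.py wires PyPDFLoader, MiniLM sentence embeddings, an in-memory Chroma store, and a Mistral-7B HuggingFaceEndpoint into a RetrievalQA chain behind the Gradio UI. Below is a minimal headless sketch of that same flow for trying the chain outside the Space; it assumes HUGGINGFACEHUB_API_TOKEN is set in the environment, and the PDF path and question are placeholders.

import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import RetrievalQA

# Same models as app.py; the HF token must be available as an env var
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.2,
    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# "sample.pdf" is a placeholder path to a local test document
pages = PyPDFLoader("sample.pdf").load_and_split()
vectordb = Chroma.from_documents(pages, embedding=embeddings)
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectordb.as_retriever())

print(qa.invoke({"query": "What is this document about?"})["result"])

Running this (or the Space itself) will likely also require pypdf, sentence-transformers, and chromadb in requirements.txt alongside gradio, langchain, and langchain-community, since the loader, embeddings, and vector store import those packages.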