Spaces: Build error
Commit · f4dcafb
Parent(s): 0c56589
second iteration

Files changed:
- app.py +51 -19
- chatbot.py +12 -0 (added)
- qa_engine.py +7 -7
- suggestions.py +8 -0 (added)
- summarizer.py +22 -6
app.py
CHANGED
@@ -2,29 +2,61 @@ import gradio as gr
 from utils.pdf_parser import extract_text_from_pdf
 from summarizer import Summarizer
 from qa_engine import QABot
+from chatbot import ask_model
+from suggestions import suggest_questions
 
+# Initialize summarizer and global variables
 summarizer = Summarizer()
+qa_bot = None
+summary = ""
+text_chunks = []
 
-[2 lines removed; content not shown in the rendered diff]
+# Gradio chat history
+chat_history = []
+
+def process_pdf(file):
+    global summary, qa_bot, text_chunks, chat_history
+    text = extract_text_from_pdf(file.name)
     summary = summarizer.summarize(text)
-[17 lines removed; content not shown in the rendered diff]
+    text_chunks = text.split("\n\n")
+    qa_bot = QABot(text_chunks)
+    chat_history.clear()
+    return summary, "PDF processed. You can now ask questions."
+
+def chat_with_doc(question):
+    if not qa_bot:
+        return chat_history, "Please upload and summarize a document first."
+
+    context = qa_bot.retrieve_context(question)
+    response = ask_model(context, question)
+
+    chat_history.append((question, response))
+    suggestions = suggest_questions(summary)
+    suggestions_block = "💡 You can also ask:\n" + "\n".join([f"• {q}" for q in suggestions])
+
+    return chat_history, suggestions_block
+
+# UI layout
+with gr.Blocks(title="BioSummarize.ai") as iface:
+    gr.Markdown("# 🧬 BioSummarize.ai")
+    gr.Markdown("Upload a biotech research paper, generate its summary, and chat with it using an AI-powered assistant.")
+
+    with gr.Row():
+        file_input = gr.File(label="Upload Biotech Research PDF")
+        summarize_btn = gr.Button("Summarize + Start Chat")
+
+    summary_box = gr.Textbox(label="📘 Summary", lines=6)
+    summary_status = gr.Textbox(label="Status / Info", lines=2)
+
+    chat_input = gr.Textbox(label="💬 Ask a Question", placeholder="What is the main finding?")
+    chatbot = gr.Chatbot(label="🧠 BioResearch Chatbot")
+
+    suggestions_box = gr.Textbox(label="💡 Follow-up Suggestions", interactive=False)
+
+    # Bind actions
+    summarize_btn.click(fn=process_pdf, inputs=file_input, outputs=[summary_box, summary_status])
+    chat_input.submit(fn=chat_with_doc, inputs=chat_input, outputs=[chatbot, suggestions_box])
 
+# Launch the app
 if __name__ == "__main__":
     iface.launch()
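One mismatch worth flagging before the next file: `chat_with_doc` calls `qa_bot.retrieve_context(question)`, but the reworked `QABot` in qa_engine.py (below) only defines `retrieve`. Assuming the method keeps its `retrieve` name, a minimal sketch of the corrected handler:

    def chat_with_doc(question):
        if not qa_bot:
            return chat_history, "Please upload and summarize a document first."
        # QABot exposes retrieve(), not retrieve_context(), in this commit
        context = qa_bot.retrieve(question)
        response = ask_model(context, question)
        chat_history.append((question, response))
        suggestions = suggest_questions(summary)
        suggestions_block = "💡 You can also ask:\n" + "\n".join(f"• {q}" for q in suggestions)
        return chat_history, suggestions_block

The alternative fix, renaming the method to `retrieve_context` inside `QABot`, would work just as well; the point is only that the two files must agree.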
chatbot.py
ADDED
@@ -0,0 +1,12 @@
+import openai  # or use requests if OpenRouter API
+import os
+
+openai.api_key = os.getenv("OPENROUTER_API_KEY")
+
+def ask_model(context, query, model="mistral"):
+    prompt = f"""Context: {context}\n\nUser: {query}\nAI:"""
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=[{"role": "user", "content": prompt}]
+    )
+    return response['choices'][0]['message']['content']
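Two things stand out here, and either could explain the Space's error state: `openai.ChatCompletion.create` is the pre-1.0 interface (removed in openai>=1.0), and pointing the OpenAI SDK at OpenRouter requires overriding the base URL; a bare model name like "mistral" also won't resolve on OpenRouter, which expects full slugs. A sketch of the same helper against the modern client (the model slug is an illustrative example, not necessarily what this Space intends):

    import os
    from openai import OpenAI  # openai>=1.0 client

    # OpenRouter speaks the OpenAI wire format, but at its own base URL.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    def ask_model(context, query, model="mistralai/mistral-7b-instruct"):
        # Same prompt shape as the committed version.
        prompt = f"Context: {context}\n\nUser: {query}\nAI:"
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content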
qa_engine.py
CHANGED
@@ -4,13 +4,13 @@ import numpy as np
 
 class QABot:
     def __init__(self, chunks):
-        self.
+        self.model = SentenceTransformer("pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb")
         self.chunks = chunks
-        self.
-        self.
+        self.embeddings = self.model.encode(chunks)
+        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
         self.index.add(np.array(self.embeddings))
 
-    def
-
-        D, I = self.index.search(np.array(
-        return "\n".join([self.chunks[i] for i in I[0]])
+    def retrieve(self, query, k=3):
+        query_vec = self.model.encode([query])
+        D, I = self.index.search(np.array(query_vec), k)
+        return "\n\n".join([self.chunks[i] for i in I[0]])
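For reference, a hypothetical smoke test of the retrieval path, assuming the file's top lines (not shown in this hunk) import SentenceTransformer, faiss, and numpy, as the hunk's context line suggests:

    # Hypothetical smoke test; first call downloads the BioBERT model.
    from qa_engine import QABot

    chunks = [
        "CRISPR-Cas9 enables targeted genome editing.",
        "The study reports a 40% gain in editing efficiency.",
        "Off-target effects remain the main limitation.",
    ]
    bot = QABot(chunks)
    print(bot.retrieve("What limits the approach?", k=2))

One caveat: if a PDF splits into fewer than `k` chunks, faiss pads the result with index -1, which Python list indexing silently maps to the last chunk; clamping with `min(k, len(self.chunks))` before the search avoids that.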
suggestions.py
ADDED
@@ -0,0 +1,8 @@
+def suggest_questions(summary):
+    return [
+        "What is the objective of this research?",
+        "What methods were used in the study?",
+        "What are the key results?",
+        "What limitations or future work are mentioned?",
+        "What is the significance of this study?"
+    ]
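As committed, `suggest_questions` ignores its `summary` argument and always returns the same five questions. A hypothetical refinement (the keyword check is illustrative only) that starts using the parameter:

    def suggest_questions(summary):
        questions = [
            "What is the objective of this research?",
            "What methods were used in the study?",
            "What are the key results?",
        ]
        # Illustrative: tailor one suggestion to the summary text.
        if "clinical" in summary.lower():
            questions.append("What were the clinical outcomes?")
        return questions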
summarizer.py
CHANGED
@@ -1,11 +1,27 @@
 from transformers import AutoTokenizer, BartForConditionalGeneration
+import torch
+import math
 
 class Summarizer:
     def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("
-        self.model = BartForConditionalGeneration.from_pretrained("
+        self.tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
+        self.model = BartForConditionalGeneration.from_pretrained("sshleifer/distilbart-cnn-12-6")
 
-    def
-
-
-        return
+    def split_text(self, text, max_tokens=1024):
+        words = text.split()
+        chunks = [' '.join(words[i:i+max_tokens]) for i in range(0, len(words), max_tokens)]
+        return chunks
+
+    def summarize(self, text):
+        chunks = self.split_text(text)
+        partial_summaries = []
+
+        for chunk in chunks:
+            inputs = self.tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024)
+            summary_ids = self.model.generate(inputs["input_ids"], max_new_tokens=200)
+            summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+            partial_summaries.append(summary)
+
+        # Final merged summary
+        full_summary = " ".join(partial_summaries)
+        return full_summary
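Two small observations on this file: `torch` and `math` are imported but unused in the lines shown, and `split_text` chunks by words while the encode step truncates by tokens, so any 1024-word chunk that tokenizes past 1024 tokens is silently clipped. A sketch, assuming the tokenizer above is available, of token-based chunking instead:

    # Hypothetical alternative to split_text: chunk on tokenizer tokens so the
    # truncation=True / max_length=1024 encode step never silently drops text.
    def split_text(self, text, max_tokens=1024):
        ids = self.tokenizer(text)["input_ids"]
        pieces = [ids[i:i + max_tokens] for i in range(0, len(ids), max_tokens)]
        return [self.tokenizer.decode(p, skip_special_tokens=True) for p in pieces]

Dropped in as a replacement for the word-based version, `summarize()` then feeds each piece to the model with nothing lost at encode time, at the cost of one extra tokenization pass over the full document.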