Added code for file uploads
Browse files- app.py +137 -22
- requirements.txt +1 -1
app.py
CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
|
|
2 |
import requests
|
3 |
from newspaper import Article
|
4 |
from transformers import pipeline
|
5 |
-
import
|
6 |
import nltk
|
|
|
|
|
7 |
|
8 |
|
9 |
# Load summarization pipeline
|
@@ -56,14 +58,50 @@ def summarize_input(mixed_input):
|
|
56 |
return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n[π Read more]({mixed_input})\n\n---"
|
57 |
else:
|
58 |
summary = generate_summary(mixed_input)
|
59 |
-
return f"## π Summary \n\n{summary}\n\n**Original Text:**\n\n{mixed_input}\n\n---"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
# Function to fetch top headlines from NewsAPI and summarize them
|
62 |
def fetch_news():
|
63 |
url = 'https://newsapi.org/v2/top-headlines'
|
64 |
-
api_key = os.environ.get("api_key")
|
65 |
params = {
|
66 |
-
'apiKey': api_key,
|
67 |
'language': 'en',
|
68 |
'sources': 'associated-press',
|
69 |
'pageSize': 10
|
@@ -82,7 +120,7 @@ def fetch_news():
|
|
82 |
pub_date = article.get("publishedAt", "Unknown")
|
83 |
content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
|
84 |
summary = generate_summary(content)
|
85 |
-
summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n
|
86 |
|
87 |
if not summaries:
|
88 |
return "### No articles could be summarized."
|
@@ -101,29 +139,106 @@ def extract_full_content(url):
|
|
101 |
return None
|
102 |
|
103 |
# Gradio interface
|
104 |
-
with gr.Blocks(theme=gr.themes.
|
|
|
105 |
gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
|
106 |
-
gr.Markdown("
|
|
|
|
|
|
|
107 |
|
108 |
-
#
|
109 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
with gr.Row():
|
111 |
with gr.Column(scale=1):
|
112 |
-
gr.Markdown(
|
113 |
-
gr.Markdown("**Source: Associated Press**")
|
114 |
-
gr.Markdown("Click the button below to fetch the latest news articles.")
|
115 |
-
news_btn = gr.Button("ποΈ News Now", variant="primary")
|
116 |
-
with gr.Column(scale=1):
|
117 |
-
input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
|
118 |
-
summarize_btn = gr.Button("π Summarize", variant="secondary")
|
119 |
-
|
120 |
-
# Output area for displaying results
|
121 |
-
output_area = gr.Markdown() # Use a valid output component
|
122 |
|
123 |
# Link buttons to their respective functions
|
124 |
-
summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=
|
125 |
-
|
126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
if __name__ == "__main__":
|
129 |
demo.launch()
|
|
|
2 |
import requests
|
3 |
from newspaper import Article
|
4 |
from transformers import pipeline
|
5 |
+
import config
|
6 |
import nltk
|
7 |
+
import os
|
8 |
+
import PyPDF2
|
9 |
|
10 |
|
11 |
# Load summarization pipeline
|
|
|
58 |
return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n[π Read more]({mixed_input})\n\n---"
|
59 |
else:
|
60 |
summary = generate_summary(mixed_input)
|
61 |
+
return f"## π Summary \n\n{summary}\n\nπ **Original Text:**\n\n{mixed_input}\n\n---"
|
62 |
+
|
63 |
+
# Function to summarize a file (PDF or TXT)
|
64 |
+
def summarize_file(file):
    """Summarize an uploaded PDF or plain-text file and return Markdown.

    Args:
        file: The uploaded file. Either an object exposing the on-disk path
            as ``.name`` or the path itself (``str`` / ``os.PathLike``).
            ``None`` means no file is currently provided.

    Returns:
        A Markdown string containing the summary followed by the extracted
        text, a short message when the file type is unsupported, the file
        has no text, or processing fails, or ``""`` when ``file`` is ``None``.
    """
    if file is None:
        # No file provided: return an empty result instead of an error message.
        return ""

    try:
        # Accept both a plain path and an upload object that carries the
        # path in ``.name``.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        # Compare the extension case-insensitively so "REPORT.PDF" is treated
        # the same as "report.pdf".
        lowered = str(path).lower()

        if lowered.endswith(".pdf"):
            with open(path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may yield nothing for a page; treat as empty.
                text = "".join(page.extract_text() or "" for page in reader.pages)
        elif lowered.endswith(".txt"):
            with open(path, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "β Unsupported file type."

        if not text.strip():
            return "β No text found in file."

        summary = generate_summary(text)

        # Combine the outputs into a single string
        return (
            f"### π Summary\n\n"
            f"{summary}\n\n"
            f"---\n\n"
            f"π **Original Extracted Text:**\n\n{text}"
        )
    except Exception as e:
        # Any failure is reported as text in the result instead of being raised.
        return f"β Error processing file: {str(e)}"
|
97 |
+
|
98 |
+
|
99 |
|
100 |
# Function to fetch top headlines from NewsAPI and summarize them
|
101 |
def fetch_news():
|
102 |
url = 'https://newsapi.org/v2/top-headlines'
|
|
|
103 |
params = {
|
104 |
+
'apiKey': config.api_key,
|
105 |
'language': 'en',
|
106 |
'sources': 'associated-press',
|
107 |
'pageSize': 10
|
|
|
120 |
pub_date = article.get("publishedAt", "Unknown")
|
121 |
content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
|
122 |
summary = generate_summary(content)
|
123 |
+
summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary:** {summary}\n\n [π Read more]({article_url})\n\n---")
|
124 |
|
125 |
if not summaries:
|
126 |
return "### No articles could be summarized."
|
|
|
139 |
return None
|
140 |
|
141 |
# Gradio interface
|
142 |
+
# Custom CSS for the three action buttons, matched by their elem_id values.
# Defined before the layout so it can be handed to the Blocks constructor.
css = """
#summarize-btn {
    background-color: #4CAF50 !important; /* Green for Summarize */
    color: white !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}

#news-now-btn {
    background-color: #0078D7 !important; /* Blue for News Now */
    color: white !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}

#clear-btn {
    background-color: #d6d8db !important; /* Lighter Gray for Clear */
    color: black !important;
    font-size: 16px !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
    margin-top: 20px !important;
    width: 100%;
}
"""

# NOTE(review): the leading glyphs in several labels below look like
# mis-decoded emoji; they are reproduced as found -- confirm against the
# original file before shipping.
with gr.Blocks(
    theme=gr.themes.Default(font="Arial", font_mono="Courier New"),
    css=css,
) as demo:
    # Header Section
    gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
    gr.Markdown("## A LLM-based News Summarizer App")
    gr.Markdown(
        "Sum Up! condenses the latest headlines from trusted news sources into clear, concise, and easy-to-read summaries, so you can stay informed in seconds."
    )

    # Input Section
    gr.Markdown("---")  # Horizontal line for separation
    with gr.Row():
        # Left Column: Collapsible Sidebar for Latest News
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("π’ Fetch Latest News", open=False):
                gr.Markdown("**Source: Associated Press**")
                gr.Markdown(
                    "Click the button below to fetch the top news articles and summarize them."
                )
                news_btn = gr.Button("ποΈ News Now", variant="primary", elem_id="news-now-btn")
                gr.Markdown(
                    "This feature fetches the latest headlines from the Associated Press and summarizes them for you."
                )

        # Right Column: Text Input and File Upload
        with gr.Column(scale=2, min_width=400):
            gr.Markdown("### Provide Your Input")
            gr.Markdown("#### Enter Text or URL")
            input_box = gr.Textbox(
                label="Enter URL or Text",
                placeholder="Paste a URL or text here...",
                lines=5,
            )
            summarize_btn = gr.Button("π Summarize", variant="primary", elem_id="summarize-btn")

            # Clear Button placed below the Summarize button
            clear_btn = gr.Button("Clear", variant="secondary", elem_id="clear-btn")

            gr.Markdown("#### Upload a File")
            file_input = gr.File(
                label="Upload a .pdf or .txt file", file_types=[".pdf", ".txt"]
            )
            gr.Markdown("**Note:** Only PDF and TXT files are supported.")

    # Output Section
    gr.Markdown("---")  # Horizontal line for separation
    gr.Markdown("### View Results")
    with gr.Row():
        with gr.Column(scale=1):
            # Single shared output area; every action below writes into it.
            gen_output = gr.Markdown()

    # Link buttons to their respective functions
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=gen_output)
    file_input.change(fn=summarize_file, inputs=file_input, outputs=gen_output)
    news_btn.click(fn=fetch_news, inputs=[], outputs=gen_output)

    # Clear button functionality: one value per output component, in order
    # (text box, file upload, result area).
    clear_btn.click(
        fn=lambda: ("", None, ""),  # Clear all inputs and outputs
        inputs=[],
        outputs=[input_box, file_input, gen_output],
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
CHANGED
@@ -5,4 +5,4 @@ torch
|
|
5 |
newspaper3k
|
6 |
requests
|
7 |
gradio
|
8 |
-
|
|
|
5 |
newspaper3k
|
6 |
requests
|
7 |
gradio
|
8 |
+
PyPDF2
|