harao-ml committed on
Commit
bbc8201
·
verified ·
1 Parent(s): 3158b7a

Added code for file uploads

Browse files
Files changed (2) hide show
  1. app.py +137 -22
  2. requirements.txt +1 -1
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
  import requests
3
  from newspaper import Article
4
  from transformers import pipeline
5
- import os
6
  import nltk
 
 
7
 
8
 
9
  # Load summarization pipeline
@@ -56,14 +58,50 @@ def summarize_input(mixed_input):
56
  return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**πŸ“ Summary** \n\n{summary}\n\n[πŸ”— Read more]({mixed_input})\n\n---"
57
  else:
58
  summary = generate_summary(mixed_input)
59
- return f"## πŸ“ Summary \n\n{summary}\n\n**Original Text:**\n\n{mixed_input}\n\n---"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # Function to fetch top headlines from NewsAPI and summarize them
62
  def fetch_news():
63
  url = 'https://newsapi.org/v2/top-headlines'
64
- api_key = os.environ.get("api_key")
65
  params = {
66
- 'apiKey': api_key,
67
  'language': 'en',
68
  'sources': 'associated-press',
69
  'pageSize': 10
@@ -82,7 +120,7 @@ def fetch_news():
82
  pub_date = article.get("publishedAt", "Unknown")
83
  content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
84
  summary = generate_summary(content)
85
- summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [πŸ”— Read more]({article_url})\n\n---")
86
 
87
  if not summaries:
88
  return "### No articles could be summarized."
@@ -101,29 +139,106 @@ def extract_full_content(url):
101
  return None
102
 
103
  # Gradio interface
104
- with gr.Blocks(theme=gr.themes.Base()) as demo:
 
105
  gr.Markdown("# πŸ“° Sum Up! Stay Informed, Instantly")
106
- gr.Markdown(" ## A LLM based News Summarizer App")
 
 
 
107
 
108
- # Add a brief description
109
- gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  with gr.Row():
111
  with gr.Column(scale=1):
112
- gr.Markdown("### Top Stories - A Snapshot ")
113
- gr.Markdown("**Source: Associated Press**")
114
- gr.Markdown("Click the button below to fetch the latest news articles.")
115
- news_btn = gr.Button("πŸ—žοΈ News Now", variant="primary")
116
- with gr.Column(scale=1):
117
- input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
118
- summarize_btn = gr.Button("πŸ” Summarize", variant="secondary")
119
-
120
- # Output area for displaying results
121
- output_area = gr.Markdown() # Use a valid output component
122
 
123
  # Link buttons to their respective functions
124
- summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=output_area)
125
- news_btn.click(fn=fetch_news, inputs=[], outputs=output_area)
126
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  if __name__ == "__main__":
129
  demo.launch()
 
2
  import requests
3
  from newspaper import Article
4
  from transformers import pipeline
5
+ import config
6
  import nltk
7
+ import os
8
+ import PyPDF2
9
 
10
 
11
  # Load summarization pipeline
 
58
  return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**πŸ“ Summary** \n\n{summary}\n\n[πŸ”— Read more]({mixed_input})\n\n---"
59
  else:
60
  summary = generate_summary(mixed_input)
61
+ return f"## πŸ“ Summary \n\n{summary}\n\nπŸ“Ž **Original Text:**\n\n{mixed_input}\n\n---"
62
+
63
+ # Function to summarize a file (PDF or TXT)
64
def summarize_file(file):
    """Summarize an uploaded PDF or plain-text file.

    Args:
        file: The upload value handed over by the UI. ``None`` means nothing
            is selected. Otherwise either an object exposing the on-disk path
            as ``.name`` or a plain path string is accepted.

    Returns:
        A Markdown string containing the summary followed by the extracted
        text. Returns ``""`` when ``file`` is ``None``, and a message starting
        with "❌" when the type is unsupported, no text could be extracted,
        or processing raised an exception.
    """
    # Nothing uploaded (or the upload was cleared): leave the output empty.
    if file is None:
        return ""

    try:
        # Accept both upload objects (path in ``.name``) and bare path strings.
        path = getattr(file, "name", file)
        # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
        extension = os.path.splitext(str(path))[1].lower()

        if extension == ".pdf":
            with open(path, "rb") as handle:
                reader = PyPDF2.PdfReader(handle)
                # extract_text() may yield nothing for image-only pages.
                text = "".join(page.extract_text() or "" for page in reader.pages)
        elif extension == ".txt":
            with open(path, "r", encoding="utf-8") as handle:
                text = handle.read()
        else:
            return "❌ Unsupported file type."

        if not text.strip():
            return "❌ No text found in file."

        summary = generate_summary(text)

        # Combine the outputs into a single Markdown string.
        return (
            f"### 📝 Summary\n\n"
            f"{summary}\n\n"
            f"---\n\n"
            f"📎 **Original Extracted Text:**\n\n{text}"
        )
    except Exception as e:
        # UI boundary: report the failure in the output pane instead of raising.
        return f"❌ Error processing file: {e}"
97
+
98
+
99
 
100
  # Function to fetch top headlines from NewsAPI and summarize them
101
  def fetch_news():
102
  url = 'https://newsapi.org/v2/top-headlines'
 
103
  params = {
104
+ 'apiKey': config.api_key,
105
  'language': 'en',
106
  'sources': 'associated-press',
107
  'pageSize': 10
 
120
  pub_date = article.get("publishedAt", "Unknown")
121
  content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
122
  summary = generate_summary(content)
123
+ summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**πŸ“ Summary:** {summary}\n\n [πŸ”— Read more]({article_url})\n\n---")
124
 
125
  if not summaries:
126
  return "### No articles could be summarized."
 
139
  return None
140
 
141
  # Gradio interface
142
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# (indentation was lost); confirm the layout against the repository file.

# Custom CSS for the three action buttons, targeted through their elem_id.
# Defined before the Blocks context so it can be passed to the constructor.
css = """
#summarize-btn {
background-color: #4CAF50 !important; /* Green for Summarize */
color: white !important;
font-size: 16px !important;
padding: 10px 20px !important;
border-radius: 5px !important;
margin-top: 20px !important;
width: 100%;
}

#news-now-btn {
background-color: #0078D7 !important; /* Blue for News Now */
color: white !important;
font-size: 16px !important;
padding: 10px 20px !important;
border-radius: 5px !important;
margin-top: 20px !important;
width: 100%;
}

#clear-btn {
background-color: #d6d8db !important; /* Lighter Gray for Clear */
color: black !important;
font-size: 16px !important;
padding: 10px 20px !important;
border-radius: 5px !important;
margin-top: 20px !important;
width: 100%;
}
"""

# The stylesheet is handed to the constructor rather than assigned to
# ``demo.css`` afterwards.
with gr.Blocks(
    theme=gr.themes.Default(font="Arial", font_mono="Courier New"),
    css=css,
) as demo:
    # Header Section
    gr.Markdown("# 📰 Sum Up! Stay Informed, Instantly")
    gr.Markdown("## A LLM-based News Summarizer App")
    gr.Markdown(
        "Sum Up! condenses the latest headlines from trusted news sources into clear, concise, and easy-to-read summaries, so you can stay informed in seconds."
    )

    # Input Section
    gr.Markdown("---")  # Horizontal line for separation
    with gr.Row():
        # Left Column: Collapsible Sidebar for Latest News
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("📢 Fetch Latest News", open=False):
                gr.Markdown("**Source: Associated Press**")
                gr.Markdown(
                    "Click the button below to fetch the top news articles and summarize them."
                )
                news_btn = gr.Button("🗞️ News Now", variant="primary", elem_id="news-now-btn")
                gr.Markdown(
                    "This feature fetches the latest headlines from the Associated Press and summarizes them for you."
                )

        # Right Column: Text Input and File Upload
        with gr.Column(scale=2, min_width=400):
            gr.Markdown("### Provide Your Input")
            gr.Markdown("#### Enter Text or URL")
            input_box = gr.Textbox(
                label="Enter URL or Text",
                placeholder="Paste a URL or text here...",
                lines=5,
            )
            summarize_btn = gr.Button("📝 Summarize", variant="primary", elem_id="summarize-btn")

            # Clear Button placed below the Summarize button
            clear_btn = gr.Button("Clear", variant="secondary", elem_id="clear-btn")

            gr.Markdown("#### Upload a File")
            file_input = gr.File(
                label="Upload a .pdf or .txt file", file_types=[".pdf", ".txt"]
            )
            gr.Markdown("**Note:** Only PDF and TXT files are supported.")

    # Output Section
    gr.Markdown("---")  # Horizontal line for separation
    gr.Markdown("### View Results")
    with gr.Row():
        with gr.Column(scale=1):
            # Single shared output pane; every action below writes into it.
            gen_output = gr.Markdown()

    # Link buttons to their respective functions
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=gen_output)
    # Summarization starts as soon as a file is selected (no extra button).
    file_input.change(fn=summarize_file, inputs=file_input, outputs=gen_output)
    news_btn.click(fn=fetch_news, inputs=[], outputs=gen_output)

    # Clear button functionality: reset the textbox, the upload and the output.
    # The original re-created ``gen_output`` with a second gr.Markdown after
    # wiring; that rebinds the name to a component no event references, so it
    # is dropped here.
    clear_btn.click(
        fn=lambda: ("", None, ""),
        inputs=[],
        outputs=[input_box, file_input, gen_output],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -5,4 +5,4 @@ torch
5
  newspaper3k
6
  requests
7
  gradio
8
- lxml_html_clean
 
5
  newspaper3k
6
  requests
7
  gradio
8
+ PyPDF2