harao-ml committed on
Commit
d0c5c2c
·
verified ·
1 Parent(s): 2c4c384

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +267 -0
app.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import cluster_news
4
+ import extract_news
5
+ import summarizer
6
+ import analyze_sentiment
7
+ import gather_news
8
+
9
+ # ------------------ Utilities ------------------
10
+
11
def fetch_content(topic):
    """Fetch up to ten articles for ``topic``, sorted by ``publishedAt``.

    The NewsAPI fetcher is tried first. When it returns a string (treated
    here as a failure indicator rather than a result set), the Google fetcher
    is tried instead.

    Args:
        topic: Search phrase forwarded to the ``gather_news`` fetchers.

    Returns:
        A list of at most ten article mappings in descending order of their
        ``publishedAt`` value, or ``None`` when both fetchers return a string
        or the result cannot be sorted.
    """
    articles = gather_news.fetch_articles_newsapi(topic)
    if isinstance(articles, str):
        articles = gather_news.fetch_articles_google(topic)
        if isinstance(articles, str):
            return None

    def _published_key(article):
        # ``dict.get`` only applies its default when the key is absent; a key
        # that is present with a ``None`` value must also map to "" so the
        # values stay mutually comparable.
        return article.get("publishedAt") or ""

    try:
        return sorted(articles, key=_published_key, reverse=True)[:10]
    except (AttributeError, TypeError):
        # Non-iterable result, non-mapping entries, or unorderable timestamps.
        return None
22
+
23
def fetch_and_process_latest_news(sentiment_filters):
    """Fetch top headlines and run them through the digest pipeline.

    The ten entries with the greatest ``publishedAt`` value are extracted,
    summarised and sentiment-scored, then clustered, rendered as Markdown
    and written to a CSV file.

    Args:
        sentiment_filters: Sentiment labels to keep when rendering; returned
            unchanged as the first element of the result.

    Returns:
        A 7-tuple: the sentiment filters, five Markdown strings (one per
        display column) and the CSV file path (``None`` when nothing could
        be produced).
    """
    topic = "Top Headlines"
    articles = gather_news.fetch_articles_newsapi("top headlines")
    if isinstance(articles, str) or not articles:
        return sentiment_filters, "### No latest news available", "", "", "", "", None

    # ``or ""`` covers entries whose ``publishedAt`` is present but ``None``;
    # comparing ``None`` with a string would raise ``TypeError`` in ``sorted``.
    articles = sorted(
        articles,
        key=lambda x: x.get("publishedAt") or "",
        reverse=True,
    )[:10]
    extracted_articles = extract_summarize_and_analyze_articles(articles)

    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None

    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic)

    return (sentiment_filters, *cluster_md_blocks, csv_file)
41
+
42
def extract_summarize_and_analyze_articles(articles):
    """Build summary/sentiment records for fetched article mappings.

    Entries without a truthy ``url``, and entries whose extracted content is
    falsy, are skipped.

    Args:
        articles: Iterable of mappings read via ``.get`` for ``url``,
            ``title``, ``source``, ``author`` and ``publishedAt``.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``,
        ``author``, ``publishedAt``, ``content``, ``summary``, ``sentiment``
        and ``score``.
    """
    records = []
    for entry in articles:
        link = entry.get("url")
        if not link:
            continue

        body, _ = extract_news.extract_full_content(link)
        if not body:
            continue

        digest = summarizer.generate_summary(body)
        label, confidence = analyze_sentiment.analyze_summary(digest)
        record = {
            "title": entry.get("title", "No title"),
            "url": link,
            "source": entry.get("source", "Unknown"),
            "author": entry.get("author", "Unknown"),
            "publishedAt": entry.get("publishedAt", "Unknown"),
            "content": body,
            "summary": digest,
            "sentiment": label,
            "score": confidence,
        }
        records.append(record)
    return records
63
+
64
def extract_summarize_and_analyze_content_from_file(files):
    """Summarise and sentiment-score the text of uploaded files.

    Each upload may be either a filesystem path (``str`` / ``os.PathLike``)
    or an object exposing the path as ``.name``. Files are read as UTF-8;
    files whose content is empty or whitespace-only are skipped.

    Args:
        files: Iterable of uploads, or ``None``.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``,
        ``author``, ``publishedAt``, ``content``, ``summary``, ``sentiment``
        and ``score``.
    """
    import os  # local import: only needed for the PathLike check below

    extracted_articles = []
    for file in files or []:
        # Plain paths are used as-is. ``.name`` is only consulted for wrapper
        # objects, because ``pathlib.Path.name`` is just the basename.
        if isinstance(file, (str, os.PathLike)):
            path = file
        else:
            path = file.name

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
        if not content.strip():
            continue

        summary = summarizer.generate_summary(content)
        sentiment, score = analyze_sentiment.analyze_summary(summary)
        extracted_articles.append({
            "title": "Custom File",
            "url": "N/A",
            "source": "Uploaded File",
            "author": "Unknown",
            "publishedAt": "Unknown",
            "content": content,
            "summary": summary,
            "sentiment": sentiment,
            "score": score,
        })
    return extracted_articles
84
+
85
def extract_summarize_and_analyze_content_from_urls(urls):
    """Build summary/sentiment records for user-supplied URLs.

    URLs whose extracted content is falsy are skipped. A falsy extracted
    title is replaced by ``"Untitled Article"``.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``,
        ``author``, ``publishedAt``, ``content``, ``summary``, ``sentiment``
        and ``score``.
    """
    records = []
    for link in urls:
        body, heading = extract_news.extract_full_content(link)
        if not body:
            # Nothing could be extracted for this URL.
            continue

        digest = summarizer.generate_summary(body)
        label, confidence = analyze_sentiment.analyze_summary(digest)
        record = {
            "title": heading or "Untitled Article",
            "url": link,
            "source": "External Link",
            "author": "Unknown",
            "publishedAt": "Unknown",
            "content": body,
            "summary": digest,
            "sentiment": label,
            "score": confidence,
        }
        records.append(record)
    return records
104
+
105
def display_clusters_as_columns(result, sentiment_filters=None):
    """Render clustered articles as exactly five Markdown strings.

    The ``sentiment`` column of ``result["dataframe"]`` is capitalised in
    place before filtering. One Markdown block is produced per
    ``cluster_label`` group; the list is padded with empty strings or cut
    off so that it always has five entries.

    Args:
        result: Mapping with a ``"dataframe"`` entry and, optionally, a
            ``"detected_topics"`` mapping keyed by cluster label whose values
            provide ``primary_focus`` and ``related_topics``.
        sentiment_filters: Sentiment labels to keep; a falsy value disables
            filtering.

    Returns:
        A list of five Markdown strings.
    """
    frame = result["dataframe"]
    topic_map = result.get("detected_topics", {})
    frame["sentiment"] = frame["sentiment"].str.capitalize()

    visible = frame[frame["sentiment"].isin(sentiment_filters)] if sentiment_filters else frame
    if visible.empty:
        return ["### ⚠️ No matching articles."] + [""] * 4

    blocks = []
    for label, members in visible.groupby("cluster_label"):
        parts = [f"### 🧩 Cluster {label}\n"]

        if label in topic_map:
            info = topic_map[label]
            parts.append(f"**Primary Topic:** {info['primary_focus']}\n\n")
            if info["related_topics"]:
                parts.append(f"**Related Topics:** {', '.join(info['related_topics'])}\n\n")

        parts.append(f"**Articles:** {len(members)}\n\n")

        for _, row in members.iterrows():
            parts.append(
                f"#### 📰 {row['title']}\n"
                f"- **Source:** {row['source']}\n"
                f"- **Sentiment:** {row['sentiment']}\n"
                f"<details><summary><strong>Summary</strong></summary>\n"
                f"{row['summary']}\n"
                f"</details>\n"
                f"- [Read Full Article]({row['url']})\n\n"
            )

        blocks.append("".join(parts))

    # Pad to five entries, then cut to five in case there were more clusters.
    padding = [""] * max(0, 5 - len(blocks))
    return (blocks + padding)[:5]
144
+
145
def save_clustered_articles(df, topic):
    """Write the clustered articles to a CSV file named after ``topic``.

    The file is created relative to the current working directory. Spaces in
    ``topic`` become underscores, and every other character that is not a
    word character, ``.`` or ``-`` is also replaced by an underscore, so a
    user-entered topic cannot introduce path separators into the filename.

    Args:
        df: DataFrame to export.
        topic: Label used as the filename prefix.

    Returns:
        ``(csv_file, None)`` where ``csv_file`` is the written filename, or
        ``(None, None)`` when ``df`` is empty and nothing is written.
    """
    import re  # local import keeps this function self-contained

    if df.empty:
        return None, None

    # One-for-one replacement keeps names for ordinary topics identical to
    # the plain ``replace(' ', '_')`` result.
    safe_topic = re.sub(r"[^\w.\-]", "_", topic.replace(" ", "_"))
    csv_file = f"{safe_topic}_clustered_articles.csv"
    df.to_csv(csv_file, index=False)
    return csv_file, None
151
+
152
+ # ------------------ Pipeline Trigger ------------------
153
+
154
def update_ui_with_columns(topic, files, urls, sentiment_filters):
    """Run the digest pipeline over every supplied input source.

    Articles are gathered from the topic search, the uploaded files and the
    newline-separated URL list (each only when provided), then clustered,
    rendered as Markdown and exported to CSV.

    Args:
        topic: Search phrase; ``None``, empty or whitespace-only disables the
            topic search.
        files: Uploaded files, or a falsy value for none.
        urls: Newline-separated URLs, or a falsy value for none.
        sentiment_filters: Sentiment labels to keep when rendering; returned
            unchanged as the first element of the result.

    Returns:
        A 7-tuple: the sentiment filters, five Markdown strings (one per
        display column) and the CSV file path (``None`` when there is no
        content).
    """
    # Normalise once so ``None`` does not crash ``.strip()`` and a
    # whitespace-only topic falls back to the "batch_upload" CSV name.
    topic = (topic or "").strip()
    extracted_articles = []

    if topic:
        articles = fetch_content(topic)
        if articles:
            extracted_articles.extend(extract_summarize_and_analyze_articles(articles))

    if files:
        extracted_articles.extend(extract_summarize_and_analyze_content_from_file(files))

    if urls:
        url_list = [url.strip() for url in urls.split("\n") if url.strip()]
        extracted_articles.extend(extract_summarize_and_analyze_content_from_urls(url_list))

    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None

    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic or "batch_upload")

    return (sentiment_filters, *cluster_md_blocks, csv_file)
178
+
179
def clear_interface():
    """Return reset values for the ten components wired to the Clear button.

    Order: topic text, sentiment selection (all three labels), file upload
    reset, URL text, five empty cluster columns, CSV download reset.
    """
    all_sentiments = ["Positive", "Neutral", "Negative"]
    blank_columns = ("",) * 5  # cluster columns 0-4
    return (
        "",                     # topic_input
        all_sentiments,         # sentiment_filter
        gr.update(value=None),  # uploaded_files
        "",                     # urls_input
        *blank_columns,
        gr.update(value=None),  # csv_output
    )
188
+
189
+
190
+ # ------------------ Gradio UI ------------------
191
+
192
with gr.Blocks(theme=gr.themes.Base(), css=".gr-markdown { margin: 10px; }") as demo:

    # ---- Header ----
    gr.Markdown("# 📰 Quick Pulse")
    gr.Markdown("### AI-Powered News Summarization with Real-Time Sentiment and Topic Insights")
    gr.Markdown(
        "From headlines to insight, Quick Pulse summarizes news stories, captures emotional context, and clusters related topics to provide structured intelligence—faster than ever"
    )

    # ---- Inputs ----
    gr.Markdown("---")  # horizontal rule between header and inputs
    with gr.Accordion("🗞️ Latest Top Headlines", open=False):
        latest_news_button = gr.Button("Fetch & Summarize Top 10 Headlines")

    _sentiment_options = ["Positive", "Neutral", "Negative"]
    with gr.Row():
        topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. climate change")
        sentiment_filter = gr.CheckboxGroup(
            choices=list(_sentiment_options),
            value=list(_sentiment_options),
            label="Sentiment Filter",
        )
        csv_output = gr.File(label="📁 Download Clustered Digest CSV")

    with gr.Accordion("📂 Upload Articles (.txt files)", open=False):
        uploaded_files = gr.File(
            label="Upload .txt Files",
            file_types=[".txt"],
            file_count="multiple",
        )

    with gr.Accordion("🔗 Enter Multiple URLs", open=False):
        urls_input = gr.Textbox(label="Enter URLs (newline separated)", lines=4)

    with gr.Row():
        submit_button = gr.Button(" Generate Digest")
        clear_button = gr.Button(" Clear")

    # ---- Output columns (one Markdown pane per cluster) ----
    with gr.Row():
        _cluster_columns = [gr.Markdown() for _ in range(5)]
    column_0, column_1, column_2, column_3, column_4 = _cluster_columns

    # Both digest handlers return the same 7-tuple, so they share one list.
    _digest_outputs = [sentiment_filter, *_cluster_columns, csv_output]

    # ---- Event wiring ----
    submit_button.click(
        fn=update_ui_with_columns,
        inputs=[topic_input, uploaded_files, urls_input, sentiment_filter],
        outputs=_digest_outputs,
    )

    latest_news_button.click(
        fn=fetch_and_process_latest_news,
        inputs=[sentiment_filter],
        outputs=_digest_outputs,
    )

    # Order must match the tuple returned by ``clear_interface``.
    clear_button.click(
        fn=clear_interface,
        inputs=[],
        outputs=[
            topic_input,
            sentiment_filter,
            uploaded_files,
            urls_input,
            *_cluster_columns,
            csv_output,
        ],
    )


if __name__ == "__main__":
    demo.launch()