Initial commit
Browse files
app.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
from newspaper import Article
|
4 |
+
from transformers import pipeline
|
5 |
+
import config
|
6 |
+
import nltk
|
7 |
+
|
8 |
+
|
9 |
+
# Load summarization pipeline
|
10 |
+
# Module-level pipeline object: created once when the module is imported and
# shared by every call that produces a summary.
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
|
11 |
+
|
12 |
+
# Function to split text into smaller chunks
|
13 |
+
def split_text(text, max_tokens=512):
    """Yield consecutive pieces of ``text`` holding at most ``max_tokens`` words each.

    Words are obtained by splitting on whitespace and re-joined with a single
    space, so the original spacing is not preserved. Note that ``max_tokens``
    counts whitespace-delimited words, not model tokens.
    """
    tokens = text.split()
    starts = range(0, len(tokens), max_tokens)
    yield from (' '.join(tokens[start:start + max_tokens]) for start in starts)
|
17 |
+
|
18 |
+
# Function to clean text
|
19 |
+
def clean_text(text):
    """Collapse whitespace and drop overlong tokens.

    Runs of whitespace become single spaces, and any whitespace-delimited
    token of 100 or more characters is removed from the result.
    """
    short_words = [token for token in text.split() if len(token) < 100]
    return ' '.join(short_words)
|
23 |
+
|
24 |
+
|
25 |
+
# Helper function to fetch and parse an article from a URL
|
26 |
+
def fetch_article_details(url):
    """Download and parse the article at ``url``.

    Returns a ``(title, author, pub_date, text)`` tuple. A missing title
    becomes "Untitled"; missing authors or publish date become "Unknown".
    If anything raises along the way, the tuple is
    ``(None, None, None, "Error fetching article: <message>")``.
    """
    try:
        parsed = Article(url)
        parsed.download()
        parsed.parse()

        title = parsed.title or "Untitled"

        if parsed.authors:
            author = ", ".join(parsed.authors)
        else:
            author = "Unknown"

        if parsed.publish_date:
            pub_date = parsed.publish_date.strftime('%B %d, %Y')
        else:
            pub_date = "Unknown"

        return title, author, pub_date, parsed.text
    except Exception as exc:
        return None, None, None, f"Error fetching article: {str(exc)}"
|
37 |
+
|
38 |
+
# Helper function to generate a summary
|
39 |
+
def generate_summary(content):
    """Summarize ``content`` chunk by chunk and return the combined summary.

    The text is cleaned with ``clean_text``, split into word-bounded chunks
    with ``split_text``, and each non-blank chunk is passed to the
    module-level ``summarizer``. The per-chunk summaries are joined with a
    single space.

    Returns "No input provided." when ``content`` is ``None``, empty, or only
    whitespace, and an empty string when cleaning leaves nothing to summarize.
    """
    if not content or not content.strip():
        return "No input provided."

    cleaned_text = clean_text(content)
    # Only the chunked summaries are returned, so the model is never run over
    # the full, unchunked text.
    chunk_summaries = [
        summarizer(chunk, do_sample=False)[0]['summary_text']
        for chunk in split_text(cleaned_text)
        if chunk.strip()
    ]
    # Join with a space so adjacent chunk summaries do not run together.
    return ' '.join(chunk_summaries)
|
48 |
+
|
49 |
+
# Summarize from text or URL
|
50 |
+
def summarize_input(mixed_input):
    """Summarize either a URL or raw article text and return Markdown.

    If the input, ignoring surrounding whitespace, starts with ``http://`` or
    ``https://`` it is fetched with ``fetch_article_details`` and the article
    body is summarized; otherwise the input itself is summarized. A failed
    fetch yields a Markdown error section containing the fetch error message.
    """
    text = mixed_input or ""
    candidate_url = text.strip()

    if candidate_url.startswith(("http://", "https://")):
        title, author, pub_date, content = fetch_article_details(candidate_url)
        # fetch_article_details returns a None title only on failure (a
        # successful fetch falls back to "Untitled"), so test that rather than
        # the text prefix: a real article may legitimately begin with "Error".
        if title is None:
            return f"### Error\n\n{content}"
        summary = generate_summary(content)
        return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n[π Read more]({candidate_url})\n\n---"

    summary = generate_summary(text)
    return f"## π Summary \n\n{summary}\n\n**Original Text:**\n\n{text}\n\n---"
|
60 |
+
|
61 |
+
# Function to fetch top headlines from NewsAPI and summarize them
|
62 |
+
def fetch_news():
    """Fetch top headlines from NewsAPI and return their summaries as Markdown.

    Requests up to 10 English-language Associated Press headlines using
    ``config.api_key``. For each article the full text is taken from
    ``extract_full_content``, falling back to the API's ``content`` and then
    ``description`` fields, and is summarized with ``generate_summary``.

    Returns the per-article Markdown sections joined by blank lines, or a
    Markdown/plain error message if the request fails, returns a non-200
    status, or yields no articles.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10,
    }
    try:
        # A timeout keeps a stalled connection from blocking this callback
        # forever; a timeout error is reported by the handler below.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles") or []
        summaries = []
        for article in articles:
            # Use `or` rather than a .get() default so fields that are present
            # but null/empty in the JSON also get the placeholder.
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [π Read more]({article_url})\n\n---")

        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"
|
91 |
+
|
92 |
+
# Helper function to extract full content using newspaper3k
|
93 |
+
def extract_full_content(url):
    """Return the parsed body text of the article at ``url``.

    Best-effort: any exception raised while downloading or parsing results in
    ``None`` instead of propagating.
    """
    body = None
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
    except Exception:
        body = None
    return body
|
101 |
+
|
102 |
+
# Gradio interface
|
103 |
+
# Builds the page layout and wires the two buttons to their handlers.
# NOTE(review): the nesting below was reconstructed from a view with the
# indentation stripped — confirm it against the original file.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Page heading and sub-heading.
    gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
    gr.Markdown(" ## A LLM based News Summarizer App")

    # Add a brief description
    gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
    with gr.Row():
        # First column: one-click headline digest (handled by fetch_news).
        with gr.Column(scale=1):
            gr.Markdown("### Top Stories - A Snapshot ")
            gr.Markdown("**Source: Associated Press**")
            gr.Markdown("Click the button below to fetch the latest news articles.")
            news_btn = gr.Button("ποΈ News Now", variant="primary")
        # Second column: free-form input, either article text or a URL
        # (handled by summarize_input).
        with gr.Column(scale=1):
            input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
            summarize_btn = gr.Button("π Summarize", variant="secondary")

    # Output area for displaying results
    # Both buttons write into this single Markdown component.
    output_area = gr.Markdown()  # Use a valid output component

    # Link buttons to their respective functions
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=output_area)
    news_btn.click(fn=fetch_news, inputs=[], outputs=output_area)
|
125 |
+
|
126 |
+
|
127 |
+
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|