harao-ml commited on
Commit
5549e15
·
verified ·
1 Parent(s): 4b1f254

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from newspaper import Article
4
+ from transformers import pipeline
5
+ import config
6
+ import nltk
7
+
8
+
9
# Load summarization pipeline (a fine-tuned FLAN-T5 checkpoint from the Hub).
# NOTE(review): this runs at import time, so model weights are downloaded /
# loaded before the app starts — confirm the startup cost is acceptable.
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
11
+
12
# Yield successive fixed-size word chunks of *text*.
# NOTE(review): `max_tokens` counts whitespace-separated words, not model
# tokens — the two can differ substantially; confirm against the model limit.
def split_text(text, max_tokens=512):
    words = text.split()
    start = 0
    while start < len(words):
        yield ' '.join(words[start:start + max_tokens])
        start += max_tokens
17
+
18
# Collapse all runs of whitespace to single spaces and drop pathological
# "words" of 100+ characters (e.g. URLs or minified junk from scraped pages).
def clean_text(text):
    kept = [token for token in text.split() if len(token) < 100]
    return ' '.join(kept)
23
+
24
+
25
# Download and parse the article at *url* via newspaper3k.
# Returns (title, author, pub_date, text); each metadata field falls back to
# a placeholder when missing. On any failure returns
# (None, None, None, "Error fetching article: ...").
def fetch_article_details(url):
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        headline = piece.title if piece.title else "Untitled"
        byline = ", ".join(piece.authors) if piece.authors else "Unknown"
        if piece.publish_date:
            published = piece.publish_date.strftime('%B %d, %Y')
        else:
            published = "Unknown"
        return headline, byline, published, piece.text
    except Exception as exc:
        return None, None, None, f"Error fetching article: {str(exc)}"
37
+
38
# Summarize *content* with the module-level `summarizer` pipeline.
#
# The text is whitespace-normalized, split into 512-word chunks (the model
# cannot accept arbitrarily long inputs), and each non-empty chunk is
# summarized independently; the chunk summaries are concatenated and
# returned. Returns "No input provided." for blank/whitespace-only input.
def generate_summary(content):
    if not content.strip():
        return "No input provided."
    cleaned_text = clean_text(content)
    chunks = list(split_text(cleaned_text))
    if not chunks:
        return ''
    # Fix: the original additionally ran the summarizer over the *entire*
    # raw text and discarded the result — pure wasted inference, and a
    # failure risk for inputs beyond the model's length limit. Removed.
    # do_sample=False keeps the output deterministic.
    return ''.join(
        summarizer(chunk, do_sample=False)[0]['summary_text']
        for chunk in chunks
        if chunk.strip()
    )
48
+
49
# Route *mixed_input* to the right path: http(s) URLs are fetched and
# summarized with article metadata; anything else is treated as raw text.
# Returns a Markdown-formatted result string in both cases.
def summarize_input(mixed_input):
    is_url = mixed_input.startswith(("http://", "https://"))
    if not is_url:
        summary = generate_summary(mixed_input)
        return f"## πŸ“ Summary \n\n{summary}\n\n**Original Text:**\n\n{mixed_input}\n\n---"
    title, author, pub_date, content = fetch_article_details(mixed_input)
    # fetch_article_details signals failure with an "Error..." content string.
    if content.startswith("Error"):
        return f"### Error\n\n{content}"
    summary = generate_summary(content)
    return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**πŸ“ Summary** \n\n{summary}\n\n[πŸ”— Read more]({mixed_input})\n\n---"
60
+
61
# Fetch the top Associated Press headlines from NewsAPI and summarize each.
# Returns a single Markdown string (either the joined summaries or an
# error/empty notice) — errors never propagate to the Gradio callback.
def fetch_news():
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10
    }
    try:
        # Fix: a missing timeout let a stalled NewsAPI connection hang the
        # request (and the UI callback) indefinitely.
        response = requests.get(url, params=params, timeout=15)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles", [])
        summaries = []
        for article in articles:
            # Fix: NewsAPI returns explicit nulls for missing fields, and
            # dict.get only applies its default when the key is *absent* —
            # `or` also covers present-but-null values.
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            # Prefer the full scraped text; fall back to the API's truncated
            # content/description fields.
            content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [πŸ”— Read more]({article_url})\n\n---")

        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"
91
+
92
# Best-effort full-text scrape of *url* via newspaper3k; None on any failure
# (callers fall back to the API-provided snippet).
def extract_full_content(url):
    try:
        page = Article(url)
        page.download()
        page.parse()
    except Exception:
        return None
    return page.text
101
+
102
# --- Gradio UI ----------------------------------------------------------
# `demo` stays module-level: the __main__ guard at the bottom launches it.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("# πŸ“° Sum Up! Stay Informed, Instantly")
    gr.Markdown(" ## A LLM based News Summarizer App")

    # One-line pitch shown under the title.
    gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
    with gr.Row():
        # Left column: one-click AP headlines digest.
        with gr.Column(scale=1):
            gr.Markdown("### Top Stories - A Snapshot ")
            gr.Markdown("**Source: Associated Press**")
            gr.Markdown("Click the button below to fetch the latest news articles.")
            headlines_button = gr.Button("πŸ—žοΈ News Now", variant="primary")
        # Right column: summarize user-supplied text or a URL.
        with gr.Column(scale=1):
            article_input = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
            run_summary_button = gr.Button("πŸ” Summarize", variant="secondary")

    # Shared Markdown panel that both buttons render their results into.
    results_panel = gr.Markdown()

    # Wire callbacks: summarize pasted text/URL, or pull the AP headlines.
    run_summary_button.click(fn=summarize_input, inputs=article_input, outputs=results_panel)
    headlines_button.click(fn=fetch_news, inputs=[], outputs=results_panel)
125
+
126
+
127
# Script entry point: start the Gradio server (no-op when imported).
if __name__ == "__main__":
    demo.launch()