sentivity committed on
Commit 9521f84 · verified · 1 Parent(s): 1661711

Update app.py

Files changed (1)
  1. app.py +72 -42
app.py CHANGED
@@ -1,15 +1,21 @@
import gradio as gr
import requests
import torch
import torch.nn as nn
import re
import datetime
from transformers import AutoTokenizer
import numpy as np
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoConfig
from scipy.special import softmax

# Load tokenizer and sentiment model
MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
@@ -86,30 +92,43 @@ def predict_sentiment(text):
    return (1-(float(negative_score)))*100


-
-# uses Polygon API to fetch article
-def fetch_articles(ticker):
-    POLYGON_API_KEY = "cMCv7jipVvV4qLBikgzllNmW_isiODRR"
-    url = f"https://api.polygon.io/v2/reference/news?ticker={ticker}&limit=1&apiKey={POLYGON_API_KEY}"
-    print(f"[FETCH] {ticker}: {url}")
    try:
-        response = requests.get(url, timeout=10)
-        response.raise_for_status()
-        data = response.json()
-        if data.get("results"):
-            article = data["results"][0]
-            title = article.get("title", "")
-            description = article.get("description", "")
-            return title + " " + description
        return None
-    # checks specific HTTP errors
-    except requests.exceptions.HTTPError as http_err:
-        print(f"[ERROR] HTTP error for {ticker}: {http_err}")
-        return f"HTTP error when fetching {ticker}: {http_err}"
-    # catches any other error
-    except Exception as exc:
-        print(f"[ERROR] Unexpected error for {ticker}: {exc}")
-        return f"Error fetching articles for {ticker}: {exc}"

# initialize cache
sentiment_cache = {}
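Note that the removed `fetch_articles` embedded its Polygon API key directly in source; a key that has appeared in a commit should be treated as leaked and rotated. A minimal sketch of the usual alternative, reading the key from the environment (the variable name is illustrative, not part of the commit):

```python
import os

# Assumption: the key is supplied as an environment variable / Space secret
# named POLYGON_API_KEY rather than hard-coded in app.py.
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
if not POLYGON_API_KEY:
    raise RuntimeError("POLYGON_API_KEY is not set")
```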
@@ -123,43 +142,54 @@ def is_cache_valid(cached_time, max_age_minutes=10):
    return age.total_seconds() < max_age_minutes * 60

# analyzes the tickers
-def analyze_ticker(user_ticker: str):
    user_ticker = user_ticker.upper().strip()
-    tickers_to_check = list({user_ticker, "SPY"})
    results = []

    for tk in tickers_to_check:
-        cached = sentiment_cache.get(tk, {})
        if cached and is_cache_valid(cached.get("timestamp")):
-            print(f"[CACHE] Using cached sentiment for {tk}")
-            results.append({**cached, "ticker": tk})
            continue

-        print(f"[INFO] Fetching fresh data for {tk}")
-
-        article_text = fetch_articles(tk)
-
-        if article_text is None:
            sentiment_score = None
-            article_text = f"No news articles found for {tk}."
        else:
-            sentiment_score = predict_sentiment(article_text)
-
-        timestamp = datetime.datetime.utcnow()

        cache_entry = {
-            "article": article_text,
            "sentiment": sentiment_score,
-            "timestamp": timestamp,
        }
        sentiment_cache[tk] = cache_entry
-        results.append({**cache_entry, "ticker": tk})

-    # sort so user ticker appears first, SPY second
    results.sort(key=lambda x: 0 if x["ticker"] == user_ticker else 1)
    return results

-
def display_sentiment(results):
    html = "<h2>Sentiment Analysis</h2><ul>"
    for r in results:
@@ -171,7 +201,7 @@ def display_sentiment(results):
        )
        html += (
            f"<li><b>{r['ticker']}</b> &nbsp;({ts_str})<br>"
-            f"{r['article']}<br>"
            f"<i>Sentiment score:</i> {score_display}</li>"
        )
    html += "</ul>"
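For context on the score both versions display: `predict_sentiment` returns `(1-(float(negative_score)))*100`, i.e. 100 minus the model's negative-class softmax probability in percent, so higher means less negative. A quick worked example:

```python
# If the softmax probability of the "negative" class is 0.25,
# the reported score is (1 - 0.25) * 100 = 75.0.
negative_score = 0.25
score = (1 - float(negative_score)) * 100
assert score == 75.0
```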
 
+!pip install yfinance
+!pip install newspaper3k
+!pip install lxml
+
import gradio as gr
import requests
import torch
import torch.nn as nn
import re
import datetime
+import yfinance as yf
from transformers import AutoTokenizer
import numpy as np
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoConfig
from scipy.special import softmax
+from newspaper import Article

# Load tokenizer and sentiment model
MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
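One caveat with the new header: `!pip install ...` is IPython/Jupyter shell-escape syntax, not Python, so a plain `app.py` executed by a Hugging Face Space will fail with a SyntaxError on the first line. The conventional fix is to declare the new dependencies in the Space's `requirements.txt` instead:

```
# requirements.txt
yfinance
newspaper3k
lxml
```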
 
    return (1-(float(negative_score)))*100


+# extracts article text
+def extract_article_text(url: str):
    try:
+        article = Article(url)
+        article.download()
+        article.parse()
+        return {
+            "title": article.title or "",
+            "text": article.text or "",
+            "publish_date": article.publish_date,
+            "url": url
+        }
+    except Exception as e:
+        print(f"[ERROR] newspaper3k failed for URL {url}: {e}")
        return None
+
+# fetch an article for the given ticker
+def fetch_article_for_ticker(ticker: str):
+    ticker_obj = yf.Ticker(ticker)
+    news_items = ticker_obj.news or []
+
+    if not news_items:
+        return None
+
+    for item in news_items:
+        if item is None:
+            continue
+        # tries both fields where yfinance might store a URL
+        url = item.get("link") or item.get("content", {}).get("clickThroughUrl", {}).get("url")
+        if not url:
+            continue
+
+        parsed = extract_article_text(url)
+        if parsed:
+            return parsed
+
+    return None

# initialize cache
sentiment_cache = {}
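`fetch_article_for_ticker` probes two locations for the article URL because the yfinance news schema has changed across versions: older releases exposed a top-level `link` field, while newer ones nest the URL under `content.clickThroughUrl.url`. One nuance: `item.get("content", {})` only falls back to `{}` when the key is absent; if the key is present with an explicit `None` value, the chained `.get` calls raise `AttributeError`. A slightly more defensive variant of the same lookup (a sketch, not part of the commit):

```python
def extract_news_url(item: dict):
    # Hypothetical helper: tolerates keys that are present but None
    # in either of the two yfinance news schemas.
    if not isinstance(item, dict):
        return None
    url = item.get("link")
    if url:
        return url
    content = item.get("content") or {}
    click_through = content.get("clickThroughUrl") or {}
    return click_through.get("url")
```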
 
    return age.total_seconds() < max_age_minutes * 60

# analyzes the tickers
+def analyze_ticker(user_ticker: str) -> list:
    user_ticker = user_ticker.upper().strip()
+    tickers_to_check = [user_ticker, "SPY"] if user_ticker != "SPY" else ["SPY"]
    results = []

    for tk in tickers_to_check:
+        cached = sentiment_cache.get(tk)
        if cached and is_cache_valid(cached.get("timestamp")):
+            # reuse cached entry
+            results.append({
+                "ticker": tk,
+                "article_blurb": cached["article_blurb"],
+                "sentiment": cached["sentiment"],
+                "timestamp": cached["timestamp"],
+            })
            continue

+        # fetch fresh article via yfinance + newspaper3k
+        article_data = fetch_article_for_ticker(tk)
+        if not article_data:
+            blurb = f"No news articles found for {tk}."
            sentiment_score = None
        else:
+            full_text = article_data["title"] + " " + article_data["text"]
+            sentiment_score = predict_sentiment(full_text)
+
+            snippet = article_data["text"][:500].replace("\n", " ").strip()
+            blurb = f"{article_data['title']}\n\n{snippet}..."

+        timestamp = datetime.datetime.utcnow()
        cache_entry = {
+            "article_blurb": blurb,
            "sentiment": sentiment_score,
+            "timestamp": timestamp
        }
        sentiment_cache[tk] = cache_entry

+        results.append({
+            "ticker": tk,
+            "article_blurb": blurb,
+            "sentiment": sentiment_score,
+            "timestamp": timestamp
+        })
+
+    # ensure user_ticker appears first, SPY second
    results.sort(key=lambda x: 0 if x["ticker"] == user_ticker else 1)
    return results


def display_sentiment(results):
    html = "<h2>Sentiment Analysis</h2><ul>"
    for r in results:
 
        )
        html += (
            f"<li><b>{r['ticker']}</b> &nbsp;({ts_str})<br>"
+            f"{r['article_blurb']}<br>"
            f"<i>Sentiment score:</i> {score_display}</li>"
        )
    html += "</ul>"
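The diff stops before the UI wiring, but with `gradio` imported and `analyze_ticker`/`display_sentiment` defined, the rest of app.py presumably connects them roughly as sketched below. The Interface arguments are not shown in this commit, so the names and labels here are illustrative only:

```python
# Hypothetical wiring (not part of this diff): run a ticker through
# analyze_ticker and render the result list as HTML.
def run(ticker: str) -> str:
    return display_sentiment(analyze_ticker(ticker))

demo = gr.Interface(
    fn=run,
    inputs=gr.Textbox(label="Ticker, e.g. AAPL"),
    outputs=gr.HTML(),
    title="Ticker News Sentiment",
)

if __name__ == "__main__":
    demo.launch()
```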