tickerArticleScorer

Sleeping

App Files Files Community

sentivity committed on Jun 3

Commit

9521f84

verified ·

1 Parent(s): 1661711

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -42

app.py CHANGED Viewed

@@ -1,15 +1,21 @@
 import gradio as gr
 import requests
 import torch
 import torch.nn as nn
 import re
 import datetime
 from transformers import AutoTokenizer
 import numpy as np
 from transformers import AutoModelForSequenceClassification
 from transformers import TFAutoModelForSequenceClassification
 from transformers import AutoConfig
 from scipy.special import softmax
 # Load tokenizer and sentiment model
 MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
@@ -86,30 +92,43 @@ def predict_sentiment(text):
     return (1-(float(negative_score)))*100
-# uses Polygon API to fetch article
-def fetch_articles(ticker):
-    POLYGON_API_KEY = "cMCv7jipVvV4qLBikgzllNmW_isiODRR"
-    url = f"https://api.polygon.io/v2/reference/news?ticker={ticker}&limit=1&apiKey={POLYGON_API_KEY}"
-    print(f"[FETCH] {ticker}: {url}")
     try:
-        response = requests.get(url, timeout=10)
-        response.raise_for_status()
-        data = response.json()
-        if data.get("results"):
-            article = data["results"][0]
-            title = article.get("title", "")
-            description = article.get("description", "")
-            return title + " " + description
         return None
-    # checks specific HTTP errors
-    except requests.exceptions.HTTPError as http_err:
-        print(f"[ERROR] HTTP error for {ticker}: {http_err}")
-        return f"HTTP error when fetching {ticker}: {http_err}"
-    # catches any other error
-    except Exception as exc:
-        print(f"[ERROR] Unexpected error for {ticker}: {exc}")
-        return f"Error fetching articles for {ticker}: {exc}"
 # initialize cache
 sentiment_cache = {}
@@ -123,43 +142,54 @@ def is_cache_valid(cached_time, max_age_minutes=10):
     return age.total_seconds() < max_age_minutes * 60
 # analyzes the tickers
-def analyze_ticker(user_ticker: str):
     user_ticker = user_ticker.upper().strip()
-    tickers_to_check = list({user_ticker, "SPY"})
     results = []
     for tk in tickers_to_check:
-        cached = sentiment_cache.get(tk, {})
         if cached and is_cache_valid(cached.get("timestamp")):
-            print(f"[CACHE] Using cached sentiment for {tk}")
-            results.append({**cached, "ticker": tk})
             continue
-        print(f"[INFO] Fetching fresh data for {tk}")
-        article_text = fetch_articles(tk)
-        if article_text is None:
             sentiment_score = None
-            article_text = f"No news articles found for {tk}."
         else:
-            sentiment_score = predict_sentiment(article_text)
-        timestamp = datetime.datetime.utcnow()
         cache_entry = {
-            "article": article_text,
             "sentiment": sentiment_score,
-            "timestamp": timestamp,
         }
         sentiment_cache[tk] = cache_entry
-        results.append({**cache_entry, "ticker": tk})
-    # sort so user ticker appears first, SPY second
     results.sort(key=lambda x: 0 if x["ticker"] == user_ticker else 1)
     return results
 def display_sentiment(results):
     html = "<h2>Sentiment Analysis</h2><ul>"
     for r in results:
@@ -171,7 +201,7 @@ def display_sentiment(results):
         )
         html += (
             f"<li><b>{r['ticker']}</b> &nbsp;({ts_str})<br>"
-            f"{r['article']}<br>"
             f"<i>Sentiment score:</i> {score_display}</li>"
         )
     html += "</ul>"

# NOTE: the third-party packages yfinance, newspaper3k and lxml must be listed
# in requirements.txt so the hosting environment installs them at build time.
# `!pip install ...` is IPython/notebook shell syntax; in a plain Python module
# such as app.py it is a SyntaxError and prevents the app from starting.
 import gradio as gr
 import requests
 import torch
 import torch.nn as nn
 import re
 import datetime
+import yfinance as yf
 from transformers import AutoTokenizer
 import numpy as np
 from transformers import AutoModelForSequenceClassification
 from transformers import TFAutoModelForSequenceClassification
 from transformers import AutoConfig
 from scipy.special import softmax
+from newspaper import Article
 # Load tokenizer and sentiment model
 MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
     return (1-(float(negative_score)))*100
# extracts article text
def extract_article_text(url: str):
    """Download and parse the article at *url* with newspaper3k.

    Returns a dict with the keys "title", "text", "publish_date" and "url",
    or None when downloading/parsing raises or when the parsed page yields
    neither a title nor any body text.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()
        title = article.title or ""
        text = article.text or ""
        if not (title.strip() or text.strip()):
            # A dict of empty strings is still truthy, so a caller testing
            # `if parsed:` would accept a page with no usable content.
            print(f"[ERROR] newspaper3k extracted no content for URL {url}")
            return None
        return {
            "title": title,
            "text": text,
            "publish_date": article.publish_date,
            "url": url,
        }
    except Exception as e:
        # Best-effort: any download/parse failure is logged and reported as
        # "no article" instead of propagating to the caller.
        print(f"[ERROR] newspaper3k failed for URL {url}: {e}")
        return None
# picks the article URL out of one yfinance news item
def _news_item_url(item):
    """Return the first non-empty article URL found in *item*, or None.

    Checks the flat "link" field first, then the nested
    item["content"][...]["url"] fields. Every level is guarded because a key
    that is present with a None value bypasses the `{}` default of dict.get
    and would otherwise raise AttributeError on the next `.get`.
    """
    if not isinstance(item, dict):
        return None
    link = item.get("link")
    if link:
        return link
    content = item.get("content") or {}
    if not isinstance(content, dict):
        return None
    # NOTE(review): "canonicalUrl" is assumed to be the fallback location used
    # by newer yfinance payloads when "clickThroughUrl" is null; confirm.
    for key in ("clickThroughUrl", "canonicalUrl"):
        holder = content.get(key) or {}
        if isinstance(holder, dict) and holder.get("url"):
            return holder["url"]
    return None


# fetch article based on ticker
def fetch_article_for_ticker(ticker: str):
    """Return the first successfully parsed news article for *ticker*.

    Reads the news items from yfinance for the ticker and walks them in
    order, skipping items without a usable URL and items that
    extract_article_text could not parse.

    Returns the dict produced by extract_article_text, or None when there
    are no news items or none of them could be parsed.
    """
    ticker_obj = yf.Ticker(ticker)
    news_items = ticker_obj.news or []
    for item in news_items:
        url = _news_item_url(item)
        if not url:
            continue
        parsed = extract_article_text(url)
        if parsed:
            return parsed
    return None
 # initialize cache
 sentiment_cache = {}
     return age.total_seconds() < max_age_minutes * 60
 # analyzes the tickers
def analyze_ticker(user_ticker: str) -> list:
    """Score the latest news article for *user_ticker* and for SPY.

    The ticker is upper-cased and stripped. For each ticker checked, the
    entry in sentiment_cache is reused while is_cache_valid accepts its
    timestamp; otherwise an article is fetched with
    fetch_article_for_ticker, scored with predict_sentiment, and the result
    is stored back into sentiment_cache.

    Returns a list of dicts with the keys "ticker", "article_blurb",
    "sentiment" (None when no article was found) and "timestamp", with the
    user's ticker first.
    """
    max_snippet_chars = 500

    user_ticker = user_ticker.upper().strip()
    # SPY is always reported; avoid listing it twice when the user asked for it
    tickers_to_check = [user_ticker, "SPY"] if user_ticker != "SPY" else ["SPY"]
    results = []
    for tk in tickers_to_check:
        cached = sentiment_cache.get(tk)
        if cached and is_cache_valid(cached.get("timestamp")):
            # reuse cached entry
            results.append({
                "ticker": tk,
                "article_blurb": cached["article_blurb"],
                "sentiment": cached["sentiment"],
                "timestamp": cached["timestamp"],
            })
            continue

        # fetch fresh article via yfinance + newspaper3k
        article_data = fetch_article_for_ticker(tk)
        if not article_data:
            blurb = f"No news articles found for {tk}."
            sentiment_score = None
        else:
            title = article_data["title"]
            text = article_data["text"]
            sentiment_score = predict_sentiment(title + " " + text)
            snippet = text[:max_snippet_chars].replace("\n", " ").strip()
            # only mark the snippet as truncated when text was actually cut
            ellipsis = "..." if len(text) > max_snippet_chars else ""
            blurb = f"{title}\n\n{snippet}{ellipsis}"

        # Naive UTC timestamp; is_cache_valid is presumably written against
        # naive datetimes -- confirm before switching to timezone-aware ones.
        timestamp = datetime.datetime.utcnow()
        cache_entry = {
            "article_blurb": blurb,
            "sentiment": sentiment_score,
            "timestamp": timestamp,
        }
        sentiment_cache[tk] = cache_entry
        results.append({"ticker": tk, **cache_entry})

    # ensure the user's ticker appears first in the list
    results.sort(key=lambda x: 0 if x["ticker"] == user_ticker else 1)
    return results
 def display_sentiment(results):
     html = "<h2>Sentiment Analysis</h2><ul>"
     for r in results:
         )
         html += (
             f"<li><b>{r['ticker']}</b> &nbsp;({ts_str})<br>"
+            f"{r['article_blurb']}<br>"
             f"<i>Sentiment score:</i> {score_display}</li>"
         )
     html += "</ul>"