LinkLinkWu committed on
Commit
c7f60fc
·
verified ·
1 Parent(s): 7c727fa

Update func.py

Browse files
Files changed (1) hide show
  1. func.py +55 -74
func.py CHANGED
@@ -12,13 +12,12 @@ import requests
12
  # ---------------------------------------------------------------------------
13
  # Model identifiers
14
  # ---------------------------------------------------------------------------
15
- SENTIMENT_MODEL_ID = "ahmedrachid/FinancialBERT-Sentiment-Analysis" # returns: positive / neutral / negative
16
  NER_MODEL_ID = "dslim/bert-base-NER"
17
 
18
  # ---------------------------------------------------------------------------
19
- # Eager initialisation of Hugging Face pipelines (shared across requests)
20
  # ---------------------------------------------------------------------------
21
- # Sentiment pipeline (binary decision will be made later)
22
  sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
23
  sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
24
  sentiment_pipeline = pipeline(
@@ -27,7 +26,6 @@ sentiment_pipeline = pipeline(
27
  tokenizer=sentiment_tokenizer,
28
  )
29
 
30
- # Named‑entity‑recognition pipeline (ORG extraction)
31
  ner_tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
32
  ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
33
  ner_pipeline = pipeline(
@@ -38,14 +36,14 @@ ner_pipeline = pipeline(
38
  )
39
 
40
  # ---------------------------------------------------------------------------
41
- # Core functionality
42
  # ---------------------------------------------------------------------------
43
 
44
  def fetch_news(ticker: str) -> List[dict]:
45
- """Scrape *up to* 30 recent headlines from Finviz for a given *ticker*.
46
 
47
- Returns a list of dictionaries with ``{"title": str, "link": str}`` or an
48
- empty list on any error/edge‑case (e.g. anti‑scraping redirect).
49
  """
50
  try:
51
  url = f"https://finviz.com/quote.ashx?t={ticker}"
@@ -56,80 +54,85 @@ def fetch_news(ticker: str) -> List[dict]:
56
  "Referer": "https://finviz.com/",
57
  "Connection": "keep-alive",
58
  }
59
- response = requests.get(url, headers=headers, timeout=10)
60
- if response.status_code != 200:
61
  return []
62
 
63
- soup = BeautifulSoup(response.text, "html.parser")
64
- page_title = soup.title.text if soup.title else ""
65
- if ticker.upper() not in page_title.upper():
66
- # Finviz sometimes redirects to a placeholder page if the ticker is unknown.
67
- return []
68
 
69
- news_table = soup.find(id="news-table")
70
- if news_table is None:
71
  return []
72
 
73
- latest_news: List[dict] = []
74
- for row in news_table.find_all("tr")[:30]: # keep only the 30 most recent rows
75
  link_tag = row.find("a")
76
  if link_tag:
77
- latest_news.append({
78
- "title": link_tag.get_text(strip=True),
79
- "link": link_tag["href"],
80
- })
81
- return latest_news
82
  except Exception:
83
- # swallow all exceptions and degrade gracefully
84
  return []
85
 
86
  # ---------------------------------------------------------------------------
87
- # Sentiment analysis helpers
88
  # ---------------------------------------------------------------------------
89
- # Raw labels coming from the FinancialBERT model
90
  _POSITIVE = "positive"
91
- _NEGATIVE = "negative"
92
 
93
- _DEFAULT_THRESHOLD = 0.55 # default probability threshold; callers may override
94
 
95
  def analyze_sentiment(
96
  text: str,
97
  pipe=None,
98
  threshold: float = _DEFAULT_THRESHOLD,
99
  ) -> Tuple[str, float]:
100
- """Classify *text* as **Positive/Negative** and return its positive probability.
101
 
102
- The underlying model is three‑class (positive/neutral/negative). We keep the
103
- **positive** score only and compare it against *threshold* to obtain a binary
104
- label. The function is **side‑effect free** and will never raise; on any
105
- internal error it falls back to ``("Unknown", 0.0)``.
106
  """
107
  try:
108
  sentiment_pipe = pipe or sentiment_pipeline
109
- raw_scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
110
- score_lookup = {item["label"].lower(): item["score"] for item in raw_scores}
111
- pos_score = score_lookup.get(_POSITIVE, 0.0)
112
- label = "Positive" if pos_score >= threshold else "Negative"
113
- return label, pos_score
 
 
 
114
  except Exception:
115
  return "Unknown", 0.0
116
 
117
  # ---------------------------------------------------------------------------
118
- # Organisation‑entity extraction helper (kept for backward compatibility)
119
  # ---------------------------------------------------------------------------
120
 
121
- def extract_org_entities(
122
- text: str,
123
- pipe=None,
124
- max_entities: int = 5,
125
- ) -> List[str]:
126
- """Extract up to *max_entities* unique organisation tokens from *text*.
127
-
128
- Uses the pre‑initialised NER pipeline unless an alternative *pipe* is
129
- supplied. Tokens are upper‑cased and de‑hashed ("##") to make them ticker‑
130
- friendly. The function is side‑effect free and falls back to an empty list
131
- on any exception.
132
  """
 
 
 
 
 
 
 
 
 
 
 
 
133
  try:
134
  ner_pipe = pipe or ner_pipeline
135
  entities = ner_pipe(text)
@@ -146,34 +149,12 @@ def extract_org_entities(
146
  return []
147
 
148
  # ---------------------------------------------------------------------------
149
- # Aggregation logic turning many headlines into one overall label
150
- # ---------------------------------------------------------------------------
151
-
152
- def aggregate_sentiments(
153
- results: List[Tuple[str, float]],
154
- avg_threshold: float = _DEFAULT_THRESHOLD,
155
- ) -> str:
156
- """Combine individual headline results into a single overall label.
157
-
158
- The rule is simple: compute the *mean* positive probability across all
159
- headlines and compare it with *avg_threshold*. If the list is empty, the
160
- function returns ``"Unknown"``.
161
- """
162
- if not results:
163
- return "Unknown"
164
-
165
- avg_pos = sum(score for _, score in results) / len(results)
166
- return "Positive" if avg_pos >= avg_threshold else "Negative"
167
-
168
- # ---------------------------------------------------------------------------
169
- # Public helpers (kept for backward compatibility with app.py)
170
  # ---------------------------------------------------------------------------
171
 
172
  def get_sentiment_pipeline():
173
- """Expose the initialised sentiment pipeline (singleton)."""
174
  return sentiment_pipeline
175
 
176
 
177
  def get_ner_pipeline():
178
- """Expose the initialised NER pipeline (singleton)."""
179
  return ner_pipeline
 
12
  # ---------------------------------------------------------------------------
13
  # Model identifiers
14
  # ---------------------------------------------------------------------------
15
+ SENTIMENT_MODEL_ID = "LinkLinkWu/Stock_Analysis_Test_Ahamed"
16
  NER_MODEL_ID = "dslim/bert-base-NER"
17
 
18
  # ---------------------------------------------------------------------------
19
+ # Eager initialisation of Hugging Face pipelines (shared singletons)
20
  # ---------------------------------------------------------------------------
 
21
  sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
22
  sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
23
  sentiment_pipeline = pipeline(
 
26
  tokenizer=sentiment_tokenizer,
27
  )
28
 
 
29
  ner_tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
30
  ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
31
  ner_pipeline = pipeline(
 
36
  )
37
 
38
  # ---------------------------------------------------------------------------
39
+ # Web‑scraping helper
40
  # ---------------------------------------------------------------------------
41
 
42
  def fetch_news(ticker: str) -> List[dict]:
43
+ """Return up to 30 latest Finviz headlines for *ticker* (title & link).
44
 
45
+ Empty list on network / parsing errors or if Finviz redirects to a generic
46
+ page (e.g. wrong ticker).
47
  """
48
  try:
49
  url = f"https://finviz.com/quote.ashx?t={ticker}"
 
54
  "Referer": "https://finviz.com/",
55
  "Connection": "keep-alive",
56
  }
57
+ r = requests.get(url, headers=headers, timeout=10)
58
+ if r.status_code != 200:
59
  return []
60
 
61
+ soup = BeautifulSoup(r.text, "html.parser")
62
+ if ticker.upper() not in (soup.title.text if soup.title else "").upper():
63
+ return [] # Finviz placeholder page
 
 
64
 
65
+ table = soup.find(id="news-table")
66
+ if table is None:
67
  return []
68
 
69
+ news: List[dict] = []
70
+ for row in table.find_all("tr")[:30]:
71
  link_tag = row.find("a")
72
  if link_tag:
73
+ news.append({"title": link_tag.get_text(strip=True), "link": link_tag["href"]})
74
+ return news
 
 
 
75
  except Exception:
 
76
  return []
77
 
78
  # ---------------------------------------------------------------------------
79
+ # Sentiment helpers
80
  # ---------------------------------------------------------------------------
 
81
  _POSITIVE = "positive"
82
+ _DEFAULT_THRESHOLD = 0.55 # per‑headline probability cut‑off
83
 
 
84
 
85
  def analyze_sentiment(
86
  text: str,
87
  pipe=None,
88
  threshold: float = _DEFAULT_THRESHOLD,
89
  ) -> Tuple[str, float]:
90
+ """Classify *text* and return ``(label, positive_probability)``.
91
 
92
+ * Binary label (*Positive* / *Negative*) is determined by comparing the
93
+ *positive* probability with *threshold*.
94
+ * Neutral headlines are mapped to *Negative* by design.
95
+ * On any internal error ("Unknown", 0.0).
96
  """
97
  try:
98
  sentiment_pipe = pipe or sentiment_pipeline
99
+ scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
100
+ pos_prob = 0.0
101
+ for item in scores:
102
+ if item["label"].lower() == _POSITIVE:
103
+ pos_prob = item["score"]
104
+ break
105
+ label = "Positive" if pos_prob >= threshold else "Negative"
106
+ return label, pos_prob
107
  except Exception:
108
  return "Unknown", 0.0
109
 
110
  # ---------------------------------------------------------------------------
111
+ # Aggregation – average positive probability → binary overall label
112
  # ---------------------------------------------------------------------------
113
 
114
+ def aggregate_sentiments(
115
+ results: List[Tuple[str, float]],
116
+ avg_threshold: float = _DEFAULT_THRESHOLD,
117
+ ) -> str:
118
+ """Compute overall **Positive/Negative** based on *mean* positive probability.
119
+
120
+ * *results* – list returned by ``analyze_sentiment`` for each headline.
121
+ * If the average positive probability ≥ *avg_threshold* → *Positive*.
122
+ * Empty list → *Unknown*.
 
 
123
  """
124
+ if not results:
125
+ return "Unknown"
126
+
127
+ avg_pos = sum(prob for _, prob in results) / len(results)
128
+ return "Positive" if avg_pos >= avg_threshold else "Negative"
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # ORG‑entity extraction (for ticker discovery)
132
+ # ---------------------------------------------------------------------------
133
+
134
+ def extract_org_entities(text: str, pipe=None, max_entities: int = 5) -> List[str]:
135
+ """Return up to *max_entities* unique ORG tokens (upper‑case, de‑hashed)."""
136
  try:
137
  ner_pipe = pipe or ner_pipeline
138
  entities = ner_pipe(text)
 
149
  return []
150
 
151
  # ---------------------------------------------------------------------------
152
+ # Public accessors (backward compatibility with app.py)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  # ---------------------------------------------------------------------------
154
 
155
  def get_sentiment_pipeline():
 
156
  return sentiment_pipeline
157
 
158
 
159
  def get_ner_pipeline():
 
160
  return ner_pipeline