LinkLinkWu committed on
Commit
ae44182
·
verified ·
1 Parent(s): 64d5a00

Update func.py

Browse files
Files changed (1) hide show
  1. func.py +69 -33
func.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List
2
 
3
  from transformers import (
4
  pipeline,
@@ -10,16 +10,14 @@ from bs4 import BeautifulSoup
10
  import requests
11
 
12
  # ---------------------------------------------------------------------------
13
- # Model identifiers – use your custom sentiment model hosted on Hugging Face
14
  # ---------------------------------------------------------------------------
15
- SENTIMENT_MODEL_ID = "LinkLinkWu/Stock_Analysis_Test_Ahamed" # binary sentiment
16
  NER_MODEL_ID = "dslim/bert-base-NER"
17
 
18
  # ---------------------------------------------------------------------------
19
- # Eager initialisation (singletons shared by the whole Streamlit session)
20
  # ---------------------------------------------------------------------------
21
- # Sentiment pipeline – returns one label with its score. We will *ignore* the
22
- # numeric score down‑stream to satisfy the "no numbers" requirement.
23
  sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
24
  sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
25
  sentiment_pipeline = pipeline(
@@ -28,7 +26,6 @@ sentiment_pipeline = pipeline(
28
  tokenizer=sentiment_tokenizer,
29
  )
30
 
31
- # Named‑entity‑recognition pipeline (ORG extraction)
32
  ner_tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
33
  ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
34
  ner_pipeline = pipeline(
@@ -43,7 +40,7 @@ ner_pipeline = pipeline(
43
  # ---------------------------------------------------------------------------
44
 
45
  def fetch_news(ticker: str) -> List[dict]:
46
- """Return at most 30 latest Finviz headlines for *ticker* ("title" & "link")."""
47
  try:
48
  url = f"https://finviz.com/quote.ashx?t={ticker}"
49
  headers = {
@@ -59,7 +56,7 @@ def fetch_news(ticker: str) -> List[dict]:
59
 
60
  soup = BeautifulSoup(r.text, "html.parser")
61
  if ticker.upper() not in (soup.title.text if soup.title else "").upper():
62
- return [] # possibly a redirect page
63
 
64
  table = soup.find(id="news-table")
65
  if table is None:
@@ -75,48 +72,87 @@ def fetch_news(ticker: str) -> List[dict]:
75
  return []
76
 
77
  # ---------------------------------------------------------------------------
78
- # Sentiment helpers – binary classification, *no* numeric score exposed
79
  # ---------------------------------------------------------------------------
80
- _LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive"} # adjust if model config differs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
- def analyze_sentiment(text: str, pipe=None) -> str:
84
- """Return **"Positive"** or **"Negative"** for a single headline.
85
 
86
- *Neutral* outputs (if ever returned by the model) are coerced to *Negative*.
87
- Numeric confidence scores are deliberately discarded to honour the
88
- "no numbers" requirement.
89
  """
90
  try:
91
  sentiment_pipe = pipe or sentiment_pipeline
92
- result = sentiment_pipe(text, truncation=True, return_all_scores=False)[0]
93
- raw_label = result.get("label", "").upper()
94
- label = _LABEL_MAP.get(raw_label, "Negative") # default to Negative
95
- return label
 
 
 
 
96
  except Exception:
97
- return "Unknown"
98
 
99
  # ---------------------------------------------------------------------------
100
- # Aggregation – majority vote (Positive‑ratio) → binary label
101
  # ---------------------------------------------------------------------------
102
-
103
- _POS_RATIO_THRESHOLD = 0.6 # ≥60 % positives → overall Positive
104
 
105
 
106
- def aggregate_sentiments(labels: List[str], pos_ratio_threshold: float = _POS_RATIO_THRESHOLD) -> str:
107
- """Combine individual headline labels into an overall binary sentiment.
108
 
109
- * If *Positive* proportion ≥ *pos_ratio_threshold* → *Positive*.
110
- * Otherwise → *Negative*.
111
  * Empty list → *Unknown*.
 
112
  """
113
- if not labels:
114
  return "Unknown"
115
 
116
- total = len(labels)
117
- positives = sum(1 for l in labels if l == "Positive")
118
- ratio = positives / total
119
- return "Positive" if ratio >= pos_ratio_threshold else "Negative"
120
 
121
  # ---------------------------------------------------------------------------
122
  # ORG‑entity extraction (ticker discovery)
 
1
+ from typing import List, Tuple
2
 
3
  from transformers import (
4
  pipeline,
 
10
  import requests
11
 
12
  # ---------------------------------------------------------------------------
13
+ # Model identifiers – custom binary‑sentiment model hosted on Hugging Face
14
  # ---------------------------------------------------------------------------
15
+ SENTIMENT_MODEL_ID = "LinkLinkWu/Stock_Analysis_Test_Ahamed" # LABEL_0 = Negative, LABEL_1 = Positive
16
  NER_MODEL_ID = "dslim/bert-base-NER"
17
 
18
  # ---------------------------------------------------------------------------
19
+ # Pipeline singletons (initialised once per session)
20
  # ---------------------------------------------------------------------------
 
 
21
  sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
22
  sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
23
  sentiment_pipeline = pipeline(
 
26
  tokenizer=sentiment_tokenizer,
27
  )
28
 
 
29
  ner_tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
30
  ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
31
  ner_pipeline = pipeline(
 
40
  # ---------------------------------------------------------------------------
41
 
42
  def fetch_news(ticker: str) -> List[dict]:
43
+ """Return 30 latest Finviz headlines for *ticker* ("title" & "link")."""
44
  try:
45
  url = f"https://finviz.com/quote.ashx?t={ticker}"
46
  headers = {
 
56
 
57
  soup = BeautifulSoup(r.text, "html.parser")
58
  if ticker.upper() not in (soup.title.text if soup.title else "").upper():
59
+ return [] # redirect / placeholder page
60
 
61
  table = soup.find(id="news-table")
62
  if table is None:
 
72
  return []
73
 
74
  # ---------------------------------------------------------------------------
75
+ # Sentiment helpers – binary output, internal probabilities retained
76
  # ---------------------------------------------------------------------------
77
+ _LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive", "NEUTRAL": "Positive"}
78
+ _POSITIVE_RAW = "LABEL_1"
79
+ _NEUTRAL_RAW = "NEUTRAL" # rarely returned; mapped to Positive on purpose
80
+ _SINGLE_THRESHOLD = 0.55 # per‑headline cut‑off
81
+
82
+
83
+ def analyze_sentiment(
84
+ text: str,
85
+ pipe=None,
86
+ threshold: float = _SINGLE_THRESHOLD,
87
+ ) -> Tuple[str, float]:
88
+ """Return ``(label, positive_probability)`` for *text*.
89
+
90
+ * Neutral predictions – if produced by the model – are **treated as Positive**.
91
+ * Numeric probability is kept for aggregation; front‑end may discard it to
92
+ satisfy the "no numbers" display requirement.
93
+ """
94
+ try:
95
+ sentiment_pipe = pipe or sentiment_pipeline
96
+ all_scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
97
+ score_map = {item["label"].upper(): item["score"] for item in all_scores}
98
+
99
+ # Positive probability: include Neutral as positive when present
100
+ pos_prob = score_map.get(_POSITIVE_RAW, 0.0)
101
+ if _NEUTRAL_RAW in score_map:
102
+ pos_prob = max(pos_prob, score_map[_NEUTRAL_RAW])
103
+
104
+ # Determine final label (Neutral → Positive by design)
105
+ label = "Positive" if (
106
+ (_NEUTRAL_RAW in score_map) or (pos_prob >= threshold)
107
+ ) else "Negative"
108
+ return label, pos_prob
109
+ except Exception:
110
+ return "Unknown", 0.0
111
+
112
+ # ---------------------------------------------------------------------------
113
+ _LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive"}
114
+ _POSITIVE_RAW = "LABEL_1"
115
+ _SINGLE_THRESHOLD = 0.55 # per‑headline cut‑off
116
 
117
 
118
+ def analyze_sentiment(text: str, pipe=None, threshold: float = _SINGLE_THRESHOLD) -> Tuple[str, float]:
119
+ """Return ``(label, positive_probability)`` for *text*.
120
 
121
+ * Neutral is not expected from a binary model; if encountered, treat as Negative.
122
+ * Numeric probability is for internal aggregation only — front‑end can ignore
123
+ it to satisfy the "no numbers" requirement.
124
  """
125
  try:
126
  sentiment_pipe = pipe or sentiment_pipeline
127
+ scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
128
+ pos_prob = 0.0
129
+ for item in scores:
130
+ if item["label"].upper() == _POSITIVE_RAW:
131
+ pos_prob = item["score"]
132
+ break
133
+ label = "Positive" if pos_prob >= threshold else "Negative"
134
+ return label, pos_prob
135
  except Exception:
136
+ return "Unknown", 0.0
137
 
138
  # ---------------------------------------------------------------------------
139
+ # Aggregation – average positive probability → binary overall label
140
  # ---------------------------------------------------------------------------
141
+ _AVG_THRESHOLD = 0.55 # ≥55 % mean positive probability → overall Positive
 
142
 
143
 
144
+ def aggregate_sentiments(results: List[Tuple[str, float]], avg_threshold: float = _AVG_THRESHOLD) -> str:
145
+ """Compute overall **Positive/Negative** via *average positive probability*.
146
 
147
+ * *results* list of tuples from ``analyze_sentiment``.
 
148
  * Empty list → *Unknown*.
149
+ * The returned label is **binary**; numeric values remain internal.
150
  """
151
+ if not results:
152
  return "Unknown"
153
 
154
+ avg_pos = sum(prob for _, prob in results) / len(results)
155
+ return "Positive" if avg_pos >= avg_threshold else "Negative"
 
 
156
 
157
  # ---------------------------------------------------------------------------
158
  # ORG‑entity extraction (ticker discovery)