Sze_Link_ISOM_5240_MODEL

Running

App Files Community

LinkLinkWu committed on May 18

Commit

6eecf76

verified ·

1 Parent(s): ae44182

Update func.py

Browse files

Files changed (1) hide show

func.py +100 -99

func.py CHANGED Viewed

@@ -1,3 +1,16 @@
 from typing import List, Tuple
 from transformers import (
@@ -10,37 +23,98 @@ from bs4 import BeautifulSoup
 import requests
 # ---------------------------------------------------------------------------
-# Model identifiers – custom binary‑sentiment model hosted on Hugging Face
 # ---------------------------------------------------------------------------
 SENTIMENT_MODEL_ID = "LinkLinkWu/Stock_Analysis_Test_Ahamed"  # LABEL_0 = Negative, LABEL_1 = Positive
 NER_MODEL_ID = "dslim/bert-base-NER"
 # ---------------------------------------------------------------------------
-# Pipeline singletons (initialised once per session)
 # ---------------------------------------------------------------------------
-sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
-sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
 sentiment_pipeline = pipeline(
-    "sentiment-analysis",
-    model=sentiment_model,
-    tokenizer=sentiment_tokenizer,
 )
-ner_tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
-ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
 ner_pipeline = pipeline(
     "ner",
-    model=ner_model,
-    tokenizer=ner_tokenizer,
     grouped_entities=True,
 )
 # ---------------------------------------------------------------------------
 # Web‑scraping helper (Finviz)
 # ---------------------------------------------------------------------------
-def fetch_news(ticker: str) -> List[dict]:
-    """Return ≤30 latest Finviz headlines for *ticker* ("title" & "link")."""
     try:
         url = f"https://finviz.com/quote.ashx?t={ticker}"
         headers = {
@@ -56,110 +130,34 @@ def fetch_news(ticker: str) -> List[dict]:
         soup = BeautifulSoup(r.text, "html.parser")
         if ticker.upper() not in (soup.title.text if soup.title else "").upper():
-            return []  # redirect / placeholder page
         table = soup.find(id="news-table")
         if table is None:
             return []
         headlines: List[dict] = []
-        for row in table.find_all("tr")[:30]:
             link_tag = row.find("a")
             if link_tag:
-                headlines.append({"title": link_tag.get_text(strip=True), "link": link_tag["href"]})
         return headlines
     except Exception:
         return []
-# ---------------------------------------------------------------------------
-# Sentiment helpers – binary output, internal probabilities retained
-# ---------------------------------------------------------------------------
-_LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive", "NEUTRAL": "Positive"}
-_POSITIVE_RAW = "LABEL_1"
-_NEUTRAL_RAW = "NEUTRAL"  # rarely returned; mapped to Positive on purpose
-_SINGLE_THRESHOLD = 0.55  # per‑headline cut‑off
-def analyze_sentiment(
-    text: str,
-    pipe=None,
-    threshold: float = _SINGLE_THRESHOLD,
-) -> Tuple[str, float]:
-    """Return ``(label, positive_probability)`` for *text*.
-    * Neutral predictions – if produced by the model – are **treated as Positive**.
-    * Numeric probability is kept for aggregation; front‑end may discard it to
-      satisfy the "no numbers" display requirement.
-    """
-    try:
-        sentiment_pipe = pipe or sentiment_pipeline
-        all_scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
-        score_map = {item["label"].upper(): item["score"] for item in all_scores}
-        # Positive probability: include Neutral as positive when present
-        pos_prob = score_map.get(_POSITIVE_RAW, 0.0)
-        if _NEUTRAL_RAW in score_map:
-            pos_prob = max(pos_prob, score_map[_NEUTRAL_RAW])
-        # Determine final label (Neutral → Positive by design)
-        label = "Positive" if (
-            (_NEUTRAL_RAW in score_map) or (pos_prob >= threshold)
-        ) else "Negative"
-        return label, pos_prob
-    except Exception:
-        return "Unknown", 0.0
 # ---------------------------------------------------------------------------
-_LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive"}
-_POSITIVE_RAW = "LABEL_1"
-_SINGLE_THRESHOLD = 0.55  # per‑headline cut‑off
-def analyze_sentiment(text: str, pipe=None, threshold: float = _SINGLE_THRESHOLD) -> Tuple[str, float]:
-    """Return ``(label, positive_probability)`` for *text*.
-    * Neutral is not expected from a binary model; if encountered, treat as Negative.
-    * Numeric probability is for internal aggregation only – front‑end can ignore
-      it to satisfy the "no numbers" requirement.
-    """
-    try:
-        sentiment_pipe = pipe or sentiment_pipeline
-        scores = sentiment_pipe(text, return_all_scores=True, truncation=True)[0]
-        pos_prob = 0.0
-        for item in scores:
-            if item["label"].upper() == _POSITIVE_RAW:
-                pos_prob = item["score"]
-                break
-        label = "Positive" if pos_prob >= threshold else "Negative"
-        return label, pos_prob
-    except Exception:
-        return "Unknown", 0.0
-# ---------------------------------------------------------------------------
-# Aggregation – average positive probability → binary overall label
 # ---------------------------------------------------------------------------
-_AVG_THRESHOLD = 0.55  # ≥55 % mean positive probability → overall Positive
-def aggregate_sentiments(results: List[Tuple[str, float]], avg_threshold: float = _AVG_THRESHOLD) -> str:
-    """Compute overall **Positive/Negative** via *average positive probability*.
-    * *results* – list of tuples from ``analyze_sentiment``.
-    * Empty list → *Unknown*.
-    * The returned label is **binary**; numeric values remain internal.
     """
-    if not results:
-        return "Unknown"
-    avg_pos = sum(prob for _, prob in results) / len(results)
-    return "Positive" if avg_pos >= avg_threshold else "Negative"
-# ---------------------------------------------------------------------------
-# ORG‑entity extraction (ticker discovery)
-# ---------------------------------------------------------------------------
-def extract_org_entities(text: str, pipe=None, max_entities: int = 5) -> List[str]:
-    """Extract up to *max_entities* unique ORG tokens (upper‑case, de‑hashed)."""
     try:
         ner_pipe = pipe or ner_pipeline
         entities = ner_pipe(text)
@@ -175,13 +173,16 @@ def extract_org_entities(text: str, pipe=None, max_entities: int = 5) -> List[st
     except Exception:
         return []
 # ---------------------------------------------------------------------------
 # Public accessors (legacy compatibility)
 # ---------------------------------------------------------------------------
 def get_sentiment_pipeline():
     return sentiment_pipeline
 def get_ner_pipeline():
     return ner_pipeline

+"""func.py – utility functions for EquiPulse
+Cleaned‑up single‑source version (2025‑05‑18).
+Highlights
+----------
+* **Single** `analyze_sentiment` implementation – no more duplicates.
+* Returns **label string by default**, optional probability via `return_prob`.
+* Threshold lowered to **0.50** and Neutral treated as Positive.
+* Helper pipelines cached at module level.
+"""
+from __future__ import annotations
 from typing import List, Tuple
 from transformers import (
 import requests
 # ---------------------------------------------------------------------------
+# Model identifiers (Hugging Face)
 # ---------------------------------------------------------------------------
 SENTIMENT_MODEL_ID = "LinkLinkWu/Stock_Analysis_Test_Ahamed"  # LABEL_0 = Negative, LABEL_1 = Positive
 NER_MODEL_ID = "dslim/bert-base-NER"
 # ---------------------------------------------------------------------------
+# Pipeline singletons – loaded once on first import
 # ---------------------------------------------------------------------------
+# Sentiment
+_sent_tok = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)
+_sent_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)
 sentiment_pipeline = pipeline(
+    "text-classification",
+    model=_sent_model,
+    tokenizer=_sent_tok,
+    return_all_scores=True,
 )
+# NER
+_ner_tok = AutoTokenizer.from_pretrained(NER_MODEL_ID)
+_ner_model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)
 ner_pipeline = pipeline(
     "ner",
+    model=_ner_model,
+    tokenizer=_ner_tok,
     grouped_entities=True,
 )
+# ---------------------------------------------------------------------------
+# Sentiment helpers
+# ---------------------------------------------------------------------------
+_POSITIVE_RAW = "LABEL_1"   # positive class id in model output
+_NEUTRAL_RAW = "NEUTRAL"    # some models add a neutral class
+_SINGLE_THRESHOLD = 0.50    # ≥50% positive prob → Positive
+_LABEL_NEG = "Negative"
+_LABEL_POS = "Positive"
+_LABEL_UNK = "Unknown"
+def analyze_sentiment(
+    text: str,
+    *,
+    pipe=None,
+    threshold: float = _SINGLE_THRESHOLD,
+    return_prob: bool = False,
+):
+    """Classify *text* as Positive / Negative.
+    Parameters
+    ----------
+    text : str
+        Input sentence (e.g. news headline).
+    pipe : transformers.Pipeline, optional
+        Custom sentiment pipeline; defaults to module‑level singleton.
+    threshold : float, default 0.50
+        Positive‑probability cut‑off.
+    return_prob : bool, default False
+        If *True*, returns ``(label, positive_probability)`` tuple;
+        otherwise returns just the label string.
+    Notes
+    -----
+    * When the underlying model emits *NEUTRAL*, we treat it the same
+      as *Positive* – finance headlines often sound cautious.
+    * Function never raises; on failure returns ``"Unknown"`` (or
+      ``("Unknown", 0.0)`` when *return_prob* is *True*).
+    """
+    try:
+        s_pipe = pipe or sentiment_pipeline
+        scores = s_pipe(text, truncation=True)[0]  # list[dict]
+        score_map = {item["label"].upper(): item["score"] for item in scores}
+        pos_prob = score_map.get(_POSITIVE_RAW, 0.0)
+        if _NEUTRAL_RAW in score_map:  # treat Neutral as Positive
+            pos_prob = max(pos_prob, score_map[_NEUTRAL_RAW])
+        label = _LABEL_POS if pos_prob >= threshold else _LABEL_NEG
+        return (label, pos_prob) if return_prob else label
+    except Exception:
+        return (_LABEL_UNK, 0.0) if return_prob else _LABEL_UNK
 # ---------------------------------------------------------------------------
 # Web‑scraping helper (Finviz)
 # ---------------------------------------------------------------------------
+def fetch_news(ticker: str, max_items: int = 30) -> List[dict]:
+    """Return up to *max_items* latest Finviz headlines for *ticker*.
+    Result format:
+    ``[{'title': str, 'link': str}, ...]``
+    """
     try:
         url = f"https://finviz.com/quote.ashx?t={ticker}"
         headers = {
         soup = BeautifulSoup(r.text, "html.parser")
         if ticker.upper() not in (soup.title.text if soup.title else "").upper():
+            return []  # redirected / placeholder page
         table = soup.find(id="news-table")
         if table is None:
             return []
         headlines: List[dict] = []
+        for row in table.find_all("tr")[:max_items]:
             link_tag = row.find("a")
             if link_tag:
+                headlines.append(
+                    {"title": link_tag.text.strip(), "link": link_tag["href"]}
+                )
         return headlines
     except Exception:
         return []
 # ---------------------------------------------------------------------------
+# Named‑entity extraction helper
 # ---------------------------------------------------------------------------
+def extract_org_entities(text: str, pipe=None, max_entities: int = 5) -> List[str]:
+    """Extract *ORG* tokens (upper‑cased) from *text*.
+    Returns at most *max_entities* unique ticker‑like strings suitable
+    for Finviz / Yahoo queries.
     """
     try:
         ner_pipe = pipe or ner_pipeline
         entities = ner_pipe(text)
     except Exception:
         return []
 # ---------------------------------------------------------------------------
 # Public accessors (legacy compatibility)
 # ---------------------------------------------------------------------------
 def get_sentiment_pipeline():
+    """Return the module‑level sentiment pipeline singleton."""
     return sentiment_pipeline
 def get_ner_pipeline():
+    """Return the module‑level NER pipeline singleton."""
     return ner_pipeline