from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
from bs4 import BeautifulSoup
import requests

# ----------- Eager Initialization of Pipelines -----------

# Sentiment pipeline
model_id = "ahmedrachid/FinancialBERT-Sentiment-Analysis"
sentiment_tokenizer = AutoTokenizer.from_pretrained(model_id)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_id)
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=sentiment_model,
    tokenizer=sentiment_tokenizer
)

# NER pipeline
ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
ner_pipeline = pipeline(
    "ner",
    model=ner_model,
    tokenizer=ner_tokenizer,
    grouped_entities=True
)

# ----------- Core Functions -----------

def fetch_news(ticker):
    try:
        url = f"https://finviz.com/quote.ashx?t={ticker}"
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://finviz.com/',
            'Connection': 'keep-alive',
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        title = soup.title.text if soup.title else ""
        if ticker not in title:
            return []
        news_table = soup.find(id='news-table')
        if news_table is None:
            return []
        news = []
        for row in news_table.find_all('tr')[:30]:
            a_tag = row.find('a')
            if a_tag:
                title_text = a_tag.get_text()
                link = a_tag['href']
                news.append({'title': title_text, 'link': link})
        return news
    except Exception:
        return []

def analyze_sentiment(text, pipe=None):
    """
    Supports two call styles:
    - analyze_sentiment(text) -> uses the global sentiment_pipeline
    - analyze_sentiment(text, some_pipeline) -> uses the supplied some_pipeline
    """
    try:
        sentiment_pipe = pipe or sentiment_pipeline
        result = sentiment_pipe(text)[0]
        # Compare case-insensitively: FinancialBERT emits lowercase labels
        # ('positive'/'negative'/'neutral'), so an exact 'POSITIVE' check never matches.
        return "Positive" if result['label'].lower() == 'positive' else "Negative"
    except Exception:
        return "Unknown"

def extract_org_entities(text, pipe=None):
    """
    Supports two call styles:
    - extract_org_entities(text) -> uses the global ner_pipeline
    - extract_org_entities(text, some_pipeline) -> uses the supplied some_pipeline
    Returns up to five unique ORG entities, upper-cased.
    """
    try:
        ner_pipe = pipe or ner_pipeline
        entities = ner_pipe(text)
        orgs = []
        for ent in entities:
            if ent["entity_group"] == "ORG":
                w = ent["word"].replace("##", "").strip().upper()
                if w not in orgs:
                    orgs.append(w)
                if len(orgs) >= 5:
                    break
        return orgs
    except Exception:
        return []

# ----------- Helper Functions for Imports -----------

def get_sentiment_pipeline():
    return sentiment_pipeline


def get_ner_pipeline():
    return ner_pipeline