tickerArticleScorer

Sleeping

sentivity committed on Jun 3

Commit

5cd2ea8

verified ·

1 Parent(s): 1b63c04

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,10 +45,14 @@ score_model.eval()
 def preprocess_text(text):
     text = text.lower()
     text = re.sub(r'http\S+', '', text)
     text = re.sub(r'[^a-zA-Z0-9\s.,!?]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 # predicts sentiment
 def predict_sentiment(text):
     if not text:
@@ -171,8 +175,13 @@ def analyze_ticker(user_ticker: str):
             full_text = article_data["title"] + " " + article_data["text"]
             sentiment_score = predict_sentiment(full_text)
             snippet = article_data["text"][:500].replace("\n", " ").strip()
             blurb = f"{article_data['title']}\n\n{snippet}..."
         timestamp = datetime.datetime.utcnow()
         cache_entry = {

 def preprocess_text(text):
     text = text.lower()
     text = re.sub(r'http\S+', '', text)
+    text = re.sub(r'\d{1,2}:\d{2}', '', text)
+    text = re.sub(r'speaker\s+[a-z]', '', text)
+    text = re.sub(r'\b[a-z]{2,20}\s+howley\b', '', text)
     text = re.sub(r'[^a-zA-Z0-9\s.,!?]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 # predicts sentiment
 def predict_sentiment(text):
     if not text:
             full_text = article_data["title"] + " " + article_data["text"]
             sentiment_score = predict_sentiment(full_text)
+            '''
             snippet = article_data["text"][:500].replace("\n", " ").strip()
             blurb = f"{article_data['title']}\n\n{snippet}..."
+            '''
+            cleaned_text = preprocess_text(article_data["text"])
+            short_blurb = cleaned_text[:300] + "..." if len(cleaned_text) > 300 else cleaned_text
+            blurb = f"{article_data['title']}\n\n{short_blurb}"
         timestamp = datetime.datetime.utcnow()
         cache_entry = {