Spaces:

S6six
/

stock_sentiment_analysisv1

Sleeping

File size: 12,392 Bytes

9719f08

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8ccfe024",
   "metadata": {},
   "source": [
    "# Stock Sentiment Analysis\n",
    "\n",
    "This notebook performs sentiment analysis on news articles related to specific stocks and correlates it with stock price movements."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "784f2635",
   "metadata": {},
   "source": [
    "## 1. Setup and Imports\n",
    "\n",
    "Import necessary libraries and modules from our `src` directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3038c1d8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setup complete.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sys\n",
    "import os\n",
    "from datetime import datetime, timedelta\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Add src directory to path to import modules\n",
    "module_path = os.path.abspath(os.path.join('..'))\n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)\n",
    "\n",
    "from src.data_fetcher import get_stock_data, get_news_articles\n",
    "\n",
    "# Configure pandas display options\n",
    "pd.set_option('display.max_rows', 100)\n",
    "pd.set_option('display.max_columns', 50)\n",
    "pd.set_option('display.width', 1000)\n",
    "\n",
    "print(\"Setup complete.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ed65790",
   "metadata": {},
   "source": [
    "## 2. Define Parameters\n",
    "\n",
    "Set the stock ticker and date range for analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d0bb6ca4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ticker: AAPL\n",
      "Start Date: 2025-03-31\n",
      "End Date: 2025-04-30\n"
     ]
    }
   ],
   "source": [
    "TICKER = 'AAPL'  # Example: Apple Inc.\n",
    "END_DATE = datetime.now().strftime('%Y-%m-%d')\n",
    "# Fetch data for the last 30 days (adjust as needed)\n",
    "# Note: NewsAPI free tier limits searches to the past month\n",
    "START_DATE = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d') \n",
    "\n",
    "print(f\"Ticker: {TICKER}\")\n",
    "print(f\"Start Date: {START_DATE}\")\n",
    "print(f\"End Date: {END_DATE}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "902753f9",
   "metadata": {},
   "source": [
    "## 3. Fetch Data\n",
    "\n",
    "Use the functions from `data_fetcher.py` to get stock prices and news articles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "0d28dcf3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching stock data...\n",
      "Successfully fetched 21 days of stock data.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Open</th>\n",
       "      <th>High</th>\n",
       "      <th>Low</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Dividends</th>\n",
       "      <th>Stock Splits</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2025-03-31</td>\n",
       "      <td>217.009995</td>\n",
       "      <td>225.619995</td>\n",
       "      <td>216.229996</td>\n",
       "      <td>222.130005</td>\n",
       "      <td>65299300</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2025-04-01</td>\n",
       "      <td>219.809998</td>\n",
       "      <td>223.679993</td>\n",
       "      <td>218.899994</td>\n",
       "      <td>223.190002</td>\n",
       "      <td>36412700</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2025-04-02</td>\n",
       "      <td>221.320007</td>\n",
       "      <td>225.190002</td>\n",
       "      <td>221.020004</td>\n",
       "      <td>223.889999</td>\n",
       "      <td>35905900</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2025-04-03</td>\n",
       "      <td>205.539993</td>\n",
       "      <td>207.490005</td>\n",
       "      <td>201.250000</td>\n",
       "      <td>203.190002</td>\n",
       "      <td>103419000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2025-04-04</td>\n",
       "      <td>193.889999</td>\n",
       "      <td>199.880005</td>\n",
       "      <td>187.339996</td>\n",
       "      <td>188.380005</td>\n",
       "      <td>125910900</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date        Open        High         Low       Close     Volume  Dividends  Stock Splits\n",
       "0  2025-03-31  217.009995  225.619995  216.229996  222.130005   65299300        0.0           0.0\n",
       "1  2025-04-01  219.809998  223.679993  218.899994  223.190002   36412700        0.0           0.0\n",
       "2  2025-04-02  221.320007  225.190002  221.020004  223.889999   35905900        0.0           0.0\n",
       "3  2025-04-03  205.539993  207.490005  201.250000  203.190002  103419000        0.0           0.0\n",
       "4  2025-04-04  193.889999  199.880005  187.339996  188.380005  125910900        0.0           0.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Fetch Stock Data\n",
    "print(\"Fetching stock data...\")\n",
    "stock_df = get_stock_data(TICKER, START_DATE, END_DATE)\n",
    "\n",
    "if stock_df is not None:\n",
    "    print(f\"Successfully fetched {len(stock_df)} days of stock data.\")\n",
    "    display(stock_df.head())\n",
    "else:\n",
    "    print(\"Failed to fetch stock data.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45b2014d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fetching news articles...\n",
      "Found 853 articles for 'AAPL'\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'list' object has no attribute 'empty'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[20], line 4\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFetching news articles...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m      3\u001b[0m news_df \u001b[38;5;241m=\u001b[39m get_news_articles(TICKER, START_DATE, END_DATE)\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m news_df \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mnews_df\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mempty\u001b[49m:\n\u001b[0;32m      5\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccessfully fetched \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(news_df)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m news articles.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m      6\u001b[0m     display(news_df\u001b[38;5;241m.\u001b[39mhead())\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'empty'"
     ]
    }
   ],
   "source": [
    "# Fetch News Articles\n",
    "print(\"Fetching news articles...\")\n",
    "articles_list = get_news_articles(TICKER, START_DATE, END_DATE)\n",
    "\n",
    "# Convert the list of articles to a DataFrame\n",
    "if articles_list is not None:\n",
    "    news_df = pd.DataFrame(articles_list)\n",
    "    # Convert publishedAt to datetime and extract date\n",
    "    if 'publishedAt' in news_df.columns:\n",
    "        news_df['publishedAt'] = pd.to_datetime(news_df['publishedAt'])\n",
    "        news_df['date'] = news_df['publishedAt'].dt.date\n",
    "    else:\n",
    "        news_df['date'] = None # Handle case where publishedAt might be missing\n",
    "else:\n",
    "    news_df = pd.DataFrame() # Create an empty DataFrame if fetching failed\n",
    "\n",
    "# Now check the DataFrame\n",
    "if not news_df.empty:\n",
    "    print(f\"Successfully fetched and converted {len(news_df)} news articles to DataFrame.\")\n",
    "    display(news_df[['date', 'title', 'description', 'source']].head()) # Display relevant columns\n",
    "else:\n",
    "    print(\"No news articles found or failed to create DataFrame.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "060f293c",
   "metadata": {},
   "source": [
    "## 4. Sentiment Analysis\n",
    "\n",
    "Apply sentiment analysis to the fetched news articles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23508f73",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Skipping sentiment analysis as no news articles were successfully fetched or the DataFrame is empty.\n"
     ]
    }
   ],
   "source": [
    "from src.sentiment_analyzer import analyze_sentiment\n",
    "# Check if news_df exists and is not empty\n",
    "if 'news_df' in locals() and not news_df.empty:\n",
    "    print(f\"Performing sentiment analysis on {len(news_df)} articles...\")\n",
    "    # Combine title and description for better context (handle None values)\n",
    "    news_df['text_to_analyze'] = news_df['title'].fillna('') + \". \" + news_df['description'].fillna('')\n",
    "    # Apply the sentiment analysis function\n",
    "    # This might take a while depending on the number of articles and your hardware\n",
    "    sentiment_results = news_df['text_to_analyze'].apply(lambda x: analyze_sentiment(x) if pd.notna(x) else (None, None, None))\n",
    "    # Unpack results into separate columns\n",
    "    news_df['sentiment_label'] = sentiment_results.apply(lambda x: x[0])\n",
    "    news_df['sentiment_score'] = sentiment_results.apply(lambda x: x[1])\n",
    "    news_df['sentiment_scores_all'] = sentiment_results.apply(lambda x: x[2])\n",
    "    # Display the results\n",
    "    print(\"Sentiment analysis complete.\")\n",
    "    display(news_df[['date', 'title', 'sentiment_label', 'sentiment_score']].head())\n",
    "    # Display value counts for sentiment labels\n",
    "    print(\"\\nSentiment Label Distribution:\")\n",
    "    print(news_df['sentiment_label'].value_counts())\n",
    "else:\n",
    "    print(\"Skipping sentiment analysis as no news articles were successfully fetched or the DataFrame is empty.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}