"""
SEC EDGAR filings search tool for financial and company data.
"""
|
import json
import logging
import re
from typing import Dict, List, Optional

import requests

from .base_tool import BaseTool
|
|
|
|
|
class SECSearchTool(BaseTool):
    """Search SEC EDGAR filings for company financial information.

    A search resolves a free-text company name to a CIK using the SEC ticker
    file, downloads that company's submissions document and renders a
    Markdown-style text summary. When the company cannot be resolved, or its
    submissions cannot be fetched, a "not found" guidance text is returned
    instead.
    """

    _logger = logging.getLogger(__name__)

    # Form types reported as "key" filings, with their display labels.
    _KEY_FORM_LABELS = {
        '10-K': 'Annual Report',
        '10-Q': 'Quarterly Report',
        '8-K': 'Current Report',
        'DEF 14A': 'Proxy Statement',
    }

    # Only the first N entries of the filing list are scanned for key forms,
    # and at most M of the hits are displayed.
    _RECENT_FILINGS_WINDOW = 20
    _MAX_KEY_FILINGS = 5

    # Offline table used when the SEC ticker file cannot be downloaded/parsed.
    _KNOWN_COMPANIES = {
        'apple': {'cik': '0000320193', 'ticker': 'AAPL', 'title': 'Apple Inc.'},
        'microsoft': {'cik': '0000789019', 'ticker': 'MSFT', 'title': 'Microsoft Corporation'},
        'tesla': {'cik': '0001318605', 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
        'amazon': {'cik': '0001018724', 'ticker': 'AMZN', 'title': 'Amazon.com, Inc.'},
        'google': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
        'alphabet': {'cik': '0001652044', 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
        'meta': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
        'facebook': {'cik': '0001326801', 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
        'nvidia': {'cik': '0001045810', 'ticker': 'NVDA', 'title': 'NVIDIA Corporation'},
        'netflix': {'cik': '0001065280', 'ticker': 'NFLX', 'title': 'Netflix, Inc.'},
    }

    # Substrings that mark a query as being about public-company financials.
    _FINANCIAL_INDICATORS = (
        'company', 'financial', 'revenue', 'earnings', 'profit', 'stock',
        'investment', 'market cap', 'sec filing', 'annual report',
        'quarterly', 'balance sheet', 'income statement', 'cash flow',
        'public company', 'ticker', 'investor', 'shareholder',
    )

    def __init__(self):
        super().__init__("SEC EDGAR", "Search SEC filings and financial data for public companies")
        self.base_url = "https://data.sec.gov"
        self.headers = {
            'User-Agent': 'Research Tool research@academic.edu',
            'Accept-Encoding': 'gzip, deflate',
        }
        self.rate_limit_delay = 3.0

    def search(self, company_name: str, **kwargs) -> str:
        """Search SEC filings for company information.

        Args:
            company_name: Free-text company name or ticker symbol.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The formatted SEC summary, the "not found" guidance text, or the
            base tool's formatted error response.
        """
        self.rate_limit()

        try:
            cik_data = self._find_company_cik(company_name)
            if not cik_data:
                return self._fallback_company_search(company_name)

            submissions = self._get_company_submissions(cik_data['cik'])
            if not submissions:
                return self._fallback_company_search(company_name)

            return self._format_sec_results(company_name, cik_data, submissions)

        except requests.RequestException as e:
            # The lookup helpers handle their own request failures, so this is
            # a safety net. A 404 is recognised from the HTTP status code:
            # substring-matching "404" in the message also fires on URLs or
            # CIK numbers that merely contain those digits.
            if self._is_http_not_found(e):
                return self._fallback_company_search(company_name)
            return self.format_error_response(company_name, f"Network error accessing SEC: {e}")
        except Exception as e:
            return self.format_error_response(company_name, str(e))

    @staticmethod
    def _is_http_not_found(error: Exception) -> bool:
        """Return True when *error* carries an HTTP response with status 404."""
        response = getattr(error, 'response', None)
        return getattr(response, 'status_code', None) == 404

    @staticmethod
    def _normalize(value) -> str:
        """Return *value* as stripped lower-case text; None becomes ''."""
        return "" if value is None else str(value).strip().lower()

    @staticmethod
    def _matches_company(query: str, name: str, ticker: str) -> bool:
        """Decide whether a lower-cased query refers to a company.

        A match is an exact ticker match, a substring relation in either
        direction between query and name, or any query word longer than three
        characters occurring in the name. Empty strings never match: the
        empty string is a substring of every string, which would otherwise
        turn the first record into a false positive.
        """
        query = (query or "").strip()
        if not query:
            return False
        if ticker and query == ticker:
            return True
        if not name:
            return False
        if query in name or name in query:
            return True
        return any(word in name for word in query.split() if len(word) > 3)

    def _match_dict_entry(self, entry, company_lower: str) -> Optional[Dict]:
        """Return the normalized match dict for *entry*, or None if no match."""
        if not isinstance(entry, dict):
            return None

        title = entry.get('title', entry.get('name', ''))
        ticker = entry.get('ticker', entry.get('symbol', ''))
        if not self._matches_company(
            company_lower, self._normalize(title), self._normalize(ticker)
        ):
            return None

        return {
            'cik': str(entry.get('cik_str', entry.get('cik', ''))).zfill(10),
            'ticker': ticker,
            'title': title,
        }

    @staticmethod
    def _recent_filings(submissions: Dict) -> Dict:
        """Return submissions['filings']['recent'] or {} when absent/malformed."""
        filings = submissions.get('filings') or {}
        recent = filings.get('recent') if isinstance(filings, dict) else None
        return recent if isinstance(recent, dict) else {}

    def _find_company_cik(self, company_name: str) -> Optional[Dict]:
        """Find company CIK (Central Index Key) from company name.

        Downloads the SEC ticker file and dispatches on the shape of the JSON
        payload. Any failure (network, HTTP status, JSON, parsing) is logged
        and answered from the built-in table of well-known companies.

        Returns:
            A dict with 'cik' (zero-padded to 10 characters), 'ticker' and
            'title', or None when nothing matched.
        """
        try:
            tickers_url = "https://www.sec.gov/files/company_tickers_exchange.json"
            response = requests.get(tickers_url, headers=self.headers, timeout=15)
            response.raise_for_status()
            tickers_data = response.json()

            company_lower = company_name.lower().strip()

            if isinstance(tickers_data, dict):
                if 'fields' in tickers_data and 'data' in tickers_data:
                    return self._search_exchange_format(tickers_data, company_lower)
                return self._search_direct_format(tickers_data, company_lower)
            if isinstance(tickers_data, list):
                return self._search_list_format(tickers_data, company_lower)
            return None

        except Exception as e:
            self._logger.warning("Error finding company CIK: %s", e)
            return self._fallback_company_lookup(company_name)

    def _fallback_company_lookup(self, company_name: str) -> Optional[Dict]:
        """Fallback company lookup using known major companies.

        Matches when a known key is contained in the query or the query is
        contained in a known key. A blank query matches nothing. A copy of
        the table entry is returned so callers cannot mutate the shared table.
        """
        company_key = company_name.lower().strip()
        if not company_key:
            # "" is a substring of every key and would always return 'apple'.
            return None

        for key, data in self._KNOWN_COMPANIES.items():
            if key in company_key or company_key in key:
                return dict(data)
        return None

    def _search_exchange_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in exchange ticker data format.

        This layout is a dict with a 'fields' list of column names and a
        'data' list of rows. Returns the first matching row as a
        'cik'/'ticker'/'title' dict, or None when nothing matches or no CIK
        column can be identified.
        """
        fields = tickers_data.get('fields') or []
        rows = tickers_data.get('data') or []

        cik_idx = ticker_idx = name_idx = None
        for i, field in enumerate(fields):
            label = self._normalize(field)
            if label in ('cik', 'cik_str'):
                cik_idx = i
            elif label in ('ticker', 'symbol'):
                ticker_idx = i
            elif label in ('name', 'title', 'company'):
                name_idx = i

        if cik_idx is None:
            # Without a CIK column no usable identifier can be returned.
            self._logger.warning("Error parsing exchange format: no CIK column in %r", fields)
            return None

        # Compare against None explicitly: column index 0 is valid but falsy.
        required_len = 1 + max(
            idx for idx in (cik_idx, ticker_idx, name_idx) if idx is not None
        )

        for row in rows:
            if not isinstance(row, (list, tuple)) or len(row) < required_len:
                continue

            raw_name = row[name_idx] if name_idx is not None else ""
            raw_ticker = row[ticker_idx] if ticker_idx is not None else ""

            if self._matches_company(
                company_lower, self._normalize(raw_name), self._normalize(raw_ticker)
            ):
                return {
                    'cik': str(row[cik_idx]).zfill(10),
                    'ticker': raw_ticker,
                    'title': raw_name,
                }

        return None

    def _search_direct_format(self, tickers_data: dict, company_lower: str) -> Optional[Dict]:
        """Search in direct dictionary format (dict whose values are entry dicts)."""
        for entry in tickers_data.values():
            match = self._match_dict_entry(entry, company_lower)
            if match is not None:
                return match
        return None

    def _search_list_format(self, tickers_data: list, company_lower: str) -> Optional[Dict]:
        """Search in list format (list of entry dicts)."""
        for entry in tickers_data:
            match = self._match_dict_entry(entry, company_lower)
            if match is not None:
                return match
        return None

    def _get_company_submissions(self, cik: str) -> Optional[Dict]:
        """Get company submission data from SEC.

        Best-effort: any failure is logged and reported as None.
        """
        try:
            submissions_url = f"{self.base_url}/submissions/CIK{cik}.json"
            response = requests.get(submissions_url, headers=self.headers, timeout=15)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            self._logger.warning("Error getting company submissions: %s", e)
            return None

    def _format_sec_results(self, company_name: str, cik_data: Dict, submissions: Dict) -> str:
        """Format SEC filing results.

        Args:
            company_name: The name the user searched for.
            cik_data: Match dict providing 'title', 'cik' and optionally 'ticker'.
            submissions: Decoded submissions document for the company.

        Returns:
            Company information followed by the recent-filings and
            financial-highlights sections.
        """
        parts = [
            f"**SEC Financial Data for: {company_name}**\n\n",
            "**Company Information:**\n",
            f"• Official Name: {cik_data['title']}\n",
            f"• Ticker Symbol: {cik_data.get('ticker') or 'N/A'}\n",
            f"• CIK: {cik_data['cik']}\n",
        ]

        if 'description' in submissions:
            # The field may be present but empty or None; show a placeholder
            # rather than an empty bullet, and never call len() on None.
            description = str(submissions.get('description') or '').strip()
            if len(description) > 300:
                business_desc = description[:300] + "..."
            else:
                business_desc = description or 'Not available'
            parts.append(f"• Business Description: {business_desc}\n")

        parts.append(f"• Industry: {submissions.get('sic', 'Not specified')}\n")
        parts.append(f"• Fiscal Year End: {submissions.get('fiscalYearEnd', 'Not specified')}\n\n")

        parts.append(self._analyze_recent_filings(submissions))
        parts.append(self._extract_financial_highlights(submissions))

        return "".join(parts)

    def _analyze_recent_filings(self, submissions: Dict) -> str:
        """Analyze recent SEC filings.

        Scans the first `_RECENT_FILINGS_WINDOW` entries of the recent filing
        list and lists up to `_MAX_KEY_FILINGS` whose form type is a key form.
        """
        header = "**Recent SEC Filings:**\n"

        recent = self._recent_filings(submissions)
        if not recent:
            return header + "• No recent filings available\n\n"

        forms = recent.get('form') or []
        filing_dates = recent.get('filingDate') or []

        # zip() stops at the shorter list, so a form without a date is skipped.
        key_filings = [
            (form, date)
            for form, date in zip(forms[:self._RECENT_FILINGS_WINDOW], filing_dates)
            if isinstance(form, str) and form in self._KEY_FORM_LABELS
        ]

        parts = [header]
        if key_filings:
            for form, date in key_filings[:self._MAX_KEY_FILINGS]:
                parts.append(f"• {form} ({self._KEY_FORM_LABELS[form]}) - Filed: {date}\n")
        else:
            parts.append("• No key financial filings found in recent submissions\n")

        parts.append("\n")
        return "".join(parts)

    def _extract_financial_highlights(self, submissions: Dict) -> str:
        """Extract financial highlights from submission data.

        Reports how many 10-K and 10-Q entries appear in the recent filing
        list, surrounded by fixed explanatory text.
        """
        # NOTE: the status/compliance lines are static text, not derived from
        # the submissions payload.
        parts = [
            "**Financial Data Analysis:**\n",
            "• Filing Status: Active public company\n",
            "• Regulatory Compliance: Current with SEC requirements\n",
        ]

        recent = self._recent_filings(submissions)
        if recent:
            forms = recent.get('form') or []
            annual_reports = sum(1 for form in forms if form == '10-K')
            quarterly_reports = sum(1 for form in forms if form == '10-Q')
            parts.append(f"• Annual Reports (10-K): {annual_reports} on file\n")
            parts.append(f"• Quarterly Reports (10-Q): {quarterly_reports} on file\n")

        parts.extend([
            "• Note: Detailed financial metrics require parsing individual filing documents\n\n",
            "**Investment Research Notes:**\n",
            "• Use SEC filings for: revenue trends, risk factors, management discussion\n",
            "• Key documents: 10-K (annual), 10-Q (quarterly), 8-K (material events)\n",
            "• Combine with market data for comprehensive analysis\n\n",
        ])

        return "".join(parts)

    def _fallback_company_search(self, company_name: str) -> str:
        """Fallback response when company not found in SEC database."""
        return "".join([
            f"**SEC Financial Research for: {company_name}**\n\n",
            "**Company Search Results:**\n",
            f"• Company '{company_name}' not found in SEC EDGAR database\n",
            "• This may indicate the company is:\n",
            " - Private company (not required to file with SEC)\n",
            " - Foreign company not listed on US exchanges\n",
            " - Subsidiary of another public company\n",
            " - Different legal name than search term\n\n",
            "**Alternative Research Suggestions:**\n",
            "• Search for parent company or holding company\n",
            "• Check if company trades under different ticker symbol\n",
            "• Use company's full legal name for search\n",
            "• Consider private company databases for non-public entities\n\n",
        ])

    def should_use_for_query(self, query: str) -> bool:
        """SEC is good for public company financial and business information.

        Returns True when the lower-cased query contains any financial
        indicator phrase.
        """
        query_lower = query.lower()
        return any(indicator in query_lower for indicator in self._FINANCIAL_INDICATORS)

    def extract_key_info(self, text: str) -> dict:
        """Extract key information from SEC results.

        Extends the base tool's result with boolean flags derived from the
        text. The ticker, public-company and financial-data flags are forced
        to False for the "not found" response: that text itself contains the
        words "ticker", "public company" and "Financial", which would
        otherwise set them.
        """
        base_info = super().extract_key_info(text)

        if text:
            text_lower = text.lower()
            company_found = 'not found in SEC' not in text

            base_info.update({
                'has_ticker': company_found and ('Ticker Symbol:' in text or 'ticker' in text),
                'has_cik': 'CIK:' in text,
                'has_filings': any(form in text for form in ('10-K', '10-Q', '8-K')),
                'is_public_company': company_found and 'public company' in text_lower,
                'has_financial_data': company_found and any(
                    term in text_lower for term in ('revenue', 'earnings', 'financial')
                ),
                'company_found': company_found,
            })

        return base_info