# Hugging Face Spaces - status: Running
import streamlit as st | |
import requests | |
import pymupdf # PyMuPDF for PDF extraction | |
import traceback | |
from sentence_transformers import SentenceTransformer | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_groq import ChatGroq | |
# Load API keys from Streamlit secrets (raises KeyError if a secret is missing)
ALPHA_VANTAGE_API_KEY = st.secrets["ALPHA_VANTAGE_API_KEY"]
GROQ_API_KEY = st.secrets["GROQ_API_KEY"]

# Initialize Sentence Transformer for embeddings
# NOTE(review): this runs at module level, so the model is constructed every
# time the script executes - consider caching it if start-up time matters.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize LLM. `llm` is bound before the attempt so that a failed
# initialization leaves a well-defined None rather than an undefined
# module-level name that would surface later as a NameError.
llm = None
try:
    llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
    st.success("✅ Groq LLM initialized successfully.")
except Exception:
    # Top-level boundary: report in the UI and keep the full traceback in
    # the server log instead of crashing the whole app.
    st.error("❌ Failed to initialize Groq LLM.")
    traceback.print_exc()
# Function to extract and chunk text from PDFs
def extract_text_from_pdf(uploaded_file, max_length=5000):
    """Extract the text of a PDF and split it into chunks.

    Args:
        uploaded_file: Binary file-like object holding the PDF. Only its
            ``read()`` method is used; the bytes are parsed in memory.
        max_length: Chunk size passed to the text splitter (``chunk_size``).
            Consecutive chunks overlap by 200 (``chunk_overlap``).

    Returns:
        A list of text chunks. On any failure an error is shown in the
        Streamlit UI, the traceback is printed, and the single-item list
        ``["Error extracting text."]`` is returned instead of raising.
    """
    try:
        # The context manager closes the document even when text extraction
        # fails part-way through, so the parsed PDF is not leaked.
        with pymupdf.open(stream=uploaded_file.read(), filetype="pdf") as doc:
            full_text = "".join(page.get_text() for page in doc)

        # Split text into chunks to avoid LLM token limits
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=max_length,
            chunk_overlap=200,
        )
        return text_splitter.split_text(full_text)
    except Exception:
        st.error("❌ Failed to extract text from PDF.")
        traceback.print_exc()
        return ["Error extracting text."]
# Alpha Vantage request settings shared by the helpers below
_ALPHA_VANTAGE_URL = "https://www.alphavantage.co/query"
_ALPHA_VANTAGE_TIMEOUT = 10  # seconds; keeps a stalled request from hanging the app
# Keys under which Alpha Vantage reports problems (invalid call, rate limit)
# inside an otherwise successful HTTP 200 response.
_ALPHA_VANTAGE_MESSAGE_KEYS = ("Error Message", "Note", "Information")


def _query_alpha_vantage(function, symbol, label):
    """Call one Alpha Vantage endpoint and return its JSON object, or None.

    Args:
        function: Value of the ``function`` query parameter, e.g. ``"OVERVIEW"``.
        symbol: Ticker symbol to query.
        label: Human-readable endpoint name used in UI error messages.

    Returns:
        The decoded JSON object on success. ``None`` when the HTTP status is
        not 200 or the payload is an API message rather than data; in both
        cases the problem has already been reported with ``st.error``.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Passing the values through `params` lets requests URL-encode them, so a
    # ticker typed into the text box cannot inject extra query parameters.
    response = requests.get(
        _ALPHA_VANTAGE_URL,
        params={
            "function": function,
            "symbol": symbol,
            "apikey": ALPHA_VANTAGE_API_KEY,
        },
        timeout=_ALPHA_VANTAGE_TIMEOUT,
    )
    if response.status_code != 200:
        st.error(f"❌ Failed to fetch {label}. Status Code: {response.status_code}")
        return None

    payload = response.json()
    if not isinstance(payload, dict):
        st.error(f"❌ Unexpected response while fetching {label}.")
        return None

    for key in _ALPHA_VANTAGE_MESSAGE_KEYS:
        if key in payload:
            st.error(f"❌ Alpha Vantage could not provide {label}: {payload[key]}")
            return None
    return payload


# Function to fetch financial data from Alpha Vantage
def fetch_financial_data(company_ticker):
    """Fetch market capitalization and latest annual revenue for a ticker.

    Args:
        company_ticker: Ticker symbol as entered by the user. Surrounding
            whitespace is ignored; empty or ``None`` is rejected.

    Returns:
        ``"Market Cap: $<cap>\\nTotal Revenue: $<revenue>"`` on success, with
        ``"N/A"`` substituted for any field missing from the API response.
        Otherwise one of the plain-text messages
        ``"No ticker symbol provided. Please enter a valid company ticker."``,
        ``"Error fetching company overview."``,
        ``"Error fetching income statement."`` or
        ``"Error fetching financial data."``. The function never raises.
    """
    symbol = str(company_ticker or "").strip()
    if not symbol:
        return "No ticker symbol provided. Please enter a valid company ticker."

    try:
        # Fetch Market Cap from Company Overview
        overview_data = _query_alpha_vantage("OVERVIEW", symbol, "company overview")
        if overview_data is None:
            return "Error fetching company overview."
        market_cap = overview_data.get("MarketCapitalization", "N/A")

        # Fetch Revenue from Income Statement
        income_data = _query_alpha_vantage(
            "INCOME_STATEMENT", symbol, "income statement"
        )
        if income_data is None:
            return "Error fetching income statement."
        # `or []` also covers an explicit null in the payload.
        annual_reports = income_data.get("annualReports") or []
        # The first entry of annualReports is used as the revenue figure.
        revenue = (
            annual_reports[0].get("totalRevenue", "N/A") if annual_reports else "N/A"
        )

        return f"Market Cap: ${market_cap}\nTotal Revenue: ${revenue}"
    except Exception:
        # Boundary for network errors, timeouts and malformed JSON.
        st.error("❌ Exception in fetching financial data.")
        traceback.print_exc()
        return "Error fetching financial data."
# Function to generate response using Groq's LLM
def generate_response(user_query, company_ticker, mode, uploaded_file, max_chunks=3):
    """Build a prompt for the selected mode and return the LLM's answer.

    Args:
        user_query: Free-text question from the user. Appended to the prompt
            in both modes when non-blank.
        company_ticker: Ticker symbol; used only in ``"Live Data Mode"``.
        mode: ``"PDF Upload Mode"`` or ``"Live Data Mode"``.
        uploaded_file: File-like PDF; used only in ``"PDF Upload Mode"``.
        max_chunks: Number of leading PDF chunks included in the prompt.

    Returns:
        The text of the LLM response, or a plain-text message when the
        request cannot be served (invalid mode, missing or unreadable PDF,
        failed data fetch, or an LLM error). The function never raises.
    """
    try:
        if mode == "PDF Upload Mode":
            if uploaded_file is None:
                return "No PDF uploaded. Please upload a PDF file."

            chunks = extract_text_from_pdf(uploaded_file)
            # The extractor signals failure with a sentinel list; do not
            # send that sentinel (or an empty document) to the LLM as if
            # it were real content.
            if not chunks or chunks == ["Error extracting text."]:
                return "Could not extract any text from the uploaded PDF."

            chunked_summary = "\n\n".join(chunks[:max_chunks])  # Use first few chunks
            prompt = f"Summarize the key financial insights from this document:\n\n{chunked_summary}"
            # Honour the user's question in PDF mode too.
            if user_query and user_query.strip():
                prompt += f"\n\nUser Query: {user_query}"
        elif mode == "Live Data Mode":
            financial_info = fetch_financial_data(company_ticker)
            # The fetcher reports problems as plain-text messages; pass them
            # straight back instead of asking the LLM to analyze them.
            if financial_info.startswith(("No ticker symbol provided", "Error fetching")):
                return financial_info
            prompt = f"Analyze the financial status of {company_ticker} based on:\n{financial_info}\n\nUser Query: {user_query}"
        else:
            return "Invalid mode selected."

        response = llm.invoke(prompt)
        return response.content
    except Exception:
        # Also reached when the module-level `llm` failed to initialize.
        st.error("❌ Failed to generate AI response.")
        traceback.print_exc()
        return "Error generating response."
# Streamlit UI
st.title("📊 AI-Powered Financial Insights Chatbot")
st.write("Upload financial reports or fetch live financial data to get AI-driven insights.")

# User Input Fields
user_query = st.text_input("Enter your query:")
company_ticker = st.text_input("Enter company ticker symbol (optional):")
mode = st.radio("Select Mode:", ["PDF Upload Mode", "Live Data Mode"])
uploaded_file = st.file_uploader("Upload PDF (Only for PDF Mode)", type=["pdf"])

# Button to process request
if st.button("Get Insights"):
    # Validate the inputs the selected mode depends on before spending an
    # LLM call on a request that cannot produce a meaningful answer.
    if mode == "PDF Upload Mode" and not uploaded_file:
        st.error("❌ Please upload a PDF file.")
    elif mode == "Live Data Mode" and not company_ticker.strip():
        st.error("❌ Please enter a company ticker symbol.")
    else:
        with st.spinner("Processing... ⏳"):
            response = generate_response(user_query, company_ticker, mode, uploaded_file)
        st.subheader("💡 AI Response")
        st.write(response)