import os

import streamlit as st
import torch  # PyTorch backend used by transformers
from transformers import pipeline

# --- Configuration ---
MODEL_NAME = "AdaptLLM/finance-LLM"
# Attempt to get the token from the Space secrets (exposed as environment
# variables); handle the case where it might not be set yet.
HF_TOKEN = os.environ.get("HF_TOKEN")


# --- Model Loading (cached by Streamlit for efficiency) ---
@st.cache_resource  # Cache the pipeline object across reruns
def load_text_generation_pipeline():
    """Loads the text-generation pipeline."""
    if not HF_TOKEN:
        st.warning("HF_TOKEN secret not found. Ensure the model is public or add the token to secrets.")
        # Decide whether to stop or proceed cautiously.
        # st.stop()  # Uncomment to halt execution if the token is strictly required.

    try:
        # Determine device: use GPU (cuda:0) if available, otherwise CPU (-1).
        # Free Spaces typically only have a CPU, so device will likely be -1.
        device = 0 if torch.cuda.is_available() else -1

        st.info(f"Loading model {MODEL_NAME}... This might take a while on the first run.")

        # Use pipeline() for straightforward text generation.
        generator = pipeline(
            "text-generation",
            model=MODEL_NAME,
            tokenizer=MODEL_NAME,  # Usually the same as the model
            token=HF_TOKEN if HF_TOKEN else None,  # `token` replaces the deprecated `use_auth_token`
            torch_dtype=torch.float16,  # Saves memory, but float16 can be slow or unsupported on CPU
            device=device,  # Device placement
        )
        st.success(f"Model {MODEL_NAME} loaded successfully!")
        return generator
    except Exception as e:
        st.error(f"Error loading model pipeline: {e}", icon="🔥")
        st.error("This could be due to memory limits on the free tier, a missing token for a private model, or other issues.")
        st.stop()  # Stop the app if the model fails to load


# --- Streamlit App UI ---
# st.set_page_config must be the first Streamlit command in the script, so it
# has to run before the model loader emits any st.info/st.warning calls.
st.set_page_config(layout="wide")  # Use the wider layout
st.title("💰 FinBuddy Assistant")
st.caption("Your AI-powered financial planning assistant (Text Chat - v1)")

# --- Load the Model Pipeline ---
generator = load_text_generation_pipeline()

# Initialize chat history in session state if it doesn't exist
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display past chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])  # Render stored content as markdown

# Get user input via chat_input
if prompt := st.chat_input("Ask a question about finance..."):
    # Add the user message to session state and display it
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate the assistant response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()  # Placeholder for the final response
        message_placeholder.markdown("Thinking...⏳")  # Initial "thinking" message

        # --- Prepare the prompt for the model ---
        # Simple approach: just use the latest user prompt.
        # TODO: Improve this later to include conversation history for better
        # context (see the sketch at the end of this file).
        prompt_for_model = prompt

        try:
            # Generate a response using the pipeline
            outputs = generator(
                prompt_for_model,
                max_new_tokens=512,  # Limit the length of the response
                num_return_sequences=1,
                eos_token_id=generator.tokenizer.eos_token_id,
                pad_token_id=generator.tokenizer.eos_token_id,  # Helps prevent padding warnings
            )

            if outputs and len(outputs) > 0 and "generated_text" in outputs[0]:
                # Extract the generated text
                full_response = outputs[0]["generated_text"]

                # --- Attempt to clean the response ---
                # The pipeline usually returns prompt + completion, so strip the prompt part.
                if full_response.startswith(prompt_for_model):
                    assistant_response = full_response[len(prompt_for_model):].strip()
                    # Sometimes models add their own role prefix
                    if assistant_response.lower().startswith("assistant:"):
                        assistant_response = assistant_response[len("assistant:"):].strip()
                    elif assistant_response.lower().startswith("response:"):
                        assistant_response = assistant_response[len("response:"):].strip()
                else:
                    assistant_response = full_response  # Fallback if the prompt isn't found at the start

                # Handle cases where the response is empty after cleaning
                if not assistant_response:
                    assistant_response = "I received your message, but I don't have a further response right now."
            else:
                assistant_response = "Sorry, I couldn't generate a response."

            # Display the final response
            message_placeholder.markdown(assistant_response)

            # Add the final assistant response to session state
            st.session_state.messages.append({"role": "assistant", "content": assistant_response})

        except Exception as e:
            error_message = f"Error during text generation: {e}"
            st.error(error_message, icon="🔥")
            message_placeholder.markdown("Sorry, an error occurred while generating the response.")
            # Add an error indication to the history
            st.session_state.messages.append({"role": "assistant", "content": f"[Error: {e}]"})
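
# --- Sketch: folding chat history into the prompt ---
# The TODO above sends only the latest user message to the model. One simple
# way to add context is to concatenate recent turns into a single prompt
# string before calling the pipeline. This is a minimal sketch and is NOT
# wired into the app: the helper name and the "User:/Assistant:" template are
# assumptions, and the right format depends on how AdaptLLM/finance-LLM was
# trained. To try it, move this definition above the chat loop and set
# `prompt_for_model = build_prompt_with_history(st.session_state.messages)`
# (the latest user message is already in session state at that point).
def build_prompt_with_history(messages, max_turns=4):
    """Concatenate roughly the last `max_turns` exchanges into one prompt string."""
    recent = messages[-(max_turns * 2):]  # Each turn is a user + assistant pair
    lines = [f"{m['role'].capitalize()}: {m['content']}" for m in recent]
    lines.append("Assistant:")  # Cue the model to continue as the assistant
    return "\n".join(lines)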