import streamlit as st

## A dockerized Streamlit app reads keys from os.getenv(); otherwise fall back to st.secrets.
import os

api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]

cirrus_key = os.getenv("CIRRUS_KEY")
if cirrus_key is None:
    cirrus_key = st.secrets["CIRRUS_KEY"]

st.title("HWC LLM Testing")

# Bare string literals render as markdown via Streamlit "magic".
'''
(The demo takes a while on first load while it processes all the data!
This will be pre-processed in the future...)
'''

import requests
import zipfile

def download_and_unzip(url, output_dir):
    """Download a zip archive, extract it into output_dir, and remove the archive."""
    response = requests.get(url)
    response.raise_for_status()
    zip_file_path = os.path.basename(url)
    with open(zip_file_path, 'wb') as f:
        f.write(response.content)
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(output_dir)
    os.remove(zip_file_path)

import pathlib
from langchain_community.document_loaders import PyPDFLoader

@st.cache_data
def pdf_loader(path):
    # Load every PDF in the directory; cached so Streamlit reruns don't re-parse.
    all_documents = []
    docs_dir = pathlib.Path(path)
    for file in sorted(docs_dir.glob("*.pdf")):
        loader = PyPDFLoader(str(file))
        documents = loader.load()
        all_documents.extend(documents)
    return all_documents

# Skip the download on Streamlit reruns if the data is already present.
if not os.path.exists("hwc"):
    download_and_unzip("https://minio.carlboettiger.info/public-data/hwc.zip", "hwc")
docs = pdf_loader('hwc/')

from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(
    model="cirrus",
    api_key=cirrus_key,
    base_url="https://llm.cirrus.carlboettiger.info/v1",
)

# Build a retrieval agent
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=300)
splits = text_splitter.split_documents(docs)

from langchain_core.vectorstores import InMemoryVectorStore

# The leading underscore on _splits tells st.cache_resource not to hash that argument.
@st.cache_resource
def vector_store(_splits):
    vectorstore = InMemoryVectorStore.from_documents(documents=_splits, embedding=embedding)
    retriever = vectorstore.as_retriever()
    return retriever

# here we go, slow part: embed all chunks and build the retriever
retriever = vector_store(splits)

# Set up the language model
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="llama3",
    api_key=api_key,
    base_url="https://llm.nrp-nautilus.io",
    temperature=0,
)
## Alternatively, point base_url at the Cirrus endpoint used for the embeddings above.

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following scientific articles as the retrieved context to answer "
    "the question. Appropriately cite the articles from the context on which your answer is based. "
    "Do not attempt to cite articles that are not in the context. "
    "If you don't know the answer, say that you don't know. "
    "Use up to five sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

from langchain.chains.combine_documents import create_stuff_documents_chain

question_answer_chain = create_stuff_documents_chain(llm, prompt)

from langchain.chains import create_retrieval_chain

rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Place the chain inside a Streamlit chat interface.
# Use a distinct name for the user's message so we don't shadow the
# ChatPromptTemplate bound to `prompt` above.
if user_query := st.chat_input("What are the most cost-effective prevention methods for elephants raiding my crops?"):
    with st.chat_message("user"):
        st.markdown(user_query)

    with st.chat_message("assistant"):
        results = rag_chain.invoke({"input": user_query})
        st.write(results['answer'])
        with st.expander("See context matched"):
            # FIXME parse results dict and display in pretty format
            st.write(results['context'])

# Adapt for memory / multi-question interaction (see the first sketch below) with:
# https://python.langchain.com/docs/tutorials/qa_chat_history/
# Also see structured outputs (second sketch below).
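
# --- Sketch 1: multi-turn memory, per the qa_chat_history tutorial linked above ---
# A minimal, untested sketch (assumes a langchain version that provides
# create_history_aware_retriever). Each incoming question is first rewritten to
# stand alone given the prior turns, then used for retrieval; the running
# history is kept in st.session_state. The contextualize_prompt wording below
# is illustrative, not from the tutorial verbatim.
#
# from langchain.chains import create_history_aware_retriever
# from langchain_core.prompts import MessagesPlaceholder
#
# contextualize_prompt = ChatPromptTemplate.from_messages([
#     ("system", "Given the chat history and the latest user question, "
#                "rewrite the question so it can be understood without the history."),
#     MessagesPlaceholder("chat_history"),
#     ("human", "{input}"),
# ])
# history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_prompt)
# rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
#
# if "chat_history" not in st.session_state:
#     st.session_state.chat_history = []
# results = rag_chain.invoke({"input": user_query,
#                             "chat_history": st.session_state.chat_history})
# st.session_state.chat_history += [("human", user_query), ("ai", results["answer"])]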
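
# --- Sketch 2: structured outputs ---
# A minimal, untested sketch using ChatOpenAI.with_structured_output(); this
# only works if the serving endpoint supports tool/function calling. The
# `Answer` schema is a hypothetical example, not part of the app.
#
# from pydantic import BaseModel, Field
#
# class Answer(BaseModel):
#     answer: str = Field(description="Concise answer, five sentences max.")
#     citations: list[str] = Field(description="Titles of the context articles cited.")
#
# structured_llm = llm.with_structured_output(Answer)
# structured_answer = structured_llm.invoke(
#     f"Context:\n{results['context']}\n\nQuestion: {user_query}"
# )
# st.write(structured_answer.model_dump())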