File size: 8,023 Bytes
12d891e
 
7c95914
12d891e
7c95914
 
 
 
 
 
 
 
12d891e
7c95914
a4fe14d
376d93f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a29b7e
376d93f
83a8ae5
09c08c1
376d93f
a4fe14d
44c0e78
 
8a1f468
 
 
 
 
 
44c0e78
8a1f468
44c0e78
 
f3aaf05
47f6195
7c95914
12d891e
 
abd1f1b
12d891e
d3de2d8
376d93f
 
 
12d891e
 
 
 
 
 
 
 
 
 
 
f8dc8d1
a98948f
8a1f468
 
12d891e
 
403a475
7c95914
12d891e
7c95914
12d891e
 
 
 
 
 
 
 
 
 
8a1f468
7c95914
 
 
 
2df9243
7c95914
 
 
 
8a1f468
a63f1b5
4a1415a
5d91cf0
12d891e
c678561
a6d00b1
12d891e
5e5c096
12d891e
5e5c096
 
 
 
 
12d891e
2b04423
12d891e
 
5e5c096
 
b6bac0f
 
 
 
 
5e5c096
7c95914
 
 
5e5c096
a152229
7c95914
5e5c096
b6bac0f
 
 
 
9ca7d21
 
b6bac0f
5e5c096
b6bac0f
 
5e5c096
2006ce0
 
3a74246
09c08c1
3a74246
e2cb1fd
3a74246
2006ce0
 
 
 
 
5e5c096
 
4a1415a
12d891e
b6bac0f
12d891e
 
 
 
 
36c54d4
12d891e
36c54d4
12d891e
36c54d4
 
 
 
 
12d891e
36c54d4
 
4a1415a
 
12d891e
36c54d4
 
5e5c096
 
12d891e
 
 
 
 
 
abd1f1b
0da8351
4a1415a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import html
import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import Repository
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space

def cloud_button(label, key):
    """Render a custom-styled HTML button and report its session-state flag.

    Builds one HTML snippet containing a ``<style>`` block scoped to the
    element id ``button_<key>``, a ``<button>`` showing *label*, and a
    ``<script>`` whose click handler posts
    ``{'setSessionState': {<key>: true}}`` to ``window.parent``. The snippet
    is emitted through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        label: Text placed inside the button element.
        key: Used both as the suffix of the element id and as the
            ``st.session_state`` key that is read back.

    Returns:
        ``st.session_state.get(key, False)``.

    NOTE(review): nothing in this file handles the ``setSessionState``
    message, and it is not evident that the inline script is executed when
    injected this way - confirm before relying on the return value.
    NOTE(review): *label* and *key* are interpolated into HTML/JS without
    escaping; only pass trusted, developer-controlled values.
    """
    button_id = f"button_{key}"
    # Doubled braces ({{ }}) are literal braces in the f-string, needed for
    # the CSS rule bodies and the JavaScript object/function bodies.
    button_html = f"""
        <style>
            #{button_id} {{
                display: inline-flex;
                align-items: center;
                justify-content: center;
                padding: 8px 15px;
                font-size: 14px;
                cursor: pointer;
                text-align: center;
                text-decoration: none;
                outline: none;
                color: #fff;
                background-color: #add8e6;
                border: none;
                border-radius: 20px;
                box-shadow: 0 5px 10px rgba(0,0,0,0.1);
                margin: 5px;
            }}
            #{button_id}:hover {{
                background-color: #9fced1;
            }}
            #{button_id}:active {{
                background-color: #87b6a7;
                box-shadow: 0 2px 5px rgba(0,0,0,0.1);
                transform: translateY(2px);
            }}
        </style>
        <button id="{button_id}" class="cloud-button">{label}</button>
        <script>
            document.getElementById("{button_id}").onclick = function() {{
                window.parent.postMessage({{
                    'setSessionState': {{ '{key}': true }}
                }}, "*");
            }};
        </script>
    """
    
    # Emit the raw HTML, then read back whatever flag is stored under `key`.
    st.markdown(button_html, unsafe_allow_html=True)
    return st.session_state.get(key, False)

        
# Step 1: Clone the dataset repository into a local working directory.
# HUB_TOKEN must be present in the environment; a missing variable raises
# KeyError at import time.
repo = Repository(
    clone_from="Anne31415/Private_Book",  # Hub repository id to clone
    repo_type="dataset",  # The repository is a dataset repo
    local_dir="Private_Book",  # Directory that receives the checkout
    token=os.environ["HUB_TOKEN"],  # Secret token used for authentication
)
repo.git_pull()  # Fetch the latest changes, if any

# Step 2: Path of the PDF inside the local checkout.
pdf_file_path = "Private_Book/KOMBI_all2.pdf"

# Sidebar: branding, a short description, and an overview of what the
# assistant can help with.
with st.sidebar:
    st.title('BinDoc GmbH')
    st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")

    add_vertical_space(1)  # Adjust as per the desired spacing

    # The text relies on <br> tags for line breaks, hence unsafe_allow_html.
    # The literal must start right after the opening quotes: any stray
    # character there would be rendered verbatim in the sidebar.
    st.markdown("""
    Hello! I’m here to assist you with:<br><br>
    📘 **Glossary Inquiries:**<br>
    I can clarify terms like "DiGA", "AOP", or "BfArM", providing clear and concise explanations to help you understand our content better.<br><br>
    🆘 **Help Page Navigation:**<br>
    Ask me if you forgot your password or want to know more about topics related to the platform.<br><br>
    📰 **Latest Whitepapers Insights:**<br>
    Curious about our recent publications? Feel free to ask about our latest whitepapers!<br><br>
    """, unsafe_allow_html=True)

    add_vertical_space(1)  # Adjust as per the desired spacing

    st.write('Made with ❤️ by BinDoc GmbH')

# Read the OpenAI API key from the process environment (None when unset).
api_key = os.getenv("OPENAI_API_KEY")


def load_pdf(file_path):
    """Return the vector store for the PDF at *file_path*, building it if needed.

    The store is cached as ``<pdf basename without extension>.pkl`` in the
    current working directory. If that file exists it is unpickled and
    returned straight away, without opening the PDF. Otherwise the PDF text
    is extracted, split into overlapping chunks, embedded with
    ``OpenAIEmbeddings`` into a FAISS store, and the store is pickled for the
    next call.

    Args:
        file_path: Path to the PDF file.

    Returns:
        The cached or newly built vector store.

    Raises:
        ValueError: If no text could be extracted from the PDF, so there is
            nothing to index.
    """
    store_name, _ = os.path.splitext(os.path.basename(file_path))
    cache_path = f"{store_name}.pkl"

    # Look at the cache first: parsing and splitting the PDF is wasted work
    # when the pickled store is already available.
    if os.path.exists(cache_path):
        # NOTE(security): pickle.load can execute arbitrary code. This is
        # only acceptable because the cache file is written by this function;
        # never point it at a file from an untrusted source.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    pdf_reader = PdfReader(file_path)
    # `or ""` keeps a page that yields no text (None or empty) from breaking
    # the join; joining once also avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Fail with a clear message before any embeddings are requested.
        raise ValueError(f"No extractable text found in PDF: {file_path}")

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)

    return vector_store



def load_chatbot():
    """Create the question-answering chain used to answer queries.

    Returns:
        The chain produced by ``load_qa_chain`` for a default-constructed
        ``OpenAI`` LLM with ``chain_type="stuff"``.
    """
    language_model = OpenAI()
    qa_chain = load_qa_chain(llm=language_model, chain_type="stuff")
    return qa_chain

def main():
    """Render the chat page and answer the current question, if any.

    Flow of one script run:
      1. Hide Streamlit's menu/footer and show the title.
      2. Stop with an error if the PDF at ``pdf_file_path`` does not exist.
      3. Render the stored chat history.
      4. Read a question from the text input or one of the predefined
         question buttons.
      5. If there is a question, retrieve the 3 most similar chunks, run the
         QA chain, store both the question and the answer in the history and
         show them in the placeholder above the input.
      6. Mark every stored message as "old" so it is no longer highlighted
         on the next run.
    """
    # Hide Streamlit's menu and footer
    hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    # Main content
    st.title("Welcome to BinDocs ChatBot! 🤖")

    # Check if PDF file exists
    pdf_path = pdf_file_path
    if not os.path.exists(pdf_path):
        st.error("File not found. Please check the file path.")
        return

    # Initialize chat history in session state
    if "chat_history" not in st.session_state:
        st.session_state['chat_history'] = []

    # Display previous chat messages
    display_chat_history(st.session_state['chat_history'])

    # Spacer
    st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
    st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
    st.write("<!-- End Spacer -->", unsafe_allow_html=True)

    # Reserved slot (above the input) for the messages of this run.
    new_messages_placeholder = st.empty()

    # User input for questions
    query = st.text_input("Ask questions about your PDF file (in any preferred language):")

    # Buttons for predefined queries: a clicked button replaces the typed
    # question for this run. All three are handled the same way.
    predefined_questions = (
        ("button1", "Was genau ist ein Belegarzt?"),
        ("button2", "Wofür wird die Alpha-ID verwendet?"),
        ("button3", "Was sind die Vorteile des ambulanten operierens?"),
    )
    for button_key, question in predefined_questions:
        if st.button(question, key=button_key):
            query = question

    # Processing the query
    if query:
        loading_message = st.empty()
        loading_message.text('Bot is thinking...')
        try:
            vector_store = load_pdf(pdf_path)
            chain = load_chatbot()
            docs = vector_store.similarity_search(query=query, k=3)
            with get_openai_callback():
                response = chain.run(input_documents=docs, question=query)
        finally:
            # Never leave the "thinking" notice behind, even on failure.
            loading_message.empty()

        # Record the question together with the answer so the history shows
        # both sides of the exchange.
        new_messages = [("User", query, "new"), ("Bot", response, "new")]
        st.session_state['chat_history'].extend(new_messages)

        # A bare st.empty() slot holds a single element, so writing to it
        # repeatedly would keep only the last message; a container inside
        # the slot can hold all of them.
        with new_messages_placeholder.container():
            display_chat_history(new_messages)

        # Scroll to the latest response using JavaScript
        # NOTE(review): no element with id 'response' is created in this
        # file, and it is unclear whether this script is executed at all -
        # confirm or remove.
        st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)

    # Mark all messages as old after displaying
    st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]


def _chat_bubble_html(chat):
    """Return the HTML for one chat bubble.

    Args:
        chat: Sequence whose first three items are (sender, message, status).

    Returns:
        A ``<div>`` string. The background is ``#FFA07A`` for status
        ``"new"``, otherwise ``#acf`` for sender ``"User"`` and ``#caf`` for
        anyone else. Sender and message are HTML-escaped.
    """
    sender, message, status = chat[:3]
    if status == "new":
        background_color = "#FFA07A"
    elif sender == "User":
        background_color = "#acf"
    else:
        background_color = "#caf"
    # The text comes from the user and from the language model and is
    # rendered with unsafe_allow_html=True, so it must be escaped to keep
    # markup in a message from being injected into the page.
    safe_sender = html.escape(str(sender))
    safe_message = html.escape(str(message))
    return (
        f"<div style='background-color: {background_color}; padding: 10px; "
        f"border-radius: 10px; margin: 10px;'>{safe_sender}: {safe_message}</div>"
    )


def display_chat_history(chat_history):
    """Render every message in *chat_history* as a coloured bubble.

    Args:
        chat_history: Iterable of (sender, message, status) entries; each
            one is written with ``st.markdown`` in iteration order.
    """
    for chat in chat_history:
        st.markdown(_chat_bubble_html(chat), unsafe_allow_html=True)

# Call main() only when this file is executed as the main module, not when
# it is imported from elsewhere.
if __name__ == "__main__":
    main()