Anne31415 committed on
Commit 12d891e · 1 Parent(s): fa28fee

Update app.py

Files changed (1)
  1. app.py +89 -87
app.py CHANGED
@@ -1,13 +1,8 @@
-import os
-from huggingface_hub import Repository
-import streamlit.components.v1 as components
-from datasets import load_dataset
-import random
+import streamlit as st
+from dotenv import load_dotenv
 import pickle
-from nltk.tokenize import sent_tokenize
-import nltk
+from huggingface_hub import Repository
 from PyPDF2 import PdfReader
-import streamlit as st
 from streamlit_extras.add_vertical_space import add_vertical_space
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings.openai import OpenAIEmbeddings
@@ -15,9 +10,7 @@ from langchain.vectorstores import FAISS
 from langchain.llms import OpenAI
 from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks import get_openai_callback
-
-
-nltk.download('punkt')
+import os

 # Step 1: Clone the Dataset Repository
 repo = Repository(
@@ -31,42 +24,59 @@ repo = Repository(
 repo.git_pull()  # Pull the latest changes (if any)

 # Step 2: Load the PDF File
-pdf_file_path = "Private_Book/ops2023syst_referenz_20221021.pdf"  # Replace with your PDF file path
-
+pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf"  # Replace with your PDF file path

-# Sidebar contents
 with st.sidebar:
-    st.title(':orange_book: BinDoc GmbH')
+    st.title('BinDoc GmbH')
+    st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")

-
-    api_key = os.getenv("OPENAI_API_KEY")
-    # Retrieve the API key from st.secrets
+    add_vertical_space(1)  # Adjust as per the desired spacing

+    st.markdown("""
+    Hello! I’m here to assist you with:<br><br>
+    📘 **Glossary Inquiries:**<br>
+    I can clarify terms like "DiGA", "AOP", or "BfArM", providing clear and concise explanations to help you understand our content better.<br><br>
+    🆘 **Help Page Navigation:**<br>
+    Ask me if you forgot your password or want to know more about topics related to the platform.<br><br>
+    📰 **Latest Whitepapers Insights:**<br>
+    Curious about our recent publications? Feel free to ask about our latest whitepapers!<br><br>
+    """, unsafe_allow_html=True)
+
+    add_vertical_space(1)  # Adjust as per the desired spacing
+
+    glossary_option = st.selectbox(
+        'Want to see more Glossary Topics to ask about?',
+        ('Basisfallwert', 'Cash Flow', 'Arzneimittelgesetz (AMG)')
+    )

-    if not api_key:
-        st.warning('API key is required to proceed.')
-        st.stop()  # Stop the app if the API key is not provided
+    add_vertical_space(1)

-    st.markdown("Experience the future of document interaction with the revolutionary")
-    st.markdown("**BinDocs Chat App**.")
-    st.markdown("Harnessing the power of a Large Language Model and AI technology,")
-    st.markdown("this innovative platform redefines PDF engagement,")
-    st.markdown("enabling dynamic conversations that bridge the gap between")
-    st.markdown("human and machine intelligence.")
+    whitepaper_option = st.selectbox(
+        'Did you know we\'ve authored some really insightful and helpful whitepapers as well?',
+        ('Die Value Story als strategisches Instrument', 'Patientenmagnet Robotik: Best Practice Beispiel ', 'Das AGAPLesion-Konzept ')
+    )

-    add_vertical_space(3)  # Add more vertical space between text blocks
     st.write('Made with ❤️ by BinDoc GmbH')

+api_key = os.getenv("OPENAI_API_KEY")
+# Retrieve the API key from st.secrets
+
+
 def load_pdf(file_path):
     pdf_reader = PdfReader(file_path)
-    chunks = []
+    text = ""
     for page in pdf_reader.pages:
-        text = page.extract_text()
-        if text:
-            chunks.append(text)
-
-    store_name = os.path.basename(file_path)[:-4]
-
+        text += page.extract_text()
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text=text)
+
+    store_name, _ = os.path.splitext(os.path.basename(file_path))
+
     if os.path.exists(f"{store_name}.pkl"):
         with open(f"{store_name}.pkl", "rb") as f:
             VectorStore = pickle.load(f)
@@ -79,34 +89,25 @@ def load_pdf(file_path):
     return VectorStore


-def load_chatbot(max_tokens=300):
-    return load_qa_chain(llm=OpenAI(temperature=0.1, max_tokens=max_tokens), chain_type="stuff")
-

-def display_chat_history(chat_history):
-    for chat in chat_history:
-        background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
-        st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-def remove_incomplete_sentences(text):
-    sentences = sent_tokenize(text)
-    complete_sentences = [sent for sent in sentences if sent.endswith(('.', '!', '?'))]
-    return ' '.join(complete_sentences)
-
-def remove_redundant_information(text):
-    sentences = sent_tokenize(text)
-    unique_sentences = list(set(sentences))
-    return ' '.join(unique_sentences)
+def load_chatbot():
+    return load_qa_chain(llm=OpenAI(), chain_type="stuff")

-# Define a maximum token limit to avoid infinite loops
-MAX_TOKEN_LIMIT = 400
-
-import random
+def main():

-def main():
-    st.title("BinDocs Chat App")
+    hide_streamlit_style = """
+    <style>
+    #MainMenu {visibility: hidden;}
+    footer {visibility: hidden;}
+    </style>
+    """
+    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

+    # Main content
+    st.title("Welcome to BinDocs ChatBot! 🤖")
+
+    # Directly specifying the path to the PDF file
     pdf_path = pdf_file_path
     if not os.path.exists(pdf_path):
         st.error("File not found. Please check the file path.")
@@ -126,56 +127,57 @@ def main():
     if pdf_path is not None:
         query = st.text_input("Ask questions about your PDF file (in any preferred language):")

-        col1, col2 = st.columns(2)
-
-        with col1:
-            st.header("Examples1")
         if st.button("Was genau ist ein Belegarzt?"):
             query = "Was genau ist ein Belegarzt?"
         if st.button("Wofür wird die Alpha-ID verwendet?"):
             query = "Wofür wird die Alpha-ID verwendet?"
-
-        with col2:
-            st.header("Examples2")
         if st.button("Was sind die Vorteile des ambulanten operierens?"):
             query = "Was sind die Vorteile des ambulanten operierens?"
+        if st.button("Was kann ich mit dem Prognose-Analyse Toll machen?"):
+            query = "Was kann ich mit dem Prognose-Analyse Toll machen?"
+        if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
+            query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
+        if st.button("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?"):
+            query = ("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?")

-        if st.button("Ask") or (not st.session_state['chat_history'] and query) or (st.session_state['chat_history'] and query != st.session_state['chat_history'][-1][1]):
-            st.session_state['chat_history'].append(("User", query, "new"))
+
+        if st.button("Ask") or (not st.session_state['chat_history'] and query) or (st.session_state['chat_history'] and query != st.session_state['chat_history'][-1][1]):
+            st.session_state['chat_history'].append(("User", query, "new"))

-            loading_message = st.empty()
+            loading_message = st.empty()
+            loading_message.text('Bot is thinking...')

-            loading_message.text('Bot is thinking...')
-
-            VectorStore = load_pdf(pdf_file_path)
-            max_tokens = 120
-            chain = load_chatbot(max_tokens=max_tokens)
-            docs = VectorStore.similarity_search(query=query, k=2)
-
-            with get_openai_callback() as cb:
-                response = chain.run(input_documents=docs, question=query)
+            VectorStore = load_pdf(pdf_path)
+            chain = load_chatbot()
+            docs = VectorStore.similarity_search(query=query, k=3)
+            with get_openai_callback() as cb:
+                response = chain.run(input_documents=docs, question=query)

-            # Post-processing to remove incomplete sentences and redundant information
-            filtered_response = remove_incomplete_sentences(response)
-            filtered_response = remove_redundant_information(filtered_response)
-
-            st.session_state['chat_history'].append(("Bot", filtered_response, "new"))
+            st.session_state['chat_history'].append(("Bot", response, "new"))

+            # Display new messages at the bottom
             new_messages = st.session_state['chat_history'][-2:]
             for chat in new_messages:
                 background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
                 new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)

+            # Scroll to the latest response using JavaScript
             st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)

             loading_message.empty()

+            # Clear the input field by setting the query variable to an empty string
             query = ""
-        else:
-            st.warning("Please enter a query before asking questions.")

-    st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
-
+        # Mark all messages as old after displaying
+        st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
+
+
+
+def display_chat_history(chat_history):
+    for chat in chat_history:
+        background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
+        st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)

 if __name__ == "__main__":
-    main()
+    main()
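
For readers following the new load_pdf() flow: the hunk above shows the chunking and the cache-hit branch, but the branch that first builds and pickles the FAISS index sits in unchanged lines outside the hunk. Below is a minimal sketch of that build-or-load pattern, assuming it uses the OpenAIEmbeddings and FAISS imports already present in app.py; the helper name build_or_load_vectorstore is illustrative, not from the commit.

# Sketch only: the actual else-branch of load_pdf() is not shown in this diff.
import os
import pickle

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS


def build_or_load_vectorstore(chunks, store_name):
    # chunks: list of text pieces from RecursiveCharacterTextSplitter.split_text()
    cache_path = f"{store_name}.pkl"
    if os.path.exists(cache_path):
        # Cache hit: reuse the pickled index instead of re-embedding the PDF.
        with open(cache_path, "rb") as f:
            return pickle.load(f)
    # Cache miss: embed the chunks once and persist the FAISS index to disk.
    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)
    return vector_store

Note that keying the cache on the PDF's base name means a changed document with the same file name keeps serving the stale index; deleting the corresponding .pkl file forces a rebuild.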