Anne31415 committed on
Commit
d3a0859
·
1 Parent(s): 8eedebb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -52
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  from dotenv import load_dotenv
3
  import pickle
4
  from huggingface_hub import Repository
@@ -16,15 +17,13 @@ import os
16
  repo = Repository(
17
  local_dir="Private_Book", # Local directory to clone the repository
18
  repo_type="dataset", # Specify that this is a dataset repository
19
-
20
  clone_from="Anne31415/Private_Book", # Replace with your repository URL
21
-
22
  token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
23
  )
24
  repo.git_pull() # Pull the latest changes (if any)
25
 
26
  # Step 2: Load the PDF File
27
- pdf_file_path = "Private_Book/KOMBI_all2.pdf" # Replace with your PDF file path
28
 
29
  with st.sidebar:
30
  st.title('BinDoc GmbH')
@@ -49,40 +48,44 @@ with st.sidebar:
49
  api_key = os.getenv("OPENAI_API_KEY")
50
  # Retrieve the API key from st.secrets
51
 
52
-
53
- def load_pdf(file_path):
54
- pdf_reader = PdfReader(file_path)
55
- text = ""
56
- for page in pdf_reader.pages:
57
- text += page.extract_text()
58
-
59
- text_splitter = RecursiveCharacterTextSplitter(
60
- chunk_size=1000,
61
- chunk_overlap=200,
62
- length_function=len
63
- )
64
- chunks = text_splitter.split_text(text=text)
65
-
66
- store_name, _ = os.path.splitext(os.path.basename(file_path))
67
-
68
- if os.path.exists(f"{store_name}.pkl"):
69
- with open(f"{store_name}.pkl", "rb") as f:
70
- VectorStore = pickle.load(f)
71
- else:
72
  embeddings = OpenAIEmbeddings()
73
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
74
  with open(f"{store_name}.pkl", "wb") as f:
75
  pickle.dump(VectorStore, f)
 
 
 
76
 
77
  return VectorStore
78
 
79
-
 
 
 
 
 
 
80
 
81
  def load_chatbot():
82
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
83
 
84
  def main():
85
-
86
  hide_streamlit_style = """
87
  <style>
88
  #MainMenu {visibility: hidden;}
@@ -91,51 +94,51 @@ def main():
91
  """
92
  st.markdown(hide_streamlit_style, unsafe_allow_html=True)
93
 
94
-
95
  # Main content
96
  st.title("Welcome to BinDocs ChatBot! 🤖")
97
-
98
- # Directly specifying the path to the PDF file
99
- pdf_path = pdf_file_path
100
- if not os.path.exists(pdf_path):
101
- st.error("File not found. Please check the file path.")
102
- return
103
 
104
- if "chat_history" not in st.session_state:
105
- st.session_state['chat_history'] = []
 
 
 
 
 
106
 
107
- display_chat_history(st.session_state['chat_history'])
108
 
109
- st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
110
- st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
111
- st.write("<!-- End Spacer -->", unsafe_allow_html=True)
112
 
113
- new_messages_placeholder = st.empty()
 
 
 
 
 
 
114
 
115
- if pdf_path is not None:
116
  query = st.text_input("Ask questions about your PDF file (in any preferred language):")
117
 
118
  if st.button("Was genau ist ein Belegarzt?"):
119
  query = "Was genau ist ein Belegarzt?"
120
  if st.button("Wofür wird die Alpha-ID verwendet?"):
121
  query = "Wofür wird die Alpha-ID verwendet?"
122
- if st.button("Was sind die Vorteile des ambulanten operierens?"):
123
- query = "Was sind die Vorteile des ambulanten operierens?"
124
- if st.button("Was kann ich mit dem Prognose-Analyse Toll machen?"):
125
- query = "Was kann ich mit dem Prognose-Analyse Toll machen?"
126
  if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
127
  query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
128
- if st.button("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?"):
129
- query = ("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?")
130
 
131
-
132
- if st.button("Ask") or (not st.session_state['chat_history'] and query) or (st.session_state['chat_history'] and query != st.session_state['chat_history'][-1][1]):
133
  st.session_state['chat_history'].append(("User", query, "new"))
134
 
135
  loading_message = st.empty()
136
  loading_message.text('Bot is thinking...')
137
 
138
- VectorStore = load_pdf(pdf_path)
139
  chain = load_chatbot()
140
  docs = VectorStore.similarity_search(query=query, k=3)
141
  with get_openai_callback() as cb:
@@ -154,18 +157,17 @@ def main():
154
 
155
  loading_message.empty()
156
 
157
- # Clear the input field by setting the query variable to an empty string
158
  query = ""
159
 
160
  # Mark all messages as old after displaying
161
  st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
162
 
163
 
164
-
165
  def display_chat_history(chat_history):
166
  for chat in chat_history:
167
  background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
168
  st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
169
 
170
  if __name__ == "__main__":
171
- main()
 
1
  import streamlit as st
2
+ import streamlit_analytics
3
  from dotenv import load_dotenv
4
  import pickle
5
  from huggingface_hub import Repository
 
17
  repo = Repository(
18
  local_dir="Private_Book", # Local directory to clone the repository
19
  repo_type="dataset", # Specify that this is a dataset repository
 
20
  clone_from="Anne31415/Private_Book", # Replace with your repository URL
 
21
  token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
22
  )
23
  repo.git_pull() # Pull the latest changes (if any)
24
 
25
  # Step 2: Load the PDF File
26
+ pdf_path = "Private_Book/KOMBI_all2.pdf" # Replace with your PDF file path
27
 
28
  with st.sidebar:
29
  st.title('BinDoc GmbH')
 
48
  api_key = os.getenv("OPENAI_API_KEY")
49
  # Retrieve the API key from st.secrets
50
 
# Cache the loaded vector store with st.cache_data; persist="disk" keeps the
# cached result on disk so it survives across sessions.
@st.cache_data(persist="disk")
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building it when necessary.

    When ``{store_name}.pkl`` exists and ``force_reload`` is false, the store
    is unpickled from that file. Otherwise the PDF text is split into
    overlapping chunks, embedded with OpenAI embeddings, indexed with FAISS,
    and the resulting store is pickled to ``{store_name}.pkl``.

    Args:
        file_path: Path of the PDF whose text is indexed.
        store_name: Base name (without extension) of the pickle file.
        force_reload: Rebuild the store even if the pickle file exists
            (e.g. when the PDF has changed).

    Returns:
        The loaded or freshly built vector store.
    """
    pickle_path = f"{store_name}.pkl"

    if not force_reload and os.path.exists(pickle_path):
        # NOTE(review): pickle.load can execute arbitrary code contained in
        # the file. This is only safe while the .pkl is written exclusively
        # by this application -- confirm nothing else can place it there.
        with open(pickle_path, "rb") as f:
            return pickle.load(f)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    text = load_pdf_text(file_path)
    chunks = text_splitter.split_text(text=text)

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)

    # Write to a temporary file first and move it into place afterwards, so
    # an interrupted dump never leaves a truncated .pkl that later runs
    # would try (and fail) to unpickle.
    tmp_path = f"{pickle_path}.tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(vector_store, f)
    os.replace(tmp_path, pickle_path)

    return vector_store
76
 
def load_pdf_text(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Pages for which ``extract_text()`` yields a falsy value (for example
    ``None``) contribute an empty string.
    """
    reader = PdfReader(file_path)
    return "".join(page.extract_text() or "" for page in reader.pages)
84
 
def load_chatbot():
    """Build and return a question-answering chain of type "stuff" on an OpenAI LLM."""
    llm = OpenAI()
    return load_qa_chain(llm=llm, chain_type="stuff")
87
 
88
  def main():
 
89
  hide_streamlit_style = """
90
  <style>
91
  #MainMenu {visibility: hidden;}
 
94
  """
95
  st.markdown(hide_streamlit_style, unsafe_allow_html=True)
96
 
 
97
  # Main content
98
  st.title("Welcome to BinDocs ChatBot! 🤖")
 
 
 
 
 
 
99
 
100
+ # Start tracking user interactions
101
+ with streamlit_analytics.track():
102
+ if not os.path.exists(pdf_path):
103
+ st.error("File not found. Please check the file path.")
104
+ return
105
+
106
+ VectorStore = load_vector_store(pdf_path, "my_vector_store", force_reload=False)
107
 
 
108
 
109
+ if "chat_history" not in st.session_state:
110
+ st.session_state['chat_history'] = []
 
111
 
112
+ display_chat_history(st.session_state['chat_history'])
113
+
114
+ st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
115
+ st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
116
+ st.write("<!-- End Spacer -->", unsafe_allow_html=True)
117
+
118
+ new_messages_placeholder = st.empty()
119
 
 
120
  query = st.text_input("Ask questions about your PDF file (in any preferred language):")
121
 
122
  if st.button("Was genau ist ein Belegarzt?"):
123
  query = "Was genau ist ein Belegarzt?"
124
  if st.button("Wofür wird die Alpha-ID verwendet?"):
125
  query = "Wofür wird die Alpha-ID verwendet?"
126
+ if st.button("Was sind die Vorteile des ambulanten Operierens?"):
127
+ query = "Was sind die Vorteile des ambulanten Operierens?"
128
+ if st.button("Was kann ich mit dem Prognose-Analyse-Tool machen?"):
129
+ query = "Was kann ich mit dem Prognose-Analyse-Tool machen?"
130
  if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
131
  query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
132
+ if st.button("Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"):
133
+ query = "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"
134
 
135
+ if query:
 
136
  st.session_state['chat_history'].append(("User", query, "new"))
137
 
138
  loading_message = st.empty()
139
  loading_message.text('Bot is thinking...')
140
 
141
+ # Use the VectorStore returned by load_vector_store() above (it is not kept in session state)
142
  chain = load_chatbot()
143
  docs = VectorStore.similarity_search(query=query, k=3)
144
  with get_openai_callback() as cb:
 
157
 
158
  loading_message.empty()
159
 
160
+ # Clear the input field after the query is made
161
  query = ""
162
 
163
  # Mark all messages as old after displaying
164
  st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
165
 
166
 
 
def display_chat_history(chat_history):
    """Render every chat entry as a coloured, rounded message box.

    Args:
        chat_history: Iterable of indexable entries where index 0 is the
            sender, index 1 the message text and index 2 a status marker
            ("new" entries are highlighted).
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    for chat in chat_history:
        if chat[2] == "new":
            background_color = "#FFA07A"
        elif chat[0] == "User":
            background_color = "#acf"
        else:
            background_color = "#caf"

        # The markup is rendered with unsafe_allow_html=True, so sender and
        # message are escaped to stop chat text from being interpreted as HTML.
        sender = escape(str(chat[0]))
        message = escape(str(chat[1]))
        st.markdown(
            f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
171
 
172
  if __name__ == "__main__":
173
+ main()