Anne31415 committed
Commit 8a1f468 · Parent(s): 22586c2

Update app.py

Files changed (1)
  1. app.py +12 -37
app.py CHANGED
@@ -1,6 +1,5 @@
 import streamlit as st
 from dotenv import load_dotenv
-import pinecone
 import pickle
 from huggingface_hub import Repository
 from PyPDF2 import PdfReader
@@ -13,35 +12,19 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks import get_openai_callback
 import os
 
-# Load all necessary environment variables at the beginning of the script
-PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-
-
-pinecone.init(
-    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
-    environment="gcp-starter"  # next to api key in console
-)
-
-INDEX_NAME = "pdfbot1"
-if INDEX_NAME not in pinecone.list_indexes():
-    pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
-
-index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
-
-
 # Step 1: Clone the Dataset Repository
 repo = Repository(
-    local_dir="Private_Book",
-    repo_type="dataset",
-    clone_from="Anne31415/Private_Book",
-    token=os.environ["HUB_TOKEN"]
+    local_dir="Private_Book",  # Local directory to clone the repository
+    repo_type="dataset",  # Specify that this is a dataset repository
+
+    clone_from="Anne31415/Private_Book",  # Replace with your repository URL
+
+    token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
 )
-repo.git_pull()
-
+repo.git_pull()  # Pull the latest changes (if any)
 
 # Step 2: Load the PDF File
-pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf"  # Replace with your PDF file path
+pdf_file_path = "Private_Book/KOMBI_all.pdf"  # Replace with your PDF file path
 
 with st.sidebar:
     st.title('BinDoc GmbH')
@@ -63,6 +46,8 @@ with st.sidebar:
 
     st.write('Made with ❤️ by BinDoc GmbH')
 
+api_key = os.getenv("OPENAI_API_KEY")
+# Retrieve the API key from st.secrets
 
 
 def load_pdf(file_path):
@@ -79,6 +64,7 @@ def load_pdf(file_path):
     chunks = text_splitter.split_text(text=text)
 
     store_name, _ = os.path.splitext(os.path.basename(file_path))
+
     if os.path.exists(f"{store_name}.pkl"):
         with open(f"{store_name}.pkl", "rb") as f:
             VectorStore = pickle.load(f)
@@ -87,10 +73,8 @@ def load_pdf(file_path):
         VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
         with open(f"{store_name}.pkl", "wb") as f:
             pickle.dump(VectorStore, f)
-    vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
-    pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
-    return VectorStore
 
+    return VectorStore
 
 
 
@@ -154,15 +138,6 @@ def main():
        VectorStore = load_pdf(pdf_path)
        chain = load_chatbot()
        docs = VectorStore.similarity_search(query=query, k=3)
-
-       # Searching for similar documents in Pinecone
-       query_vector = embeddings.embed_text(query)
-       search_results = pinecone.query(queries=[query_vector], index_name=INDEX_NAME, top_k=3)
-       # Extracting document ids from Pinecone's results
-       doc_ids = [int(item.id) for item in search_results.results[0].matches]
-       # Retrieving the actual document texts based on the ids
-       docs = [texts[id] for id in doc_ids]
-
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)
 
 
 
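For reference, the removed Pinecone block was broken even before this commit: pinecone.init() is passed an invalid PINECONE_API_KEY keyword (and is missing a comma), and Pinecone.from_documents(docs, embeddings, index_name=index_name) references docs, embeddings, and index_name before any of them exist. If the integration were ever restored, a minimal sketch of a valid setup against the pinecone-client v2 API the old code targeted might look like this (the dimension value is an assumption, sized for OpenAI's text-embedding-ada-002 embeddings):

import os
import pinecone  # pinecone-client v2.x, the API style the removed code targeted

pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # the keyword is api_key, not PINECONE_API_KEY
    environment="gcp-starter",              # shown next to the API key in the console
)

INDEX_NAME = "pdfbot1"
if INDEX_NAME not in pinecone.list_indexes():
    # create_index requires a dimension; 1536 is an assumption matching ada-002
    pinecone.create_index(name=INDEX_NAME, dimension=1536, metric="cosine")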
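After this commit the app answers queries purely from the local FAISS store that load_pdf() pickles to disk. A minimal end-to-end sketch of that remaining flow, assuming the langchain-era imports app.py already uses (the OpenAI LLM, the "stuff" chain type, the splitter settings, and the sample query are assumptions where the diff does not show them):

import os
import pickle
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback

def load_pdf(file_path):
    # Extract raw text from every page of the PDF
    reader = PdfReader(file_path)
    text = "".join(page.extract_text() or "" for page in reader.pages)

    # Chunk the text so each piece fits in an embedding call
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text=text)

    # Cache the FAISS store on disk, keyed by the PDF's base name (as app.py does)
    store_name, _ = os.path.splitext(os.path.basename(file_path))
    if os.path.exists(f"{store_name}.pkl"):
        with open(f"{store_name}.pkl", "rb") as f:
            vector_store = pickle.load(f)
    else:
        embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)
        with open(f"{store_name}.pkl", "wb") as f:
            pickle.dump(vector_store, f)
    return vector_store

query = "What does the document say about billing?"  # hypothetical query
vector_store = load_pdf("Private_Book/KOMBI_all.pdf")
docs = vector_store.similarity_search(query=query, k=3)
chain = load_qa_chain(OpenAI(), chain_type="stuff")  # chain type is an assumption
with get_openai_callback() as cb:
    response = chain.run(input_documents=docs, question=query)
    print(response, cb)

Pickling the store mirrors app.py, but FAISS.save_local()/FAISS.load_local() would be a more robust cache, since some faiss builds refuse to pickle their index objects.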