Anne31415 committed on
Commit
5d91cf0
·
1 Parent(s): f76e9bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -13,9 +13,13 @@ from langchain.chains.question_answering import load_qa_chain
13
  from langchain.callbacks import get_openai_callback
14
  import os
15
 
16
- pinecone.init(api_key="PINECONE_API_KEY")
 
 
17
 
18
- INDEX_NAME = "your_vector_index_name"
 
 
19
  if INDEX_NAME not in pinecone.list_indexes():
20
  pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
21
 
@@ -23,14 +27,13 @@ if INDEX_NAME not in pinecone.list_indexes():
23
 
24
  # Step 1: Clone the Dataset Repository
25
  repo = Repository(
26
- local_dir="Private_Book", # Local directory to clone the repository
27
- repo_type="dataset", # Specify that this is a dataset repository
28
-
29
- clone_from="Anne31415/Private_Book", # Replace with your repository URL
30
-
31
- token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
32
  )
33
- repo.git_pull() # Pull the latest changes (if any)
 
34
 
35
  # Step 2: Load the PDF File
36
  pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf" # Replace with your PDF file path
@@ -55,8 +58,6 @@ with st.sidebar:
55
 
56
  st.write('Made with ❤️ by BinDoc GmbH')
57
 
58
- api_key = os.getenv("OPENAI_API_KEY")
59
- # Retrieve the API key from st.secrets
60
 
61
 
62
  def load_pdf(file_path):
@@ -73,7 +74,6 @@ def load_pdf(file_path):
73
  chunks = text_splitter.split_text(text=text)
74
 
75
  store_name, _ = os.path.splitext(os.path.basename(file_path))
76
-
77
  if os.path.exists(f"{store_name}.pkl"):
78
  with open(f"{store_name}.pkl", "rb") as f:
79
  VectorStore = pickle.load(f)
@@ -82,15 +82,13 @@ def load_pdf(file_path):
82
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
83
  with open(f"{store_name}.pkl", "wb") as f:
84
  pickle.dump(VectorStore, f)
85
-
86
- # Add Pinecone integration here
87
  vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
88
  pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
89
-
90
  return VectorStore
91
 
92
 
93
 
 
94
  def load_chatbot():
95
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
96
 
@@ -151,6 +149,15 @@ def main():
151
  VectorStore = load_pdf(pdf_path)
152
  chain = load_chatbot()
153
  docs = VectorStore.similarity_search(query=query, k=3)
 
 
 
 
 
 
 
 
 
154
  with get_openai_callback() as cb:
155
  response = chain.run(input_documents=docs, question=query)
156
 
 
13
  from langchain.callbacks import get_openai_callback
14
  import os
15
 
16
+ # Load all necessary environment variables at the beginning of the script
17
+ PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
18
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
19
 
20
+ pinecone.init(api_key=PINECONE_API_KEY)
21
+
22
+ INDEX_NAME = "pdfbot1"
23
  if INDEX_NAME not in pinecone.list_indexes():
24
  pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
25
 
 
27
 
28
  # Step 1: Clone the Dataset Repository
29
  repo = Repository(
30
+ local_dir="Private_Book",
31
+ repo_type="dataset",
32
+ clone_from="Anne31415/Private_Book",
33
+ token=os.environ["HUB_TOKEN"]
 
 
34
  )
35
+ repo.git_pull()
36
+
37
 
38
  # Step 2: Load the PDF File
39
  pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf" # Replace with your PDF file path
 
58
 
59
  st.write('Made with ❤️ by BinDoc GmbH')
60
 
 
 
61
 
62
 
63
  def load_pdf(file_path):
 
74
  chunks = text_splitter.split_text(text=text)
75
 
76
  store_name, _ = os.path.splitext(os.path.basename(file_path))
 
77
  if os.path.exists(f"{store_name}.pkl"):
78
  with open(f"{store_name}.pkl", "rb") as f:
79
  VectorStore = pickle.load(f)
 
82
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
83
  with open(f"{store_name}.pkl", "wb") as f:
84
  pickle.dump(VectorStore, f)
 
 
85
  vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
86
  pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
 
87
  return VectorStore
88
 
89
 
90
 
91
+
92
  def load_chatbot():
93
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
94
 
 
149
  VectorStore = load_pdf(pdf_path)
150
  chain = load_chatbot()
151
  docs = VectorStore.similarity_search(query=query, k=3)
152
+
153
+ # Searching for similar documents in Pinecone
154
+ query_vector = embeddings.embed_text(query)
155
+ search_results = pinecone.query(queries=[query_vector], index_name=INDEX_NAME, top_k=3)
156
+ # Extracting document ids from Pinecone's results
157
+ doc_ids = [int(item.id) for item in search_results.results[0].matches]
158
+ # Retrieving the actual document texts based on the ids
159
+ docs = [texts[id] for id in doc_ids]
160
+
161
  with get_openai_callback() as cb:
162
  response = chain.run(input_documents=docs, question=query)
163