Anne31415 committed on
Commit
5d91cf0
·
1 Parent(s): f76e9bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -13,9 +13,13 @@ from langchain.chains.question_answering import load_qa_chain
13
  from langchain.callbacks import get_openai_callback
14
  import os
15
 
16
- pinecone.init(api_key="PINECONE_API_KEY")
 
 
17
 
18
- INDEX_NAME = "your_vector_index_name"
 
 
19
  if INDEX_NAME not in pinecone.list_indexes():
20
  pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
21
 
@@ -23,14 +27,13 @@ if INDEX_NAME not in pinecone.list_indexes():
23
 
24
  # Step 1: Clone the Dataset Repository
25
  repo = Repository(
26
- local_dir="Private_Book", # Local directory to clone the repository
27
- repo_type="dataset", # Specify that this is a dataset repository
28
-
29
- clone_from="Anne31415/Private_Book", # Replace with your repository URL
30
-
31
- token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
32
  )
33
- repo.git_pull() # Pull the latest changes (if any)
 
34
 
35
  # Step 2: Load the PDF File
36
  pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf" # Replace with your PDF file path
@@ -55,8 +58,6 @@ with st.sidebar:
55
 
56
  st.write('Made with ❤️ by BinDoc GmbH')
57
 
58
- api_key = os.getenv("OPENAI_API_KEY")
59
- # Retrieve the API key from st.secrets
60
 
61
 
62
  def load_pdf(file_path):
@@ -73,7 +74,6 @@ def load_pdf(file_path):
73
  chunks = text_splitter.split_text(text=text)
74
 
75
  store_name, _ = os.path.splitext(os.path.basename(file_path))
76
-
77
  if os.path.exists(f"{store_name}.pkl"):
78
  with open(f"{store_name}.pkl", "rb") as f:
79
  VectorStore = pickle.load(f)
@@ -82,15 +82,13 @@ def load_pdf(file_path):
82
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
83
  with open(f"{store_name}.pkl", "wb") as f:
84
  pickle.dump(VectorStore, f)
85
-
86
- # Add Pinecone integration here
87
  vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
88
  pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
89
-
90
  return VectorStore
91
 
92
 
93
 
 
94
  def load_chatbot():
95
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
96
 
@@ -151,6 +149,15 @@ def main():
151
  VectorStore = load_pdf(pdf_path)
152
  chain = load_chatbot()
153
  docs = VectorStore.similarity_search(query=query, k=3)
 
 
 
 
 
 
 
 
 
154
  with get_openai_callback() as cb:
155
  response = chain.run(input_documents=docs, question=query)
156
 
 
13
  from langchain.callbacks import get_openai_callback
14
  import os
15
 
16
+ # Load all necessary environment variables at the beginning of the script
17
+ PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
18
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
19
 
20
+ pinecone.init(api_key=PINECONE_API_KEY)
21
+
22
+ INDEX_NAME = "pdfbot1"
23
  if INDEX_NAME not in pinecone.list_indexes():
24
  pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
25
 
 
27
 
28
  # Step 1: Clone the Dataset Repository
29
  repo = Repository(
30
+ local_dir="Private_Book",
31
+ repo_type="dataset",
32
+ clone_from="Anne31415/Private_Book",
33
+ token=os.environ["HUB_TOKEN"]
 
 
34
  )
35
+ repo.git_pull()
36
+
37
 
38
  # Step 2: Load the PDF File
39
  pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf" # Replace with your PDF file path
 
58
 
59
  st.write('Made with ❤️ by BinDoc GmbH')
60
 
 
 
61
 
62
 
63
  def load_pdf(file_path):
 
74
  chunks = text_splitter.split_text(text=text)
75
 
76
  store_name, _ = os.path.splitext(os.path.basename(file_path))
 
77
  if os.path.exists(f"{store_name}.pkl"):
78
  with open(f"{store_name}.pkl", "rb") as f:
79
  VectorStore = pickle.load(f)
 
82
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
83
  with open(f"{store_name}.pkl", "wb") as f:
84
  pickle.dump(VectorStore, f)
 
 
85
  vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
86
  pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
 
87
  return VectorStore
88
 
89
 
90
 
91
+
92
  def load_chatbot():
93
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
94
 
 
149
  VectorStore = load_pdf(pdf_path)
150
  chain = load_chatbot()
151
  docs = VectorStore.similarity_search(query=query, k=3)
152
+
153
+ # Searching for similar documents in Pinecone
154
+ query_vector = embeddings.embed_text(query)
155
+ search_results = pinecone.query(queries=[query_vector], index_name=INDEX_NAME, top_k=3)
156
+ # Extracting document ids from Pinecone's results
157
+ doc_ids = [int(item.id) for item in search_results.results[0].matches]
158
+ # Retrieving the actual document texts based on the ids
159
+ docs = [texts[id] for id in doc_ids]
160
+
161
  with get_openai_callback() as cb:
162
  response = chain.run(input_documents=docs, question=query)
163