Backup_PublicBookBot

Running

App Files Community

Anne31415 committed on Oct 24, 2023

Commit

8a1f468

1 Parent(s): 22586c2

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -37

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import streamlit as st
 from dotenv import load_dotenv
-import pinecone
 import pickle
 from huggingface_hub import Repository
 from PyPDF2 import PdfReader
@@ -13,35 +12,19 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks import get_openai_callback
 import os
-# Load all necessary environment variables at the beginning of the script
-PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-pinecone.init(
-    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
-    environment="gcp-starter"  # next to api key in console
-)
-INDEX_NAME = "pdfbot1"
-if INDEX_NAME not in pinecone.list_indexes():
-    pinecone.create_index(name=INDEX_NAME, metric="cosine", shards=1)
-index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
 # Step 1: Clone the Dataset Repository
 repo = Repository(
-    local_dir="Private_Book",
-    repo_type="dataset",
-    clone_from="Anne31415/Private_Book",
-    token=os.environ["HUB_TOKEN"]
 )
-repo.git_pull()
 # Step 2: Load the PDF File
-pdf_file_path = "Private_Book/Glossar_HELP_DESK_combi.pdf"  # Replace with your PDF file path
 with st.sidebar:
     st.title('BinDoc GmbH')
@@ -63,6 +46,8 @@ with st.sidebar:
     st.write('Made with ❤️ by BinDoc GmbH')
 def load_pdf(file_path):
@@ -79,6 +64,7 @@ def load_pdf(file_path):
     chunks = text_splitter.split_text(text=text)
     store_name, _ = os.path.splitext(os.path.basename(file_path))
     if os.path.exists(f"{store_name}.pkl"):
         with open(f"{store_name}.pkl", "rb") as f:
             VectorStore = pickle.load(f)
@@ -87,10 +73,8 @@ def load_pdf(file_path):
         VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
         with open(f"{store_name}.pkl", "wb") as f:
             pickle.dump(VectorStore, f)
-    vector_dict = {str(i): vector for i, vector in enumerate(VectorStore.vectors)}
-    pinecone.upsert(items=vector_dict, index_name=INDEX_NAME)
-    return VectorStore
@@ -154,15 +138,6 @@ def main():
             VectorStore = load_pdf(pdf_path)
             chain = load_chatbot()
             docs = VectorStore.similarity_search(query=query, k=3)
-            # Searching for similar documents in Pinecone
-            query_vector = embeddings.embed_text(query)
-            search_results = pinecone.query(queries=[query_vector], index_name=INDEX_NAME, top_k=3)
-            # Extracting document ids from Pinecone's results
-            doc_ids = [int(item.id) for item in search_results.results[0].matches]
-            # Retrieving the actual document texts based on the ids
-            docs = [texts[id] for id in doc_ids]
             with get_openai_callback() as cb:
                 response = chain.run(input_documents=docs, question=query)

 import streamlit as st
 from dotenv import load_dotenv
 import pickle
 from huggingface_hub import Repository
 from PyPDF2 import PdfReader
 from langchain.callbacks import get_openai_callback
 import os
 # Step 1: Clone the Dataset Repository
 repo = Repository(
+    local_dir="Private_Book",  # Local directory to clone the repository
+    repo_type="dataset",  # Specify that this is a dataset repository
+    clone_from="Anne31415/Private_Book",  # Replace with your repository URL
+    token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
 )
+repo.git_pull()  # Pull the latest changes (if any)
 # Step 2: Load the PDF File
+pdf_file_path = "Private_Book/KOMBI_all.pdf"  # Replace with your PDF file path
 with st.sidebar:
     st.title('BinDoc GmbH')
     st.write('Made with ❤️ by BinDoc GmbH')
+    api_key = os.getenv("OPENAI_API_KEY")
+    # Retrieve the API key from st.secrets
 def load_pdf(file_path):
     chunks = text_splitter.split_text(text=text)
     store_name, _ = os.path.splitext(os.path.basename(file_path))
     if os.path.exists(f"{store_name}.pkl"):
         with open(f"{store_name}.pkl", "rb") as f:
             VectorStore = pickle.load(f)
         VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
         with open(f"{store_name}.pkl", "wb") as f:
             pickle.dump(VectorStore, f)
+    return VectorStore
             VectorStore = load_pdf(pdf_path)
             chain = load_chatbot()
             docs = VectorStore.similarity_search(query=query, k=3)
             with get_openai_callback() as cb:
                 response = chain.run(input_documents=docs, question=query)