Anne31415 committed on
Commit
835e20f
·
verified ·
1 Parent(s): b0f784f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -22
app.py CHANGED
@@ -50,30 +50,52 @@ api_key = os.getenv("OPENAI_API_KEY")
50
 
51
 
52
 
53
- # Updated caching mechanism using st.cache_data
54
- @st.cache_data(persist="disk") # Using persist="disk" to save cache across sessions
55
  def load_vector_store(file_path, store_name, force_reload=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- # Check if we need to force reload the vector store (e.g., when the PDF changes)
58
- if force_reload or not os.path.exists(f"{store_name}.pkl"):
59
- text_splitter = RecursiveCharacterTextSplitter(
60
- chunk_size=1000,
61
- chunk_overlap=200,
62
- length_function=len
63
- )
64
-
65
- text = load_pdf_text(file_path)
66
- chunks = text_splitter.split_text(text=text)
67
-
68
- embeddings = OpenAIEmbeddings()
69
- VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
70
- with open(f"{store_name}.pkl", "wb") as f:
71
- pickle.dump(VectorStore, f)
72
- else:
73
- with open(f"{store_name}.pkl", "rb") as f:
74
- VectorStore = pickle.load(f)
75
-
76
- return VectorStore
77
 
78
  # Utility function to load text from a PDF
79
  def load_pdf_text(file_path):
 
50
 
51
 
52
 
53
+ # Updated load_vector_store function with Streamlit text outputs and directory handling for Git
54
+ @st.cache_data(persist="disk")
55
  def load_vector_store(file_path, store_name, force_reload=False):
56
+ local_repo_path = "Private_Book"
57
+ vector_store_path = os.path.join(local_repo_path, f"{store_name}.pkl")
58
+
59
+ # Check if vector store already exists and force_reload is False
60
+ if not force_reload and os.path.exists(vector_store_path):
61
+ with open(vector_store_path, "rb") as f:
62
+ VectorStore = pickle.load(f)
63
+ st.text(f"Loaded existing vector store from {vector_store_path}")
64
+ else:
65
+ # Load and process the PDF, then create the vector store
66
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
67
+ text = load_pdf_text(file_path)
68
+ chunks = text_splitter.split_text(text=text)
69
+ embeddings = OpenAIEmbeddings()
70
+ VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
71
+
72
+ # Serialize the vector store
73
+ with open(vector_store_path, "wb") as f:
74
+ pickle.dump(VectorStore, f)
75
+ st.text(f"Created and saved vector store at {vector_store_path}")
76
+
77
+ # Change working directory for Git operations
78
+ original_dir = os.getcwd()
79
+ os.chdir(local_repo_path)
80
+
81
+ try:
82
+ # Check current working directory and list files for debugging
83
+ st.text(f"Current working directory: {os.getcwd()}")
84
+ st.text(f"Files in current directory: {os.listdir()}")
85
+
86
+ # Adjusted file path for Git command
87
+ repo.git_add(f"{store_name}.pkl") # Use just the file name
88
+ repo.git_commit(f"Update vector store: {store_name}")
89
+ repo.git_push()
90
+ st.text("Committed and pushed vector store to repository.")
91
+ except Exception as e:
92
+ st.error(f"Error during Git operations: {e}")
93
+ finally:
94
+ # Change back to the original directory
95
+ os.chdir(original_dir)
96
+
97
+ return VectorStore
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  # Utility function to load text from a PDF
101
  def load_pdf_text(file_path):