Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -50,30 +50,52 @@ api_key = os.getenv("OPENAI_API_KEY")
|
|
50 |
|
51 |
|
52 |
|
53 |
-
# Updated
|
54 |
-
@st.cache_data(persist="disk")
|
55 |
def load_vector_store(file_path, store_name, force_reload=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
# Check if we need to force reload the vector store (e.g., when the PDF changes)
|
58 |
-
if force_reload or not os.path.exists(f"{store_name}.pkl"):
|
59 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
60 |
-
chunk_size=1000,
|
61 |
-
chunk_overlap=200,
|
62 |
-
length_function=len
|
63 |
-
)
|
64 |
-
|
65 |
-
text = load_pdf_text(file_path)
|
66 |
-
chunks = text_splitter.split_text(text=text)
|
67 |
-
|
68 |
-
embeddings = OpenAIEmbeddings()
|
69 |
-
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
|
70 |
-
with open(f"{store_name}.pkl", "wb") as f:
|
71 |
-
pickle.dump(VectorStore, f)
|
72 |
-
else:
|
73 |
-
with open(f"{store_name}.pkl", "rb") as f:
|
74 |
-
VectorStore = pickle.load(f)
|
75 |
-
|
76 |
-
return VectorStore
|
77 |
|
78 |
# Utility function to load text from a PDF
|
79 |
def load_pdf_text(file_path):
|
|
|
50 |
|
51 |
|
52 |
|
53 |
+
# Updated load_vector_store function with Streamlit text outputs and directory handling for Git
|
54 |
+
@st.cache_data(persist="disk")
|
55 |
def load_vector_store(file_path, store_name, force_reload=False):
|
56 |
+
local_repo_path = "Private_Book"
|
57 |
+
vector_store_path = os.path.join(local_repo_path, f"{store_name}.pkl")
|
58 |
+
|
59 |
+
# Check if vector store already exists and force_reload is False
|
60 |
+
if not force_reload and os.path.exists(vector_store_path):
|
61 |
+
with open(vector_store_path, "rb") as f:
|
62 |
+
VectorStore = pickle.load(f)
|
63 |
+
st.text(f"Loaded existing vector store from {vector_store_path}")
|
64 |
+
else:
|
65 |
+
# Load and process the PDF, then create the vector store
|
66 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
|
67 |
+
text = load_pdf_text(file_path)
|
68 |
+
chunks = text_splitter.split_text(text=text)
|
69 |
+
embeddings = OpenAIEmbeddings()
|
70 |
+
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
|
71 |
+
|
72 |
+
# Serialize the vector store
|
73 |
+
with open(vector_store_path, "wb") as f:
|
74 |
+
pickle.dump(VectorStore, f)
|
75 |
+
st.text(f"Created and saved vector store at {vector_store_path}")
|
76 |
+
|
77 |
+
# Change working directory for Git operations
|
78 |
+
original_dir = os.getcwd()
|
79 |
+
os.chdir(local_repo_path)
|
80 |
+
|
81 |
+
try:
|
82 |
+
# Check current working directory and list files for debugging
|
83 |
+
st.text(f"Current working directory: {os.getcwd()}")
|
84 |
+
st.text(f"Files in current directory: {os.listdir()}")
|
85 |
+
|
86 |
+
# Adjusted file path for Git command
|
87 |
+
repo.git_add(f"{store_name}.pkl") # Use just the file name
|
88 |
+
repo.git_commit(f"Update vector store: {store_name}")
|
89 |
+
repo.git_push()
|
90 |
+
st.text("Committed and pushed vector store to repository.")
|
91 |
+
except Exception as e:
|
92 |
+
st.error(f"Error during Git operations: {e}")
|
93 |
+
finally:
|
94 |
+
# Change back to the original directory
|
95 |
+
os.chdir(original_dir)
|
96 |
+
|
97 |
+
return VectorStore
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
# Utility function to load text from a PDF
|
101 |
def load_pdf_text(file_path):
|