Spaces:

Mojo3
/

Mayyar-RAG

Sleeping

Mojo3 committed on Jan 29

Commit

2a1b8e8

verified ·

1 Parent(s): cc4a792

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -41,6 +41,7 @@ documents = load_docx_files_from_drive(docs_folder)
 def split_extracted_text_into_chunks(documents):
     # List to hold all chunks
     chunks = []
@@ -73,6 +74,7 @@ chunks = split_extracted_text_into_chunks(documents)
 def save_chunks_to_file(chunks, output_file_path):
     # Open the file in write mode
     with open(output_file_path, "w", encoding="utf-8") as file:
         for i, chunk in enumerate(chunks, start=1):
@@ -100,6 +102,7 @@ embedding_model = HuggingFaceEmbeddings(
 # Step 2: Embed the chunks (now simplified)
 def embed_chunks(chunks):
     return [
         {"chunk": chunk, "embedding": embedding_model.embed_query(chunk)}
         for chunk in chunks
@@ -111,6 +114,7 @@ embeddings = embed_chunks(chunks)
 # Step 3: Prepare documents (unchanged)
 def prepare_documents_for_chroma(embeddings):
     return [
         Document2(page_content=entry["chunk"], metadata={"chunk_index": i})
         for i, entry in enumerate(embeddings, start=1)

 def split_extracted_text_into_chunks(documents):
+    print("Splitting text into chunks")
     # List to hold all chunks
     chunks = []
 def save_chunks_to_file(chunks, output_file_path):
+    print("Saving chunks to file")
     # Open the file in write mode
     with open(output_file_path, "w", encoding="utf-8") as file:
         for i, chunk in enumerate(chunks, start=1):
 # Step 2: Embed the chunks (now simplified)
 def embed_chunks(chunks):
+    print("Embedding the chunks")
     return [
         {"chunk": chunk, "embedding": embedding_model.embed_query(chunk)}
         for chunk in chunks
 # Step 3: Prepare documents (unchanged)
 def prepare_documents_for_chroma(embeddings):
+    print("Preparing documents for chroma")
     return [
         Document2(page_content=entry["chunk"], metadata={"chunk_index": i})
         for i, entry in enumerate(embeddings, start=1)