gourisankar85 committed on
Commit
ecc9585
·
verified ·
1 Parent(s): c737bb6

Upload 4 files

Browse files
retriever/document_manager.py CHANGED
@@ -43,7 +43,7 @@ class DocumentManager:
43
  self.document_ids[filename] = doc_id
44
 
45
  # Chunk the pages
46
- chunks = chunk_documents(page_list, doc_id, chunk_size=1000, chunk_overlap=200)
47
  self.chunked_documents[filename] = chunks
48
 
49
  # Add chunks to vector store
 
43
  self.document_ids[filename] = doc_id
44
 
45
  # Chunk the pages
46
+ chunks = chunk_documents(page_list, doc_id, chunk_size=2000, chunk_overlap=300)
47
  self.chunked_documents[filename] = chunks
48
 
49
  # Add chunks to vector store
retriever/llm_manager.py CHANGED
@@ -109,7 +109,7 @@ class LLMManager:
109
  result = qa_chain.invoke({"query": question})
110
  response = result['result']
111
  source_docs = result['source_documents']
112
- logging.info(f"Generated response for question: {question} : {response}")
113
  return response, source_docs
114
  except Exception as e:
115
  logging.error(f"Error during QA chain invocation: {str(e)}")
 
109
  result = qa_chain.invoke({"query": question})
110
  response = result['result']
111
  source_docs = result['source_documents']
112
+ #logging.info(f"Generated response for question: {question} : {response}")
113
  return response, source_docs
114
  except Exception as e:
115
  logging.error(f"Error during QA chain invocation: {str(e)}")
retriever/vector_store_manager.py CHANGED
@@ -26,13 +26,15 @@ class VectorStoreManager:
26
  allow_dangerous_deserialization=True
27
  )
28
  else:
29
- logging.info("Creating new vector store")
30
  # Return an empty vector store; it will be populated when documents are added
31
  return FAISS.from_texts(
32
  texts=[""], # Dummy text to initialize
33
  embedding=self.embedding_model,
34
  metadatas=[{"source": "init", "doc_id": "init"}]
35
- )
 
 
36
 
37
  def add_documents(self, documents):
38
  """
@@ -48,10 +50,16 @@ class VectorStoreManager:
48
  metadatas = [{'source': doc['source'], 'doc_id': doc['doc_id']} for doc in documents]
49
 
50
  logging.info("Adding new documents to vector store")
51
- self.vector_store.add_texts(
52
- texts=texts,
53
- metadatas=metadatas
54
- )
 
 
 
 
 
 
55
  self.vector_store.save_local(self.embedding_path)
56
  logging.info(f"Vector store updated and saved to {self.embedding_path}")
57
 
@@ -71,7 +79,7 @@ class VectorStoreManager:
71
  return []
72
 
73
  try:
74
-
75
  # Define a filter function to match doc_id
76
  filter_fn = lambda metadata: metadata['doc_id'] == doc_id
77
 
 
26
  allow_dangerous_deserialization=True
27
  )
28
  else:
29
+ '''logging.info("Creating new vector store")
30
  # Return an empty vector store; it will be populated when documents are added
31
  return FAISS.from_texts(
32
  texts=[""], # Dummy text to initialize
33
  embedding=self.embedding_model,
34
  metadatas=[{"source": "init", "doc_id": "init"}]
35
+ )'''
36
+ logging.info("Creating new vector store (unpopulated)")
37
+ return None
38
 
39
  def add_documents(self, documents):
40
  """
 
50
  metadatas = [{'source': doc['source'], 'doc_id': doc['doc_id']} for doc in documents]
51
 
52
  logging.info("Adding new documents to vector store")
53
+
54
+ if not self.vector_store:
55
+ self.vector_store = FAISS.from_texts(
56
+ texts=texts,
57
+ embedding=self.embedding_model,
58
+ metadatas=metadatas
59
+ )
60
+ else:
61
+ self.vector_store.add_texts(texts=texts, metadatas=metadatas)
62
+
63
  self.vector_store.save_local(self.embedding_path)
64
  logging.info(f"Vector store updated and saved to {self.embedding_path}")
65
 
 
79
  return []
80
 
81
  try:
82
+ query = " ".join(query.lower().split())
83
  # Define a filter function to match doc_id
84
  filter_fn = lambda metadata: metadata['doc_id'] == doc_id
85