christopher committed on
Commit
64f44e3
·
1 Parent(s): e21244d

Fix typo in semantic search invocation

Browse files
Files changed (1) hide show
  1. database/query_processor.py +16 -22
database/query_processor.py CHANGED
@@ -29,22 +29,28 @@ class QueryProcessor:
29
 
30
  # Query processing
31
  query_embedding = self.embedding_model.encode(query).tolist()
 
 
 
32
  entities = self.nlp_model.extract_entities(query)
 
33
 
34
  # Database search
35
- articles = await self._execute_search(
36
  query_embedding,
37
  start_dt,
38
  end_dt,
39
  topic,
40
- [ent[0] for ent in entities]
41
  )
42
 
43
  if not articles:
 
44
  return {"message": "No articles found", "articles": []}
45
 
46
  # Summary generation
47
  summary_data = self._generate_summary(articles)
 
48
  return {
49
  "summary": summary_data["summary"],
50
  "key_sentences": summary_data["key_sentences"],
@@ -64,34 +70,22 @@ class QueryProcessor:
64
  logger.error(f"Invalid date format: {date_str}")
65
  raise ValueError(f"Invalid date format. Expected YYYY-MM-DD, got {date_str}")
66
 
67
- def _extract_entities_safely(self, text: str) -> List[Tuple[str, str]]:
68
- """Robust entity extraction handling both strings and lists"""
69
- try:
70
- if isinstance(text, list):
71
- logger.warning("Received list input for entity extraction, joining to string")
72
- text = " ".join(text)
73
- return self.nlp_model.extract_entities(text)
74
- except Exception as e:
75
- logger.error(f"Entity extraction failed: {str(e)}")
76
- return []
77
-
78
  async def _execute_semantic_search(
79
  self,
80
  query_embedding: List[float],
81
  start_date: Optional[dt],
82
  end_date: Optional[dt],
83
  topic: Optional[str],
84
- entities: List[Tuple[str, str]]
85
  ) -> List[Dict[str, Any]]:
86
  """Execute search with proper error handling"""
87
  try:
88
- entity_texts = [ent[0] for ent in entities]
89
  return await self.db_service.semantic_search(
90
  query_embedding=query_embedding,
91
  start_date=start_date,
92
  end_date=end_date,
93
  topic=topic,
94
- entities=entity_texts
95
  )
96
  except Exception as e:
97
  logger.error(f"Semantic search failed: {str(e)}")
@@ -100,11 +94,10 @@ class QueryProcessor:
100
  def _generate_summary(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
101
  """Generate summary from articles with fallback handling"""
102
  try:
103
- contents = [article["content"] for article in articles]
104
  sentences = []
105
-
106
- for content in contents:
107
- if content:
108
  sentences.extend(self.nlp_model.tokenize_sentences(content))
109
 
110
  if not sentences:
@@ -114,16 +107,17 @@ class QueryProcessor:
114
  "key_sentences": []
115
  }
116
 
 
117
  embeddings = self.embedding_model.encode(sentences)
118
  similarity_matrix = np.inner(embeddings, embeddings)
119
  centrality_scores = degree_centrality_scores(similarity_matrix, threshold=None)
120
 
 
121
  top_indices = np.argsort(-centrality_scores)[:10]
122
  key_sentences = [sentences[idx].strip() for idx in top_indices]
123
- combined_text = ' '.join(key_sentences)
124
 
125
  return {
126
- "summary": self.summarization_model.summarize(combined_text),
127
  "key_sentences": key_sentences
128
  }
129
 
 
29
 
30
  # Query processing
31
  query_embedding = self.embedding_model.encode(query).tolist()
32
+ logger.debug(f"Generated embedding for query: {query[:50]}...")
33
+
34
+ # Entity extraction
35
  entities = self.nlp_model.extract_entities(query)
36
+ logger.debug(f"Extracted entities: {entities}")
37
 
38
  # Database search
39
+ articles = await self._execute_semantic_search(
40
  query_embedding,
41
  start_dt,
42
  end_dt,
43
  topic,
44
+ [ent[0] for ent in entities] # Just the entity texts
45
  )
46
 
47
  if not articles:
48
+ logger.info("No articles found matching criteria")
49
  return {"message": "No articles found", "articles": []}
50
 
51
  # Summary generation
52
  summary_data = self._generate_summary(articles)
53
+
54
  return {
55
  "summary": summary_data["summary"],
56
  "key_sentences": summary_data["key_sentences"],
 
70
  logger.error(f"Invalid date format: {date_str}")
71
  raise ValueError(f"Invalid date format. Expected YYYY-MM-DD, got {date_str}")
72
 
 
 
 
 
 
 
 
 
 
 
 
73
  async def _execute_semantic_search(
74
  self,
75
  query_embedding: List[float],
76
  start_date: Optional[dt],
77
  end_date: Optional[dt],
78
  topic: Optional[str],
79
+ entities: List[str]
80
  ) -> List[Dict[str, Any]]:
81
  """Execute search with proper error handling"""
82
  try:
 
83
  return await self.db_service.semantic_search(
84
  query_embedding=query_embedding,
85
  start_date=start_date,
86
  end_date=end_date,
87
  topic=topic,
88
+ entities=entities
89
  )
90
  except Exception as e:
91
  logger.error(f"Semantic search failed: {str(e)}")
 
94
  def _generate_summary(self, articles: List[Dict[str, Any]]) -> Dict[str, Any]:
95
  """Generate summary from articles with fallback handling"""
96
  try:
97
+ # Extract and process content
98
  sentences = []
99
+ for article in articles:
100
+ if content := article.get("content"):
 
101
  sentences.extend(self.nlp_model.tokenize_sentences(content))
102
 
103
  if not sentences:
 
107
  "key_sentences": []
108
  }
109
 
110
+ # Generate summary
111
  embeddings = self.embedding_model.encode(sentences)
112
  similarity_matrix = np.inner(embeddings, embeddings)
113
  centrality_scores = degree_centrality_scores(similarity_matrix, threshold=None)
114
 
115
+ # Get top 10 most central sentences
116
  top_indices = np.argsort(-centrality_scores)[:10]
117
  key_sentences = [sentences[idx].strip() for idx in top_indices]
 
118
 
119
  return {
120
+ "summary": self.summarization_model.summarize(' '.join(key_sentences)),
121
  "key_sentences": key_sentences
122
  }
123