krishnadhulipalla committed
Commit 889a853 · 1 Parent(s): c7da115

Update app.py

Files changed (1)
  1. app.py +104 -43
app.py CHANGED
@@ -38,7 +38,7 @@ if not api_key:
      raise RuntimeError("🚨 NVIDIA_API_KEY not found in environment! Please add it in Hugging Face Secrets.")

  # Constants
- FAISS_PATH = "faiss_store/v61_600_150"
+ FAISS_PATH = "faiss_store/v64_600-150"
  CHUNKS_PATH = "all_chunks.json"

  if not Path(FAISS_PATH).exists():
@@ -99,6 +99,14 @@ class KnowledgeBase(BaseModel):
      last_followups: List[str] = Field(default_factory=list, description="List of follow-up suggestions from the last assistant response")
      tone: Optional[Literal['formal', 'casual', 'playful', 'direct', 'uncertain']] = Field(None, description="Inferred tone or attitude from the user based on recent input")

+     def dump_truncated(self, max_len: int = 500):
+         memory = self.dict()
+         if len(memory["last_input"]) > max_len:
+             memory["last_input"] = memory["last_input"][:max_len] + "..."
+         if len(memory["last_output"]) > max_len:
+             memory["last_output"] = memory["last_output"][:max_len] + "..."
+         return memory
+
  # Initialize the knowledge base
  # knowledge_base = KnowledgeBase()
  user_kbs = {}
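
The new dump_truncated helper caps only the free-text last_input and last_output fields before the knowledge base is serialized into prompts. A minimal standalone sketch of that behavior, using a stand-in model with illustrative values rather than the full KnowledgeBase from app.py:

from pydantic import BaseModel

class KBSketch(BaseModel):
    # Stand-in for the two free-text fields that can grow without bound.
    last_input: str = ""
    last_output: str = ""

    def dump_truncated(self, max_len: int = 500):
        memory = self.dict()
        for key in ("last_input", "last_output"):
            if len(memory[key]) > max_len:
                memory[key] = memory[key][:max_len] + "..."
        return memory

kb = KBSketch(last_input="hi", last_output="x" * 1200)
print(len(kb.dump_truncated()["last_output"]))  # 503 = 500 kept characters + "..."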
@@ -106,9 +114,9 @@ kb_lock = Lock()

  # LLMs
  # repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
- repharser_llm = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct") | StrOutputParser()
- # instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
- instruct_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
+ #repharser_llm = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct") | StrOutputParser()
+ instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
+ rephraser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
  relevance_llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct") | StrOutputParser()
  answer_llm = ChatOpenAI(
      model="gpt-4o",
@@ -121,59 +129,92 @@ answer_llm = ChatOpenAI(
  # Prompts
  repharser_prompt = ChatPromptTemplate.from_template(
      "You are a smart retrieval assistant helping a search engine understand user intent more precisely.\n\n"
-     "Given a user question, generate **1 diverse rewrite** that is semantically equivalent but phrased differently. \n"
-     "The rewrite should be optimized for **retrieval from a hybrid system** using BM25 (keyword match) and dense vector embeddings.\n\n"
+     "Your job is to rewrite the user's message into a clearer, more descriptive query for information retrieval.\n\n"
+     "Context:\n"
+     "- The user may sometimes respond with short or vague messages like 'B', 'yes', or 'tell me more'.\n"
+     "- In such cases, refer to the user's previous assistant message or `last_followups` list to understand the actual intent.\n"
+     "- Expand their reply based on that context to create a full meaningful query.\n\n"
+     "User Query:\n{query}\n\n"
+     "Last Follow-up Suggestions:\n{memory}\n\n"
      "Guidelines:\n"
-     "- Expand abbreviations or implied intent when useful\n"
-     "- Add relevant technical terms, tools, frameworks, or synonyms (e.g., 'LLM', 'pipeline', 'project')\n"
-     "- Rephrase using different sentence structure or tone\n"
-     "- Use field-specific vocabulary (e.g., data science, ML, software, research) if it fits the query\n"
+     "- Expand abbreviations or implied selections\n"
+     "- Reconstruct full intent if the query is a reply to an earlier assistant suggestion\n"
+     "- Rephrase using domain-specific terms (e.g., ML, infrastructure, research, deployment)\n"
+     "- Focus on maximizing retrievability via keyword-rich formulation\n\n"
      "- Prioritize clarity and retrievability over stylistic variation\n\n"
-     "Original Question:\n{query}\n\n"
-     "Rewrite:\n1."
+     "Expanded Rewrite:\n1."
  )

  relevance_prompt = ChatPromptTemplate.from_template("""
- You are Krishna's personal AI assistant classifier.
- Your job is to decide whether a user's question can be meaningfully answered using the provided document chunks **or** relevant user memory.
+ You are Krishna's personal AI assistant classifier and chunk reranker.
+
+ Your job has two goals:
+ 1. Classify whether a user's question can be meaningfully answered using the retrieved document chunks or user memory.
+ 2. If it can, rerank the chunks from most to least relevant to the question.
+
  Return a JSON object:
- - "is_out_of_scope": true if the chunks and memory cannot help answer the question
- - "justification": a short sentence explaining your decision
+ - "is_out_of_scope": true if the **rewritten query**, original query, and memory offer no path to answer the user’s intent
+ - "justification": short explanation of your decision
+ - "reranked_chunks": a list of chunk indices ordered by decreasing relevance (only if in-scope)
+
  ---
- Special instructions:
- Treat short or vague queries like "yes", "tell me more", "go on", or "give me" as follow-up prompts.
- Assume the user is asking for **continuation** of the previous assistant response or follow-ups stored in memory. Consider that context as *in-scope*.
- Also consider if the user's question can be answered using stored memory (like their name, company, interests, or last follow-up topics).
- Do NOT classify these types of queries as "out of scope".
- Only mark as out-of-scope if the user asks something truly unrelated to both:
- - Krishna's background
- - Stored user memory
+
+ Special Instructions:
+
+ If the user input is vague, short, or a follow-up (e.g., "yes", "A", "B", "go on", "sure"), check:
+ If the assistant previously showed suggestions or follow-up questions (in memory → `last_followups`)
+ If the rewritten query adds meaningful context (e.g., "B" → "Tell me more about Data-Centric AI")
+
+ If **any of the above** resolve the intent, treat it as in-scope.
+
+ ❌ Mark as out-of-scope only if:
+ - The query (even after rewriting) has no clear relevance to Krishna's profile or user memory
+ - There are no helpful document chunks or memory fields to answer it
+
+ 🚫 Do not infer meaning through metaphor or vague similarity — only use concrete, literal context.
+
  ---
+
  Examples:
+
- Q: "Tell me more"
- Chunks: previously retrieved info about Krishna's ML tools
- Memory: User previously asked about PyTorch and ML pipelines
+ Q: "B"
+ Rewritten Query: "Tell me more about Data-Centric AI for Real-Time Analytics"
+ last_followups: [ ... contains that option ... ]
+ Memory: user showed interest in analytics pipelines
+
  Output:
  {{
    "is_out_of_scope": false,
-   "justification": "User is requesting a follow-up to a valid context, based on prior conversation"
+   "justification": "User is selecting a previous assistant suggestion",
+   "reranked_chunks": [0, 2, 1]
  }}
+
  Q: "What is Krishna's Hogwarts house?"
- Chunks: None about fiction
- Memory: User hasn't mentioned fiction/fantasy
+ Chunks: none on fiction
+ Memory: no fantasy topics
+
  Output:
  {{
    "is_out_of_scope": true,
-   "justification": "The question is unrelated to Krishna or user context"
+   "justification": "Fictional topic unrelated to Krishna or conversation"
  }}
+
  ---
+
  Now your turn.
+
- User Question:
+ Original User Question:
  "{query}"
+
+ Rewritten Query (if available):
+ "{rewritten_query}"
+
  Chunks:
  {contents}
+
  User Memory (Knowledge Base):
  {memory}
+
  Return ONLY the JSON object.
  """)

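The reworked relevance prompt asks the model for a bare JSON object with is_out_of_scope, justification and, when in scope, reranked_chunks. The repo's safe_json_parse (named in the next hunk header but not shown in this diff) presumably tolerates stray text around that object; a hedged, hypothetical sketch of such a parser:

import json
import re
from typing import Dict

def safe_json_parse_sketch(s: str) -> Dict:
    # Hypothetical helper, not the repo's implementation: grab the first {...} block,
    # decode it, and fall back to an out-of-scope verdict on malformed output.
    match = re.search(r"\{.*\}", s, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"is_out_of_scope": True, "justification": "Could not parse classifier output"}

raw = 'Sure! {"is_out_of_scope": false, "justification": "ok", "reranked_chunks": [0, 2, 1]}'
print(safe_json_parse_sketch(raw)["reranked_chunks"])  # [0, 2, 1]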
@@ -325,16 +366,18 @@ def safe_json_parse(s: str) -> Dict:
  # Rewrite generation
  rephraser_chain = (
      repharser_prompt
-     | repharser_llm
+     | rephraser_llm
      | RunnableLambda(parse_rewrites)
  )

  generate_rewrites_chain = (
      RunnableAssign({
-         "rewrites": lambda x: rephraser_chain.invoke({"query": x["query"]})
+         "rewrites": lambda x: rephraser_chain.invoke({"query": x["query"],
+                                                       "memory": x["memory"]})
      })
      | RunnableAssign({
-         "all_queries": lambda x: [x["query"]] + x["rewrites"]
+         "all_queries": lambda x: [x["query"]] + x["rewrites"],
+         "rewritten_query": lambda x: x["rewrites"][0] if x["rewrites"] else x["query"]
      })
  )

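The rewrite chain now forwards memory to the rephraser and exposes the first rewrite as rewritten_query. A plain-Python sketch of what the two RunnableAssign steps above add to the state dict (the rewrites are passed in directly here so the sketch stays offline):

def add_rewrites(state: dict, rewrites: list) -> dict:
    # Mirrors the lambdas above on a plain dict; in app.py the rewrites come from
    # rephraser_chain.invoke({"query": ..., "memory": ...}).
    state = {**state, "rewrites": rewrites}
    state["all_queries"] = [state["query"]] + state["rewrites"]
    state["rewritten_query"] = state["rewrites"][0] if state["rewrites"] else state["query"]
    return state

state = add_rewrites({"query": "B", "memory": "{}"},
                     ["Tell me more about Data-Centric AI for Real-Time Analytics"])
print(state["all_queries"])      # original reply plus the expanded rewrite, fed to hybrid retrieval
print(state["rewritten_query"])  # first rewrite, later handed to the relevance classifier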
@@ -345,8 +388,11 @@ hybrid_chain = generate_rewrites_chain | retrieve_chain
  # Validation
  extract_validation_inputs = RunnableLambda(lambda x: {
      "query": x["query"],
-     "contents": [c["content"] for c in x["chunks"]],
-     "memory": x["memory"]
+     "rewritten_query": x.get("rewritten_query", x["query"]),
+     "contents": "\n".join(
+         f"Chunk #{i}: {chunk['content']}" for i, chunk in enumerate(x["chunks"])
+     ),
+     "memory": json.dumps(x["memory"])
  })

  validation_chain = (
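
The validator now receives the chunks as a single indexed string, which is what lets it answer with reranked_chunks as positions. A toy rendering of that formatting (the chunk texts are made up):

import json

chunks = [{"content": "Krishna builds RAG pipelines."},
          {"content": "He works with PyTorch and FAISS."}]
contents = "\n".join(f"Chunk #{i}: {chunk['content']}" for i, chunk in enumerate(chunks))
print(contents)
# Chunk #0: Krishna builds RAG pipelines.
# Chunk #1: He works with PyTorch and FAISS.
print(json.dumps({"interests": ["analytics"]}))  # memory is serialized the same way before prompting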
@@ -432,6 +478,19 @@ def update_knowledge_base(session_id: str, user_input: str, assistant_response:
          print(f"✅ KNOWLEDGE BASE UPDATED FOR SESSION {session_id}")
      except Exception as e:
          print(f"❌ KNOWLEDGE BASE UPDATE FAILED: {str(e)}")
+
+ def reorder_chunks_if_needed(inputs):
+     validation = inputs.get("validation", {})
+     chunks = inputs.get("chunks", [])
+
+     if not validation.get("is_out_of_scope", True) and "reranked_chunks" in validation:
+         try:
+             ranked_indices = validation["reranked_chunks"]
+             inputs["chunks"] = [chunks[i] for i in ranked_indices if i < len(chunks)]
+         except Exception as e:
+             print("⚠️ Failed to reorder chunks:", e)
+
+     return inputs

  # New chain to preserve memory through the pipeline
  preserve_memory_chain = RunnableLambda(lambda x: {
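
A toy run of the new reorder_chunks_if_needed on a fabricated state dict, showing that the validator's ranking is applied only when the turn is in scope and that the bounds check drops indices past the end of the chunk list:

def reorder_chunks_if_needed(inputs):
    # Copied from the hunk above so this snippet runs standalone.
    validation = inputs.get("validation", {})
    chunks = inputs.get("chunks", [])
    if not validation.get("is_out_of_scope", True) and "reranked_chunks" in validation:
        try:
            ranked_indices = validation["reranked_chunks"]
            inputs["chunks"] = [chunks[i] for i in ranked_indices if i < len(chunks)]
        except Exception as e:
            print("⚠️ Failed to reorder chunks:", e)
    return inputs

state = {
    "chunks": [{"content": "a"}, {"content": "b"}, {"content": "c"}],
    "validation": {"is_out_of_scope": False, "reranked_chunks": [2, 0, 1]},
}
print([c["content"] for c in reorder_chunks_if_needed(state)["chunks"]])  # ['c', 'a', 'b']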
@@ -443,6 +502,8 @@ preserve_memory_chain = RunnableLambda(lambda x: {
  full_pipeline = (
      preserve_memory_chain
      | RunnableAssign({"validation": validation_chain})
+     | RunnableLambda(reorder_chunks_if_needed)
+     #| PPrint()
      | answer_chain
  )

@@ -460,7 +521,7 @@ def chat_interface(message, history, request: gr.Request):
      "alpha": 0.5,
      "vectorstore": vectorstore,
      "bm25_retriever": bm25_retriever,
-     "memory": kb.model_dump_json()
+     "memory": json.dumps(kb.dump_truncated())
  }

  full_response = ""
@@ -487,11 +548,6 @@ demo = gr.ChatInterface(
      margin: 0 auto;
      width: 100%;
  }
- .gradio-container{
-     max-width: 1000px !important;
-     margin: 0 auto;
-     width:100%;
- }
  .float {
      display: none;
  }
@@ -511,6 +567,11 @@ demo = gr.ChatInterface(
      width: 1px;
      height: 1px;
  }
+ .gradio-container{
+     max-width: 1000px !important;
+     margin: 0 auto;
+     width:100%;
+ }

  ::-webkit-scrollbar-track {
      background: transparent;
 