krishnadhulipalla committed
Commit 44ac5ee · Parent(s): 0320bf7

Updated the global KB to a per-session KB

Files changed (1):
  1. app.py +54 -68
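
In outline: the commit replaces the single process-wide KnowledgeBase with a dict of per-session KBs, keyed by Gradio's request.session_hash and guarded by a threading.Lock, and threads the serialized KB through the chain inputs as "memory" instead of reading a global. A minimal, self-contained sketch of that pattern — the KnowledgeBase fields and the chat_fn handler below are trimmed stand-ins, not the app's real ones:

from threading import Lock
import gradio as gr
from pydantic import BaseModel

class KnowledgeBase(BaseModel):
    # Trimmed stand-in; the real model tracks user name, interests, tone, etc.
    last_topic: str = ""

user_kbs: dict[str, KnowledgeBase] = {}  # session_hash -> that user's KB
kb_lock = Lock()                         # guards user_kbs across Gradio worker threads

def get_knowledge_base(session_id: str) -> KnowledgeBase:
    # Get or create the KB for one browser session
    with kb_lock:
        if session_id not in user_kbs:
            user_kbs[session_id] = KnowledgeBase()
        return user_kbs[session_id]

def chat_fn(message, history, request: gr.Request):
    # Gradio injects `request` when the handler declares a gr.Request parameter;
    # session_hash is stable per browser tab, so each tab gets its own KB.
    kb = get_knowledge_base(request.session_hash)
    return f"you said: {message} (memory: {kb.model_dump_json()})"

State lives server-side for the lifetime of the process, so a restart clears all sessions; nothing is persisted.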
app.py CHANGED
@@ -8,9 +8,9 @@ import re
 import hashlib
 import gradio as gr
 from functools import partial
-import threading
 from collections import defaultdict
 from pathlib import Path
+from threading import Lock
 from typing import List, Dict, Any, Optional, Literal, Type
 import numpy as np
 from dotenv import load_dotenv
@@ -48,11 +48,8 @@ if not Path(CHUNKS_PATH).exists():
     raise FileNotFoundError(f"Chunks file not found at {CHUNKS_PATH}")
 
 KRISHNA_BIO = """Krishna Vamsi Dhulipalla completed his master's in Computer Science at Virginia Tech (degree awarded in December 2024) and has over 3 years of experience across data engineering, machine learning research, and real-time analytics. He specializes in building scalable data systems and intelligent LLM-powered applications, with strong expertise in Python, PyTorch, Hugging Face Transformers, and end-to-end ML pipelines.
-
 He has led projects involving retrieval-augmented generation (RAG), feature selection for genomic classification, fine-tuning domain-specific LLMs (e.g., DNABERT, HyenaDNA), and real-time forecasting systems using Kafka, Spark, and Airflow. His cloud proficiency spans AWS (S3, SageMaker, ECS, CloudWatch), GCP (BigQuery, Cloud Composer), and DevOps tools like Docker, Kubernetes, and MLflow.
-
 Krishna’s research has focused on genomic sequence modeling, transformer optimization, MLOps automation, and cross-domain generalization. He has published work in bioinformatics and machine learning applications for circadian transcription prediction and transcription factor binding.
-
 He holds certifications in NVIDIA’s RAG Agents with LLMs, Google Cloud Data Engineering, and AWS ML Specialization. Krishna is passionate about scalable LLM infrastructure, data-centric AI, and domain-adaptive ML solutions — combining deep technical expertise with real-world engineering impact.
 \n\n
 Besides his career, Krishna loves hiking, cricket, and exploring new technologies. He is a big fan of Marvel movies and space exploration.
@@ -103,12 +100,15 @@ class KnowledgeBase(BaseModel):
     tone: Optional[Literal['formal', 'casual', 'playful', 'direct', 'uncertain']] = Field(None, description="Inferred tone or attitude from the user based on recent input")
 
 # Initialize the knowledge base
-knowledge_base = KnowledgeBase()
+# knowledge_base = KnowledgeBase()
+user_kbs = {}
+kb_lock = Lock()
 
 # LLMs
 # repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
 repharser_llm = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct") | StrOutputParser()
-instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
+# instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
+instruct_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
 relevance_llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct") | StrOutputParser()
 answer_llm = ChatOpenAI(
     model="gpt-4o",
@@ -135,69 +135,48 @@ repharser_prompt = ChatPromptTemplate.from_template(
 
 relevance_prompt = ChatPromptTemplate.from_template("""
 You are Krishna's personal AI assistant classifier.
-
 Your job is to decide whether a user's question can be meaningfully answered using the provided document chunks **or** relevant user memory.
-
 Return a JSON object:
 - "is_out_of_scope": true if the chunks and memory cannot help answer the question
 - "justification": a short sentence explaining your decision
-
 ---
-
 Special instructions:
-
 ✅ Treat short or vague queries like "yes", "tell me more", "go on", or "give me" as follow-up prompts.
 Assume the user is asking for **continuation** of the previous assistant response or follow-ups stored in memory. Consider that context as *in-scope*.
-
 ✅ Also consider if the user's question can be answered using stored memory (like their name, company, interests, or last follow-up topics).
-
 Do NOT classify these types of queries as "out of scope".
-
 Only mark as out-of-scope if the user asks something truly unrelated to both:
 - Krishna's background
 - Stored user memory
-
 ---
-
 Examples:
-
 Q: "Tell me more"
 Chunks: previously retrieved info about Krishna's ML tools
 Memory: User previously asked about PyTorch and ML pipelines
-
 Output:
 {{
 "is_out_of_scope": false,
 "justification": "User is requesting a follow-up to a valid context, based on prior conversation"
 }}
-
 Q: "What is Krishna's Hogwarts house?"
 Chunks: None about fiction
 Memory: User hasn't mentioned fiction/fantasy
-
 Output:
 {{
 "is_out_of_scope": true,
 "justification": "The question is unrelated to Krishna or user context"
 }}
-
 ---
-
 Now your turn.
-
 User Question:
 "{query}"
-
 Chunks:
 {contents}
-
 User Memory (Knowledge Base):
 {memory}
-
 Return ONLY the JSON object.
 """)
 
-
 answer_prompt_relevant = ChatPromptTemplate.from_template(
     "You are Krishna's personal AI assistant. Your job is to answer the user’s question clearly, thoroughly, and professionally using the provided context.\n"
     "Rather than copying sentences, synthesize relevant insights and explain them like a knowledgeable peer.\n\n"
@@ -367,7 +346,7 @@ hybrid_chain = generate_rewrites_chain | retrieve_chain
 extract_validation_inputs = RunnableLambda(lambda x: {
     "query": x["query"],
     "contents": [c["content"] for c in x["chunks"]],
-    "memory": knowledge_base.model_dump_json()
+    "memory": x["memory"]
 })
 
 validation_chain = (
@@ -388,7 +367,7 @@ def prepare_answer_inputs(x: Dict) -> Dict:
         "profile": KRISHNA_BIO,
         "context": context,
         "use_fallback": x["validation"]["is_out_of_scope"],
-        "memory": knowledge_base.model_dump_json()
+        "memory": x["memory"]
     }
 
 select_and_prompt = RunnableLambda(lambda x:
@@ -430,40 +409,48 @@ knowledge_extractor = RExtract(
     prompt=parser_prompt
 )
 
-def update_knowledge_base(user_input: str, assistant_response: str):
-    """Update the knowledge base asynchronously after response is sent"""
-    global knowledge_base
-
+def get_knowledge_base(session_id: str) -> KnowledgeBase:
+    """Get or create a knowledge base for a session"""
+    with kb_lock:
+        if session_id not in user_kbs:
+            user_kbs[session_id] = KnowledgeBase()
+        return user_kbs[session_id]
+
+def update_knowledge_base(session_id: str, user_input: str, assistant_response: str):
+    """Update the knowledge base for a specific session"""
     try:
-        # print("\n" + "="*50)
-        # print("🔥 STARTING KNOWLEDGE BASE UPDATE")
-        # print(f"User Input: {user_input}")
-        # print(f"Assistant Response: {assistant_response[:100]}...")
-
-        # Prepare input for knowledge extractor
+        kb = get_knowledge_base(session_id)
         kb_input = {
-            "know_base": knowledge_base.model_dump_json(), # Fixed deprecation
+            "know_base": kb.model_dump_json(),
             "input": user_input,
             "output": assistant_response
         }
-
-        #print("🧠 Calling knowledge extractor...")
         new_kb = knowledge_extractor.invoke(kb_input)
-        knowledge_base = new_kb # Update global knowledge base
-
-        # Detailed debug output
-        print("✅ KNOWLEDGE BASE UPDATED SUCCESSFULLY")
-
+        with kb_lock:
+            user_kbs[session_id] = new_kb
+        print(f"✅ KNOWLEDGE BASE UPDATED FOR SESSION {session_id}")
     except Exception as e:
         print(f"❌ KNOWLEDGE BASE UPDATE FAILED: {str(e)}")
-        import traceback
-        traceback.print_exc()
 
-# Full Pipeline
-full_pipeline = hybrid_chain | RunnableAssign({"validation": validation_chain}) | answer_chain
+# New chain to preserve memory through the pipeline
+preserve_memory_chain = RunnableLambda(lambda x: {
+    **hybrid_chain.invoke(x),
+    "memory": x["memory"]
+})
 
+# Full pipeline
+full_pipeline = (
+    preserve_memory_chain
+    | RunnableAssign({"validation": validation_chain})
+    | answer_chain
+)
 
-def chat_interface(message, history):
+def chat_interface(message, history, request: gr.Request):
+    """Modified chat interface with session support"""
+    session_id = request.session_hash
+    kb = get_knowledge_base(session_id)
+
+    # Initialize inputs with session-specific KB
     inputs = {
         "query": message,
         "all_queries": [message],
@@ -472,21 +459,18 @@ def chat_interface(message, history):
         "alpha": 0.5,
         "vectorstore": vectorstore,
         "bm25_retriever": bm25_retriever,
+        "memory": kb.model_dump_json()
     }
+
     full_response = ""
-
-    # Stream the response to user
     for chunk in full_pipeline.stream(inputs):
-        if isinstance(chunk, dict) and "answer" in chunk:
-            full_response += chunk["answer"]
-            yield full_response
-        elif isinstance(chunk, str):
+        if isinstance(chunk, str):
             full_response += chunk
             yield full_response
-
-    # After streaming completes, update KB in background thread
+
+    # Update KB after response
     if full_response:
-        update_knowledge_base(message, full_response)
+        update_knowledge_base(session_id, message, full_response)
 
 with gr.Blocks(css="""
     html, body, .gradio-container {
@@ -500,20 +484,17 @@ with gr.Blocks(css="""
         margin: 0 auto;
         padding: 1rem;
     }
-
     .chatbox-container {
         display: flex;
         flex-direction: column;
         height: 95%;
         overflow-y: auto;
     }
-
     .chatbot {
         flex: 1;
         overflow-y: auto;
         min-height: 500px;
     }
-
     .textbox {
         margin-top: 1rem;
     }
@@ -532,10 +513,15 @@ demo = gr.ChatInterface(
     title="💬 Ask Krishna's AI Assistant",
     description="💡 Ask anything about Krishna Vamsi Dhulipalla",
     examples=[
-        "Give me an overview of Krishna Vamsi Dhulipallas work experience across different roles?",
+        "Give me an overview of Krishna Vamsi Dhulipalla's work experience across different roles?",
         "What programming languages and tools does Krishna use for data science?",
         "Can this chatbot tell me what Krishna's chatbot architecture looks like and how it works?"
-    ],
+    ]
 )
-
-demo.launch(max_threads=4, prevent_thread_lock=True, debug=True)
+# Launch with request support
+demo.queue()
+demo.launch(
+    max_threads=4,
+    prevent_thread_lock=True,
+    debug=True
+)
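
One subtlety the diff encodes: each stage's output dict is what flows to the next stage, so any key the retrieval stage does not emit (like "memory") would be lost downstream. That is what preserve_memory_chain is for. A small illustration with a dummy stand-in for hybrid_chain — everything here except RunnableLambda is a hypothetical placeholder:

from langchain_core.runnables import RunnableLambda

# Stand-in retrieval chain: note its output carries no "memory" key.
hybrid_chain = RunnableLambda(lambda x: {"query": x["query"], "chunks": []})

# Re-attach "memory" so downstream stages (validation, answer) can template it in.
preserve_memory_chain = RunnableLambda(lambda x: {
    **hybrid_chain.invoke(x),
    "memory": x["memory"],
})

print(preserve_memory_chain.invoke({"query": "hi", "memory": '{"name": "A"}'}))
# -> {'query': 'hi', 'chunks': [], 'memory': '{"name": "A"}'}

The trade-off is that calling hybrid_chain.invoke inside a lambda runs that stage as a single blocking call; token streaming is unaffected because the final answer stage is the one that streams. RunnablePassthrough.assign(...) would achieve the same key preservation by merging new keys into the input dict rather than replacing it.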