krishnadhulipalla committed
Commit fd1d0b8 · 1 Parent(s): a141819

Update app.py

Require NVIDIA_API_KEY to be present in the process environment (e.g., a Hugging Face Spaces secret) instead of loading it from a local .env file, and fail fast with a RuntimeError when it is missing.

Files changed (1)
  1. app.py +388 -384
app.py CHANGED
@@ -1,385 +1,389 @@
- import os
- import json
- import re
- import hashlib
- from functools import partial
- from collections import defaultdict
- from pathlib import Path
- from typing import List, Dict, Any
- import numpy as np
- from dotenv import load_dotenv
- from rich.console import Console
- from rich.style import Style
- from langchain_core.runnables import RunnableLambda
- from langchain_nvidia_ai_endpoints import ChatNVIDIA
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.prompts import ChatPromptTemplate
- from langchain.schema.runnable.passthrough import RunnableAssign
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.docstore.document import Document
- from langchain.retrievers import BM25Retriever
- from langchain_openai import ChatOpenAI
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-
- dotenv_path = os.path.join(os.getcwd(), ".env")
- load_dotenv(dotenv_path)
- api_key = os.getenv("NVIDIA_API_KEY")
- os.environ["NVIDIA_API_KEY"] = api_key
-
- # Constants
- FAISS_PATH = "faiss_store/v30_600_150"
- CHUNKS_PATH = "all_chunks.json"
- KRISHNA_BIO = """Krishna Vamsi Dhulipalla is a graduate student in Computer Science at Virginia Tech (M.Eng, expected 2024), with over 3 years of experience across data engineering, machine learning research, and real-time analytics. He specializes in building scalable data systems and intelligent LLM-powered applications, with strong expertise in Python, PyTorch, Hugging Face Transformers, and end-to-end ML pipelines.
-
- He has led projects involving retrieval-augmented generation (RAG), feature selection for genomic classification, fine-tuning domain-specific LLMs (e.g., DNABERT, HyenaDNA), and real-time forecasting systems using Kafka, Spark, and Airflow. His cloud proficiency spans AWS (S3, SageMaker, ECS, CloudWatch), GCP (BigQuery, Cloud Composer), and DevOps tools like Docker, Kubernetes, and MLflow.
-
- Krishna’s academic focus areas include genomic sequence modeling, transformer optimization, MLOps automation, and cross-domain generalization. He has published research in bioinformatics and ML applications for circadian transcription prediction and transcription factor binding.
-
- He is certified in NVIDIA’s RAG Agents with LLMs, Google Cloud Data Engineering, AWS ML Specialization, and has a proven ability to blend research and engineering in real-world systems. Krishna is passionate about scalable LLM infra, data-centric AI, and domain-adaptive ML solutions."""
-
- def initialize_console():
-     console = Console()
-     base_style = Style(color="#76B900", bold=True)
-     return partial(console.print, style=base_style)
-
- pprint = initialize_console()
-
- def load_chunks_from_json(path: str = CHUNKS_PATH) -> List[Dict]:
-     with open(path, "r", encoding="utf-8") as f:
-         return json.load(f)
-
- def load_faiss(path: str = FAISS_PATH,
-                model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> FAISS:
-     embeddings = HuggingFaceEmbeddings(model_name=model_name)
-     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
-
- def initialize_resources():
-     vectorstore = load_faiss()
-     all_chunks = load_chunks_from_json()
-     all_texts = [chunk["text"] for chunk in all_chunks]
-     metadatas = [chunk["metadata"] for chunk in all_chunks]
-     return vectorstore, all_chunks, all_texts, metadatas
-
- vectorstore, all_chunks, all_texts, metadatas = initialize_resources()
-
- # LLMs
- repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
- relevance_llm = ChatNVIDIA(model="meta/llama3-70b-instruct") | StrOutputParser()
- answer_llm = ChatOpenAI(
-     model="gpt-4-1106-preview",
-     temperature=0.3,
-     openai_api_key=os.getenv("OPENAI_API_KEY"),
-     streaming=True,
-     callbacks=[StreamingStdOutCallbackHandler()]
- ) | StrOutputParser()
-
-
- # Prompts
- repharser_prompt = ChatPromptTemplate.from_template(
-     "Rewrite the question below in 4 diverse ways to retrieve semantically similar information.Ensure diversity in phrasings across style, voice, and abstraction:\n\nQuestion: {query}\n\nRewrites:"
- )
-
- relevance_prompt = ChatPromptTemplate.from_template("""
- You are Krishna's personal AI assistant validator.
- Your job is to review a user's question and a list of retrieved document chunks.
- Identify which chunks (if any) directly help answer the question. Return **all relevant chunks**.
-
- ---
- ⚠️ Do NOT select chunks just because they include keywords or technical terms.
-
- Exclude chunks that:
- - Mention universities, CGPA, or education history (they show qualifications, not skills)
- - List certifications or course names (they show credentials, not skills used)
- - Describe goals, future plans, or job aspirations
- - Contain tools mentioned in passing without describing actual usage
-
- Only include chunks if they contain **evidence of specific knowledge, tools used, skills applied, or experience demonstrated.**
-
- ---
-
- 🔎 Examples:
-
- Q1: "What are Krishna's skills?"
- - Chunk A: Lists programming languages, ML tools, and projects → ✅
- - Chunk B: Talks about a Coursera certificate in ML → ❌
- - Chunk C: States a CGPA and master’s degree → ❌
- - Chunk D: Describes tools Krishna used in his work → ✅
-
- Output:
- {{
- "valid_chunks": [A, D],
- "is_out_of_scope": false,
- "justification": "Chunks A and D describe tools and skills Krishna has actually used."
- }}
-
- Q2: "What is Krishna's favorite color?"
- - All chunks are about technical work or academic history
-
- Output:
- {{
- "valid_chunks": [],
- "is_out_of_scope": true,
- "justification": "None of the chunks are related to the user's question about preferences or colors."
- }}
-
- ---
-
- Now your turn.
-
- User Question:
- "{query}"
-
- Chunks:
- {contents}
-
- Return only the JSON object. Think carefully before selecting any chunk.
- """)
-
- answer_prompt_relevant = ChatPromptTemplate.from_template(
-     "You are Krishna's personal AI assistant. Your job is to answer the user’s question clearly and professionally using the provided context.\n"
-     "Rather than copying sentences, synthesize relevant insights and explain them like a knowledgeable peer.\n\n"
-     "Krishna's Background:\n{profile}\n\n"
-     "Make your response rich and informative by:\n"
-     "- Combining relevant facts from multiple parts of the context\n"
-     "- Using natural, human-style language (not just bullet points)\n"
-     "- Expanding briefly on tools or skills when appropriate\n"
-     "- Avoiding repetition, filler, or hallucinations\n\n"
-     "Context:\n{context}\n\n"
-     "User Question:\n{query}\n\n"
-     "Answer:"
- )
-
- answer_prompt_fallback = ChatPromptTemplate.from_template(
-     "You are Krishna’s personal AI assistant. The user asked a question unrelated to Krishna’s background.\n"
-     "Gently let the user know, and then pivot to something Krishna is actually involved in to keep the conversation helpful.\n\n"
-     "Krishna's Background:\n{profile}\n\n"
-     "User Question:\n{query}\n\n"
-     "Your Answer:"
- )
- # Helper Functions
- def parse_rewrites(raw_response: str) -> list[str]:
-     lines = raw_response.strip().split("\n")
-     return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:4]
-
- def hybrid_retrieve(inputs, exclude_terms=None):
-     # if exclude_terms is None:
-     # exclude_terms = ["cgpa", "university", "b.tech", "m.s.", "certification", "coursera", "edx", "goal", "aspiration", "linkedin", "publication", "ieee", "doi", "degree"]
-
-     all_queries = inputs["all_queries"]
-     bm25_retriever = BM25Retriever.from_texts(texts=all_texts, metadatas=metadatas)
-     bm25_retriever.k = inputs["k_per_query"]
-     vectorstore = inputs["vectorstore"]
-     alpha = inputs["alpha"]
-     top_k = inputs.get("top_k", 15)
-
-     scored_chunks = defaultdict(lambda: {
-         "vector_scores": [],
-         "bm25_score": 0.0,
-         "content": None,
-         "metadata": None,
-     })
-
-     for subquery in all_queries:
-         vec_hits = vectorstore.similarity_search_with_score(subquery, k=inputs["k_per_query"])
-         for doc, score in vec_hits:
-             key = hashlib.md5(doc.page_content.encode("utf-8")).hexdigest()
-             scored_chunks[key]["vector_scores"].append(score)
-             scored_chunks[key]["content"] = doc.page_content
-             scored_chunks[key]["metadata"] = doc.metadata
-
-         bm_hits = bm25_retriever.invoke(subquery)
-         for rank, doc in enumerate(bm_hits):
-             key = hashlib.md5(doc.page_content.encode("utf-8")).hexdigest()
-             bm_score = 1.0 - (rank / inputs["k_per_query"])
-             scored_chunks[key]["bm25_score"] += bm_score
-             scored_chunks[key]["content"] = doc.page_content
-             scored_chunks[key]["metadata"] = doc.metadata
-
-     all_vec_means = [np.mean(v["vector_scores"]) for v in scored_chunks.values() if v["vector_scores"]]
-     max_vec = max(all_vec_means) if all_vec_means else 1
-     min_vec = min(all_vec_means) if all_vec_means else 0
-
-     final_results = []
-     for chunk in scored_chunks.values():
-         vec_score = np.mean(chunk["vector_scores"]) if chunk["vector_scores"] else 0.0
-         norm_vec = (vec_score - min_vec) / (max_vec - min_vec) if max_vec != min_vec else 1.0
-         bm25_score = chunk["bm25_score"] / len(all_queries)
-         final_score = alpha * norm_vec + (1 - alpha) * bm25_score
-
-         content = chunk["content"].lower()
-         # if any(term in content for term in exclude_terms):
-         # continue
-         if final_score < 0.05 or len(content.strip()) < 100:
-             continue
-
-         final_results.append({
-             "content": chunk["content"],
-             "source": chunk["metadata"].get("source", ""),
-             "final_score": float(round(final_score, 4)),
-             "vector_score": float(round(vec_score, 4)),
-             "bm25_score": float(round(bm25_score, 4)),
-             "metadata": chunk["metadata"],
-             "summary": chunk["metadata"].get("summary", ""),
-             "synthetic_queries": chunk["metadata"].get("synthetic_queries", [])
-         })
-
-     final_results = sorted(final_results, key=lambda x: x["final_score"], reverse=True)
-
-     seen = set()
-     unique_chunks = []
-     for chunk in final_results:
-         clean_text = re.sub(r'\W+', '', chunk["content"].lower())[:300]
-         fingerprint = (chunk["source"], clean_text)
-         if fingerprint not in seen:
-             seen.add(fingerprint)
-             unique_chunks.append(chunk)
-
-     unique_chunks = unique_chunks[:top_k]
-
-     return {
-         "query": inputs["query"],
-         "chunks": unique_chunks
-     }
-
- def safe_json_parse(s: str) -> Dict:
-     return json.loads(s) if isinstance(s, str) and "valid_chunks" in s else {
-         "valid_chunks": [],
-         "is_out_of_scope": True,
-         "justification": "Fallback due to invalid LLM output"
-     }
-
- # Rewrite generation
- rephraser_chain = (
-     repharser_prompt
-     | repharser_llm
-     | RunnableLambda(parse_rewrites)
- )
-
- generate_rewrites_chain = (
-     RunnableAssign({
-         "rewrites": lambda x: rephraser_chain.invoke({"query": x["query"]})
-     })
-     | RunnableAssign({
-         "all_queries": lambda x: [x["query"]] + x["rewrites"]
-     })
- )
-
- # Retrieval
- retrieve_chain = RunnableLambda(hybrid_retrieve)
- hybrid_chain = generate_rewrites_chain | retrieve_chain
-
- # Validation
- extract_validation_inputs = RunnableLambda(lambda x: {
-     "query": x["query"],
-     "contents": [c["content"] for c in x["chunks"]]
- })
-
- validation_chain = (
-     extract_validation_inputs
-     | relevance_prompt
-     | relevance_llm
-     | RunnableLambda(safe_json_parse)
- )
-
- # Answer Generation
- def prepare_answer_inputs(x: Dict) -> Dict:
-     context = KRISHNA_BIO if x["validation"]["is_out_of_scope"] else "\n\n".join(
-         [x["chunks"][i-1]["content"] for i in x["validation"]["valid_chunks"]])
-
-     return {
-         "query": x["query"],
-         "profile": KRISHNA_BIO,
-         "context": context,
-         "use_fallback": x["validation"]["is_out_of_scope"]
-     }
-
- select_and_prompt = RunnableLambda(lambda x:
-     answer_prompt_fallback.invoke(x) if x["use_fallback"]
-     else answer_prompt_relevant.invoke(x))
-
- answer_chain = (
-     prepare_answer_inputs
-     | select_and_prompt
-     | relevance_llm
- )
-
- # Full Pipeline
- full_pipeline = (
-     hybrid_chain
-     | RunnableAssign({"validation": validation_chain})
-     | RunnableAssign({"answer": answer_chain})
- )
-
- import gradio as gr
-
- def chat_interface(message, history):
-     inputs = {
-         "query": message,
-         "all_queries": [message],
-         "all_texts": all_chunks,
-         "k_per_query": 3,
-         "alpha": 0.7,
-         "vectorstore": vectorstore,
-         "full_document": "",
-     }
-     response = ""
-     for chunk in full_pipeline.stream(inputs):
-         if isinstance(chunk, str):
-             response += chunk
-             yield response
-         elif isinstance(chunk, dict) and "answer" in chunk:
-             response += chunk["answer"]
-             yield response
-
- with gr.Blocks(css="""
- html, body, .gradio-container {
-     height: 100%;
-     margin: 0;
-     padding: 0;
- }
- .gradio-container {
-     width: 90%;
-     max-width: 1000px;
-     margin: 0 auto;
-     padding: 1rem;
- }
-
- .chatbox-container {
-     display: flex;
-     flex-direction: column;
-     height: 95%;
- }
-
- .chatbot {
-     flex: 1;
-     overflow-y: auto;
-     min-height: 500px;
- }
-
- .textbox {
-     margin-top: 1rem;
- }
- #component-523 {
-     height: 98%;
- }
- """) as demo:
-     with gr.Column(elem_classes="chatbox-container"):
-         gr.Markdown("## 💬 Ask Krishna's AI Assistant")
-         gr.Markdown("💡 Ask anything about Krishna Vamsi Dhulipalla")
-         chatbot = gr.Chatbot(elem_classes="chatbot")
-         textbox = gr.Textbox(placeholder="Ask a question about Krishna...", elem_classes="textbox")
-
-     gr.ChatInterface(
-         fn=chat_interface,
-         chatbot=chatbot,
-         textbox=textbox,
-         examples=[
-             "What are Krishna's research interests?",
-             "Where did Krishna work?",
-             "What did he study at Virginia Tech?"
-         ],
-     )
-
  demo.launch()
 
+ import os
+ import json
+ import re
+ import hashlib
+ from functools import partial
+ from collections import defaultdict
+ from pathlib import Path
+ from typing import List, Dict, Any
+ import numpy as np
+ from dotenv import load_dotenv
+ from rich.console import Console
+ from rich.style import Style
+ from langchain_core.runnables import RunnableLambda
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.schema.runnable.passthrough import RunnableAssign
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.docstore.document import Document
+ from langchain.retrievers import BM25Retriever
+ from langchain_openai import ChatOpenAI
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+ #dotenv_path = os.path.join(os.getcwd(), ".env")
+ #load_dotenv(dotenv_path)
+ #api_key = os.getenv("NVIDIA_API_KEY")
+ #os.environ["NVIDIA_API_KEY"] = api_key
+
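+ # Read the key from the host environment (e.g., a Hugging Face Spaces secret) and fail fast at startup if it is absent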
+ api_key = os.environ.get("NVIDIA_API_KEY")
+ if not api_key:
+     raise RuntimeError("🚨 NVIDIA_API_KEY not found in environment! Please add it in Hugging Face Secrets.")
+
+ # Constants
+ FAISS_PATH = "faiss_store/v30_600_150"
+ CHUNKS_PATH = "all_chunks.json"
+ KRISHNA_BIO = """Krishna Vamsi Dhulipalla is a graduate student in Computer Science at Virginia Tech (M.Eng, expected 2024), with over 3 years of experience across data engineering, machine learning research, and real-time analytics. He specializes in building scalable data systems and intelligent LLM-powered applications, with strong expertise in Python, PyTorch, Hugging Face Transformers, and end-to-end ML pipelines.
+
+ He has led projects involving retrieval-augmented generation (RAG), feature selection for genomic classification, fine-tuning domain-specific LLMs (e.g., DNABERT, HyenaDNA), and real-time forecasting systems using Kafka, Spark, and Airflow. His cloud proficiency spans AWS (S3, SageMaker, ECS, CloudWatch), GCP (BigQuery, Cloud Composer), and DevOps tools like Docker, Kubernetes, and MLflow.
+
+ Krishna’s academic focus areas include genomic sequence modeling, transformer optimization, MLOps automation, and cross-domain generalization. He has published research in bioinformatics and ML applications for circadian transcription prediction and transcription factor binding.
+
+ He is certified in NVIDIA’s RAG Agents with LLMs, Google Cloud Data Engineering, AWS ML Specialization, and has a proven ability to blend research and engineering in real-world systems. Krishna is passionate about scalable LLM infra, data-centric AI, and domain-adaptive ML solutions."""
+
+ def initialize_console():
+     console = Console()
+     base_style = Style(color="#76B900", bold=True)
+     return partial(console.print, style=base_style)
+
+ pprint = initialize_console()
+
+ def load_chunks_from_json(path: str = CHUNKS_PATH) -> List[Dict]:
+     with open(path, "r", encoding="utf-8") as f:
+         return json.load(f)
+
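+ # Load the persisted FAISS index from disk; the embedding model must match the one the index was built with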
+ def load_faiss(path: str = FAISS_PATH,
+                model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> FAISS:
+     embeddings = HuggingFaceEmbeddings(model_name=model_name)
+     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
+
+ def initialize_resources():
+     vectorstore = load_faiss()
+     all_chunks = load_chunks_from_json()
+     all_texts = [chunk["text"] for chunk in all_chunks]
+     metadatas = [chunk["metadata"] for chunk in all_chunks]
+     return vectorstore, all_chunks, all_texts, metadatas
+
+ vectorstore, all_chunks, all_texts, metadatas = initialize_resources()
+
+ # LLMs
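+ # Mistral 7B generates query rewrites; LLaMA-3 70B both validates chunks and writes the final answer.
+ # answer_llm (GPT-4 Turbo) is configured with stdout streaming but is not referenced by any chain below.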
+ repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
+ relevance_llm = ChatNVIDIA(model="meta/llama3-70b-instruct") | StrOutputParser()
+ answer_llm = ChatOpenAI(
+     model="gpt-4-1106-preview",
+     temperature=0.3,
+     openai_api_key=os.environ.get("OPENAI_API_KEY"),
+     streaming=True,
+     callbacks=[StreamingStdOutCallbackHandler()]
+ ) | StrOutputParser()
+
+
+ # Prompts
+ repharser_prompt = ChatPromptTemplate.from_template(
+     "Rewrite the question below in 4 diverse ways to retrieve semantically similar information.Ensure diversity in phrasings across style, voice, and abstraction:\n\nQuestion: {query}\n\nRewrites:"
+ )
+
+ relevance_prompt = ChatPromptTemplate.from_template("""
+ You are Krishna's personal AI assistant validator.
+ Your job is to review a user's question and a list of retrieved document chunks.
+ Identify which chunks (if any) directly help answer the question. Return **all relevant chunks**.
+
+ ---
+ ⚠️ Do NOT select chunks just because they include keywords or technical terms.
+
+ Exclude chunks that:
+ - Mention universities, CGPA, or education history (they show qualifications, not skills)
+ - List certifications or course names (they show credentials, not skills used)
+ - Describe goals, future plans, or job aspirations
+ - Contain tools mentioned in passing without describing actual usage
+
+ Only include chunks if they contain **evidence of specific knowledge, tools used, skills applied, or experience demonstrated.**
+
+ ---
+
+ 🔎 Examples:
+
+ Q1: "What are Krishna's skills?"
+ - Chunk A: Lists programming languages, ML tools, and projects → ✅
+ - Chunk B: Talks about a Coursera certificate in ML → ❌
+ - Chunk C: States a CGPA and master’s degree → ❌
+ - Chunk D: Describes tools Krishna used in his work → ✅
+
+ Output:
+ {{
+ "valid_chunks": [A, D],
+ "is_out_of_scope": false,
+ "justification": "Chunks A and D describe tools and skills Krishna has actually used."
+ }}
+
+ Q2: "What is Krishna's favorite color?"
+ - All chunks are about technical work or academic history → ❌
+
+ Output:
+ {{
+ "valid_chunks": [],
+ "is_out_of_scope": true,
+ "justification": "None of the chunks are related to the user's question about preferences or colors."
+ }}
+
+ ---
+
+ Now your turn.
+
+ User Question:
+ "{query}"
+
+ Chunks:
+ {contents}
+
+ Return only the JSON object. Think carefully before selecting any chunk.
+ """)
+
+ answer_prompt_relevant = ChatPromptTemplate.from_template(
+     "You are Krishna's personal AI assistant. Your job is to answer the user’s question clearly and professionally using the provided context.\n"
+     "Rather than copying sentences, synthesize relevant insights and explain them like a knowledgeable peer.\n\n"
+     "Krishna's Background:\n{profile}\n\n"
+     "Make your response rich and informative by:\n"
+     "- Combining relevant facts from multiple parts of the context\n"
+     "- Using natural, human-style language (not just bullet points)\n"
+     "- Expanding briefly on tools or skills when appropriate\n"
+     "- Avoiding repetition, filler, or hallucinations\n\n"
+     "Context:\n{context}\n\n"
+     "User Question:\n{query}\n\n"
+     "Answer:"
+ )
+
+ answer_prompt_fallback = ChatPromptTemplate.from_template(
+     "You are Krishna’s personal AI assistant. The user asked a question unrelated to Krishna’s background.\n"
+     "Gently let the user know, and then pivot to something Krishna is actually involved in to keep the conversation helpful.\n\n"
+     "Krishna's Background:\n{profile}\n\n"
+     "User Question:\n{query}\n\n"
+     "Your Answer:"
+ )
+ # Helper Functions
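+ # Strip leading list numbering from each LLM-generated rewrite and keep at most 4 non-empty lines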
+ def parse_rewrites(raw_response: str) -> list[str]:
+     lines = raw_response.strip().split("\n")
+     return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:4]
+
+ def hybrid_retrieve(inputs, exclude_terms=None):
+     # if exclude_terms is None:
+     # exclude_terms = ["cgpa", "university", "b.tech", "m.s.", "certification", "coursera", "edx", "goal", "aspiration", "linkedin", "publication", "ieee", "doi", "degree"]
+
+     all_queries = inputs["all_queries"]
+     bm25_retriever = BM25Retriever.from_texts(texts=all_texts, metadatas=metadatas)
+     bm25_retriever.k = inputs["k_per_query"]
+     vectorstore = inputs["vectorstore"]
+     alpha = inputs["alpha"]
+     top_k = inputs.get("top_k", 15)
+
+     scored_chunks = defaultdict(lambda: {
+         "vector_scores": [],
+         "bm25_score": 0.0,
+         "content": None,
+         "metadata": None,
+     })
+
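+     # Pool vector and BM25 hits across the original query and all rewrites,
+     # keyed by an MD5 hash of the chunk text so repeat hits merge into one entry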
+     for subquery in all_queries:
+         vec_hits = vectorstore.similarity_search_with_score(subquery, k=inputs["k_per_query"])
+         for doc, score in vec_hits:
+             key = hashlib.md5(doc.page_content.encode("utf-8")).hexdigest()
+             scored_chunks[key]["vector_scores"].append(score)
+             scored_chunks[key]["content"] = doc.page_content
+             scored_chunks[key]["metadata"] = doc.metadata
+
+         bm_hits = bm25_retriever.invoke(subquery)
+         for rank, doc in enumerate(bm_hits):
+             key = hashlib.md5(doc.page_content.encode("utf-8")).hexdigest()
+             bm_score = 1.0 - (rank / inputs["k_per_query"])
+             scored_chunks[key]["bm25_score"] += bm_score
+             scored_chunks[key]["content"] = doc.page_content
+             scored_chunks[key]["metadata"] = doc.metadata
+
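+     # Min-max normalize the per-chunk mean vector scores so they are comparable with BM25 before blending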
+     all_vec_means = [np.mean(v["vector_scores"]) for v in scored_chunks.values() if v["vector_scores"]]
+     max_vec = max(all_vec_means) if all_vec_means else 1
+     min_vec = min(all_vec_means) if all_vec_means else 0
+
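+     # Blend scores: final = alpha * normalized vector score + (1 - alpha) * query-averaged BM25 score;
+     # chunks scoring below 0.05 or shorter than 100 characters are dropped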
+     final_results = []
+     for chunk in scored_chunks.values():
+         vec_score = np.mean(chunk["vector_scores"]) if chunk["vector_scores"] else 0.0
+         norm_vec = (vec_score - min_vec) / (max_vec - min_vec) if max_vec != min_vec else 1.0
+         bm25_score = chunk["bm25_score"] / len(all_queries)
+         final_score = alpha * norm_vec + (1 - alpha) * bm25_score
+
+         content = chunk["content"].lower()
+         # if any(term in content for term in exclude_terms):
+         # continue
+         if final_score < 0.05 or len(content.strip()) < 100:
+             continue
+
+         final_results.append({
+             "content": chunk["content"],
+             "source": chunk["metadata"].get("source", ""),
+             "final_score": float(round(final_score, 4)),
+             "vector_score": float(round(vec_score, 4)),
+             "bm25_score": float(round(bm25_score, 4)),
+             "metadata": chunk["metadata"],
+             "summary": chunk["metadata"].get("summary", ""),
+             "synthetic_queries": chunk["metadata"].get("synthetic_queries", [])
+         })
+
+     final_results = sorted(final_results, key=lambda x: x["final_score"], reverse=True)
+
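+     # Deduplicate near-identical chunks by (source, first 300 normalized characters), then keep the top_k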
+     seen = set()
+     unique_chunks = []
+     for chunk in final_results:
+         clean_text = re.sub(r'\W+', '', chunk["content"].lower())[:300]
+         fingerprint = (chunk["source"], clean_text)
+         if fingerprint not in seen:
+             seen.add(fingerprint)
+             unique_chunks.append(chunk)
+
+     unique_chunks = unique_chunks[:top_k]
+
+     return {
+         "query": inputs["query"],
+         "chunks": unique_chunks
+     }
+
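+ # Treat any validator reply that does not look like the expected JSON as an out-of-scope result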
+ def safe_json_parse(s: str) -> Dict:
+     return json.loads(s) if isinstance(s, str) and "valid_chunks" in s else {
+         "valid_chunks": [],
+         "is_out_of_scope": True,
+         "justification": "Fallback due to invalid LLM output"
+     }
+
+ # Rewrite generation
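+ # prompt -> Mistral 7B -> parsed list of up to 4 rewrites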
+ rephraser_chain = (
+     repharser_prompt
+     | repharser_llm
+     | RunnableLambda(parse_rewrites)
+ )
+
+ generate_rewrites_chain = (
+     RunnableAssign({
+         "rewrites": lambda x: rephraser_chain.invoke({"query": x["query"]})
+     })
+     | RunnableAssign({
+         "all_queries": lambda x: [x["query"]] + x["rewrites"]
+     })
+ )
+
+ # Retrieval
+ retrieve_chain = RunnableLambda(hybrid_retrieve)
+ hybrid_chain = generate_rewrites_chain | retrieve_chain
+
+ # Validation
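+ # LLaMA-3 70B reviews the retrieved chunks against the question and returns a JSON verdict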
+ extract_validation_inputs = RunnableLambda(lambda x: {
+     "query": x["query"],
+     "contents": [c["content"] for c in x["chunks"]]
+ })
+
+ validation_chain = (
+     extract_validation_inputs
+     | relevance_prompt
+     | relevance_llm
+     | RunnableLambda(safe_json_parse)
+ )
+
+ # Answer Generation
+ def prepare_answer_inputs(x: Dict) -> Dict:
+     context = KRISHNA_BIO if x["validation"]["is_out_of_scope"] else "\n\n".join(
+         [x["chunks"][i-1]["content"] for i in x["validation"]["valid_chunks"]])
+
+     return {
+         "query": x["query"],
+         "profile": KRISHNA_BIO,
+         "context": context,
+         "use_fallback": x["validation"]["is_out_of_scope"]
+     }
+
+ select_and_prompt = RunnableLambda(lambda x:
+     answer_prompt_fallback.invoke(x) if x["use_fallback"]
+     else answer_prompt_relevant.invoke(x))
+
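+ # The answer itself streams from relevance_llm (LLaMA-3 70B), not the GPT-4 answer_llm defined above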
+ answer_chain = (
+     prepare_answer_inputs
+     | select_and_prompt
+     | relevance_llm
+ )
+
+ # Full Pipeline
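+ # rewrites -> hybrid retrieval -> chunk validation -> answer; each RunnableAssign merges its
+ # output into the running state dict, so downstream steps see all upstream fields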
+ full_pipeline = (
+     hybrid_chain
+     | RunnableAssign({"validation": validation_chain})
+     | RunnableAssign({"answer": answer_chain})
+ )
+
+ import gradio as gr
+
+ def chat_interface(message, history):
+     inputs = {
+         "query": message,
+         "all_queries": [message],
+         "all_texts": all_chunks,
+         "k_per_query": 3,
+         "alpha": 0.7,
+         "vectorstore": vectorstore,
+         "full_document": "",
+     }
+     response = ""
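+     # Stream incremental pipeline output to Gradio; only plain-string chunks and dicts carrying an "answer" key are forwarded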
+     for chunk in full_pipeline.stream(inputs):
+         if isinstance(chunk, str):
+             response += chunk
+             yield response
+         elif isinstance(chunk, dict) and "answer" in chunk:
+             response += chunk["answer"]
+             yield response
+
+ with gr.Blocks(css="""
+ html, body, .gradio-container {
+     height: 100%;
+     margin: 0;
+     padding: 0;
+ }
+ .gradio-container {
+     width: 90%;
+     max-width: 1000px;
+     margin: 0 auto;
+     padding: 1rem;
+ }
+
+ .chatbox-container {
+     display: flex;
+     flex-direction: column;
+     height: 95%;
+ }
+
+ .chatbot {
+     flex: 1;
+     overflow-y: auto;
+     min-height: 500px;
+ }
+
+ .textbox {
+     margin-top: 1rem;
+ }
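+ /* #component-523 targets an auto-generated Gradio element id, which can change between Gradio versions */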
+ #component-523 {
+     height: 98%;
+ }
+ """) as demo:
+     with gr.Column(elem_classes="chatbox-container"):
+         gr.Markdown("## 💬 Ask Krishna's AI Assistant")
+         gr.Markdown("💡 Ask anything about Krishna Vamsi Dhulipalla")
+         chatbot = gr.Chatbot(elem_classes="chatbot")
+         textbox = gr.Textbox(placeholder="Ask a question about Krishna...", elem_classes="textbox")
+
+     gr.ChatInterface(
+         fn=chat_interface,
+         chatbot=chatbot,
+         textbox=textbox,
+         examples=[
+             "What are Krishna's research interests?",
+             "Where did Krishna work?",
+             "What did he study at Virginia Tech?"
+         ],
+     )
+
  demo.launch()