krishnadhulipalla committed on
Commit
22bbe4e
·
1 Parent(s): c43e786

reduce latency to 3s

Browse files
Files changed (1) hide show
  1. app.py +15 -16
app.py CHANGED
@@ -19,7 +19,7 @@ from langchain_core.output_parsers import StrOutputParser
19
  from langchain_core.prompts import ChatPromptTemplate
20
  from langchain.schema.runnable.passthrough import RunnableAssign
21
  from langchain_huggingface import HuggingFaceEmbeddings
22
- from langchain.vectorstores import FAISS
23
  from langchain_community.retrievers import BM25Retriever
24
  from langchain_openai import ChatOpenAI
25
  from langchain.output_parsers import PydanticOutputParser
@@ -105,7 +105,7 @@ knowledge_base = KnowledgeBase()
105
  # repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
106
  repharser_llm = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct") | StrOutputParser()
107
  instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
108
- relevance_llm = ChatNVIDIA(model="meta/llama3-70b-instruct") | StrOutputParser()
109
  answer_llm = ChatOpenAI(
110
  model="gpt-4o",
111
  temperature=0.3,
@@ -116,16 +116,17 @@ answer_llm = ChatOpenAI(
116
 
117
  # Prompts
118
  repharser_prompt = ChatPromptTemplate.from_template(
119
- "You are a smart retrieval assistant. Rewrite the user's question into 2 different variants optimized for hybrid retrieval systems (BM25 + dense vectors).\n\n"
120
- "Your rewrites should:\n"
121
- "- Vary tone and phrasing\n"
122
- "- Expand or clarify intent if implicit\n"
123
- "- Include helpful keywords, synonyms, or topic-specific terms if possible\n"
124
- "- Be semantically close but diverse enough to match different chunks in the knowledge base\n\n"
 
 
 
125
  "Original Question:\n{query}\n\n"
126
- "Rewrites:\n"
127
- "1.\n"
128
- "2."
129
  )
130
 
131
  relevance_prompt = ChatPromptTemplate.from_template("""
@@ -240,7 +241,7 @@ parser_prompt = ChatPromptTemplate.from_template(
240
  # Helper Functions
241
  def parse_rewrites(raw_response: str) -> list[str]:
242
  lines = raw_response.strip().split("\n")
243
- return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:2]
244
 
245
  def hybrid_retrieve(inputs, exclude_terms=None):
246
  bm25_retriever = inputs["bm25_retriever"]
@@ -393,7 +394,7 @@ select_and_prompt = RunnableLambda(lambda x:
393
  answer_chain = (
394
  prepare_answer_inputs
395
  | select_and_prompt
396
- | answer_llm
397
  )
398
 
399
  def RExtract(pydantic_class: Type[BaseModel], llm, prompt):
@@ -463,19 +464,17 @@ def chat_interface(message, history):
463
  "query": message,
464
  "all_queries": [message],
465
  "all_texts": all_chunks,
466
- "k_per_query": 10,
467
  "alpha": 0.5,
468
  "vectorstore": vectorstore,
469
  "bm25_retriever": bm25_retriever,
470
  }
471
  full_response = ""
472
- collected = None
473
 
474
  # Stream the response to user
475
  for chunk in full_pipeline.stream(inputs):
476
  if isinstance(chunk, dict) and "answer" in chunk:
477
  full_response += chunk["answer"]
478
- collected = chunk
479
  yield full_response
480
  elif isinstance(chunk, str):
481
  full_response += chunk
 
19
  from langchain_core.prompts import ChatPromptTemplate
20
  from langchain.schema.runnable.passthrough import RunnableAssign
21
  from langchain_huggingface import HuggingFaceEmbeddings
22
+ from langchain_community.vectorstores import FAISS
23
  from langchain_community.retrievers import BM25Retriever
24
  from langchain_openai import ChatOpenAI
25
  from langchain.output_parsers import PydanticOutputParser
 
105
  # repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
106
  repharser_llm = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct") | StrOutputParser()
107
  instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
108
+ relevance_llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct") | StrOutputParser()
109
  answer_llm = ChatOpenAI(
110
  model="gpt-4o",
111
  temperature=0.3,
 
116
 
117
  # Prompts
118
  repharser_prompt = ChatPromptTemplate.from_template(
119
+ "You are a smart retrieval assistant helping a search engine understand user intent more precisely.\n\n"
120
+ "Given a user question, generate **1 diverse rewrite** that is semantically equivalent but phrased differently. \n"
121
+ "The rewrite should be optimized for **retrieval from a hybrid system** using BM25 (keyword match) and dense vector embeddings.\n\n"
122
+ "Guidelines:\n"
123
+ "- Expand abbreviations or implied intent when useful\n"
124
+ "- Add relevant technical terms, tools, frameworks, or synonyms (e.g., 'LLM', 'pipeline', 'project')\n"
125
+ "- Rephrase using different sentence structure or tone\n"
126
+ "- Use field-specific vocabulary (e.g., data science, ML, software, research) if it fits the query\n"
127
+ "- Prioritize clarity and retrievability over stylistic variation\n\n"
128
  "Original Question:\n{query}\n\n"
129
+ "Rewrite:\n1."
 
 
130
  )
131
 
132
  relevance_prompt = ChatPromptTemplate.from_template("""
 
241
  # Helper Functions
242
  def parse_rewrites(raw_response: str) -> list[str]:
243
  lines = raw_response.strip().split("\n")
244
+ return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:1]
245
 
246
  def hybrid_retrieve(inputs, exclude_terms=None):
247
  bm25_retriever = inputs["bm25_retriever"]
 
394
  answer_chain = (
395
  prepare_answer_inputs
396
  | select_and_prompt
397
+ | relevance_llm
398
  )
399
 
400
  def RExtract(pydantic_class: Type[BaseModel], llm, prompt):
 
464
  "query": message,
465
  "all_queries": [message],
466
  "all_texts": all_chunks,
467
+ "k_per_query": 8,
468
  "alpha": 0.5,
469
  "vectorstore": vectorstore,
470
  "bm25_retriever": bm25_retriever,
471
  }
472
  full_response = ""
 
473
 
474
  # Stream the response to user
475
  for chunk in full_pipeline.stream(inputs):
476
  if isinstance(chunk, dict) and "answer" in chunk:
477
  full_response += chunk["answer"]
 
478
  yield full_response
479
  elif isinstance(chunk, str):
480
  full_response += chunk