import logging
from operator import itemgetter

from langchain.memory import ConversationBufferWindowMemory
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import Runnable, RunnableLambda
from langchain_openai import ChatOpenAI

from rag.retrievers import RetrieversConfig

from .helpers import (
    DocumentFormatter,
    create_langsmith_client,
    get_datetime,
    get_reranker,
)
from .prompt_template import generate_prompt_template

logging.basicConfig(level=logging.ERROR)


def retrievers_setup(retrievers_config, reranker: bool = False) -> tuple:
    """Set up the practitioners and documents retrievers, optionally re-ranked.

    Args:
        retrievers_config (RetrieversConfig): Factory for the base retrievers.
        reranker (bool, optional): Wrap each retriever in a re-ranking
            compressor. Defaults to False.

    Returns:
        tuple: (practitioners_retriever, documents_retriever)
    """
    # Practitioners
    practitioners_retriever = retrievers_config.get_practitioners_retriever(k=10)

    # Tall Tree documents
    documents_retriever = retrievers_config.get_documents_retriever(k=10)

    # Re-ranking (optional): improves quality and serves as a filter
    if reranker:
        practitioners_retriever_reranker = ContextualCompressionRetriever(
            base_compressor=get_reranker(top_n=10),
            base_retriever=practitioners_retriever,
        )
        documents_retriever_reranker = ContextualCompressionRetriever(
            base_compressor=get_reranker(top_n=8),
            base_retriever=documents_retriever,
        )
        return practitioners_retriever_reranker, documents_retriever_reranker

    return practitioners_retriever, documents_retriever


# Set retrievers as global variables (loading from Streamlit is faster this way)
practitioners_retriever, documents_retriever = retrievers_setup(
    retrievers_config=RetrieversConfig(), reranker=True
)
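# Note: an alternative to module-level globals is Streamlit's resource cache.
# A sketch, assuming this module is imported from a Streamlit app
# (load_retrievers is a hypothetical helper name, not part of this project):
#
#     import streamlit as st
#
#     @st.cache_resource
#     def load_retrievers() -> tuple:
#         return retrievers_setup(retrievers_config=RetrieversConfig(), reranker=True)
#
#     practitioners_retriever, documents_retriever = load_retrievers()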


# Set up runnable and chat memory
def get_runnable_and_memory(
    model: str = "gpt-4o-mini", temperature: float = 0.1
) -> tuple[Runnable, ConversationBufferWindowMemory]:
    """Set up the LCEL runnable and its chat memory.

    Args:
        model (str, optional): OpenAI chat model name. Defaults to "gpt-4o-mini".
        temperature (float, optional): Model temperature. Defaults to 0.1.

    Returns:
        tuple[Runnable, ConversationBufferWindowMemory]: Runnable chain and
            its windowed chat memory.
    """
    # Set up LangSmith to trace the runnable
    create_langsmith_client()

    # LLM and prompt template
    llm = ChatOpenAI(
        model=model,
        temperature=temperature,
    )
    prompt = generate_prompt_template()

    # Conversation history window memory: only the last k interactions are kept
    memory = ConversationBufferWindowMemory(
        memory_key="history",
        return_messages=True,
        k=6,
    )
    # Set up the runnable using LCEL
    setup = {
        "practitioners_db": itemgetter("user_query")
        | practitioners_retriever
        | DocumentFormatter("Practitioner #"),
        "tall_tree_db": itemgetter("user_query")
        | documents_retriever
        | DocumentFormatter("No."),
        "timestamp": lambda _: get_datetime(),
        "history": RunnableLambda(memory.load_memory_variables)
        | itemgetter("history"),
        "user_query": itemgetter("user_query"),
    }
    runnable = setup | prompt | llm | StrOutputParser()

    return runnable, memory
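

# Minimal usage sketch (assumes OPENAI_API_KEY and the environment behind
# RetrieversConfig are configured; the query string is illustrative):
if __name__ == "__main__":
    runnable, memory = get_runnable_and_memory()
    user_query = "Do you have physiotherapists available in Victoria?"
    response = runnable.invoke({"user_query": user_query})
    # Persist the turn so the next invocation sees it in "history"
    memory.save_context({"user_query": user_query}, {"output": response})
    print(response)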