import os

import streamlit as st
from langsmith import traceable
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.document_transformers import LongContextReorder
from langchain_community.llms import Replicate

from app.prompts import sahabat_prompt


def reorder_embedding(docs):
    """Reorder retrieved documents for long-context retrieval.

    Places the most relevant documents at the beginning and end of the
    sequence (LongContextReorder), mitigating the "lost in the middle"
    effect when many chunks are stuffed into a single prompt.

    Args:
        docs: Sequence of retrieved ``Document`` objects.

    Returns:
        The same documents, reordered by ``LongContextReorder``.
    """
    reordering = LongContextReorder()
    return reordering.transform_documents(docs)


@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Create a Conversational Retrieval Chain for RAG.

    Parameters are tuned for lower latency: a low temperature for
    deterministic answers, a bounded conversation window, and MMR
    retrieval to diversify the fetched context.

    Args:
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` wired with the Replicate LLM,
        windowed chat memory, and the ``sahabat_prompt`` template.
    """
    llm = Replicate(
        model=(
            "fauzi3007/sahabat-ai-replicate:"
            "c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09"
        ),
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 3000},
    )

    # Keep only the last 6 exchanges to bound prompt size and latency.
    # output_key="answer" is required because the chain returns multiple
    # keys when return_source_documents=True.
    memory = ConversationBufferWindowMemory(
        k=6,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    # MMR retrieval: fetch 20 candidates, return the 6 most diverse.
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 6, "fetch_k": 20},
        ),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain


def get_rag_chain(vector_store):
    """Return a Conversational Retrieval Chain for external use.

    Thin public wrapper around :func:`create_conversational_chain`.
    """
    return create_conversational_chain(vector_store)