"""Conversational RAG chain construction (Replicate LLM + LangChain retrieval)."""
import os
import streamlit as st
from langsmith import traceable
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder
from app.prompts import sahabat_prompt
def reorder_embedding(docs):
    """Reorder retrieved documents for long-context prompting.

    Delegates to LangChain's ``LongContextReorder`` transformer, which
    rearranges the documents (per its docs, pushing the most relevant
    toward the ends of the list) to mitigate "lost in the middle" effects.

    Args:
        docs: Sequence of retrieved ``Document`` objects.

    Returns:
        The reordered document sequence.
    """
    return LongContextReorder().transform_documents(docs)
def create_conversational_chain(
    vector_store,
    *,
    temperature=0.1,
    top_p=0.9,
    max_new_tokens=3000,
    memory_window=6,
    retriever_k=6,
    fetch_k=20,
):
    """Create a Conversational Retrieval Chain for RAG with tuned parameters for lower latency.

    All tuning knobs are exposed as keyword-only parameters whose defaults
    match the previous hard-coded values, so existing callers are unaffected.

    Args:
        vector_store: A vector store exposing ``as_retriever`` (e.g. FAISS/Chroma).
        temperature: Sampling temperature for the Replicate model.
        top_p: Nucleus-sampling cutoff for the Replicate model.
        max_new_tokens: Generation cap for the Replicate model.
        memory_window: Number of past exchanges kept in conversation memory.
        retriever_k: Number of documents the MMR retriever returns.
        fetch_k: Candidate pool size the MMR retriever draws from.

    Returns:
        A configured ``ConversationalRetrievalChain``.
    """
    llm = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs={
            "temperature": temperature,
            "top_p": top_p,
            "max_new_tokens": max_new_tokens,
        },
    )
    # Windowed memory keeps only the last `memory_window` turns to bound
    # prompt size. `output_key="answer"` tells the memory which of the
    # chain's multiple output keys to store, since
    # `return_source_documents=True` makes the chain return more than one.
    memory = ConversationBufferWindowMemory(
        k=memory_window,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    # MMR search trades a little relevance for diversity among the k
    # returned documents, drawn from a `fetch_k`-sized candidate pool.
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": retriever_k, "fetch_k": fetch_k},
        ),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain
def get_rag_chain(vector_store):
    """Public entry point: build and return the conversational RAG chain.

    Thin wrapper over :func:`create_conversational_chain` kept for
    external callers.

    Args:
        vector_store: Vector store passed through to the chain builder.

    Returns:
        The constructed ``ConversationalRetrievalChain``.
    """
    chain = create_conversational_chain(vector_store)
    return chain