# NOTE(review): the lines below are export residue from a Hugging Face Spaces
# file-listing page (status, file size, commit hashes, line-number gutter).
# They are not valid Python; preserved here as comments so the module imports.
# Spaces: Sleeping / Sleeping
# File size: 1,560 Bytes
# Commits: daa81fb 3de8e98
import os
import streamlit as st
from langsmith import traceable
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder
from app.prompts import sahabat_prompt
def reorder_embedding(docs):
    """Rearrange retrieved documents for long-context prompting.

    Delegates to LangChain's ``LongContextReorder`` transformer, which
    repositions documents so the most relevant ones sit at the start and
    end of the prompt (mitigating the "lost in the middle" effect).

    Args:
        docs: Sequence of retrieved ``Document`` objects.

    Returns:
        The same documents, reordered by the transformer.
    """
    transformer = LongContextReorder()
    reordered = transformer.transform_documents(docs)
    return reordered
@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store, memory_window=6, top_k=6, fetch_k=20):
    """Create a Conversational Retrieval Chain for RAG with tuned parameters for lower latency.

    Args:
        vector_store: Vector store providing ``as_retriever()`` (e.g. FAISS/Chroma).
        memory_window: Number of past conversation turns kept in the buffer
            window memory. Defaults to 6 (previous hard-coded value).
        top_k: Number of documents returned by the MMR retriever.
            Defaults to 6 (previous hard-coded value).
        fetch_k: Candidate pool size the MMR retriever draws from before
            diversification. Defaults to 20 (previous hard-coded value).

    Returns:
        A ``ConversationalRetrievalChain`` that returns source documents and
        stores its answer under the ``"answer"`` memory output key.
    """
    # Low temperature for more deterministic answers; generation budget
    # capped at 3000 new tokens. Model is hosted on Replicate.
    llm = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 3000},
    )
    # output_key="answer" is required because return_source_documents=True
    # makes the chain emit multiple keys; memory must know which one to store.
    memory = ConversationBufferWindowMemory(
        k=memory_window,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    # MMR search balances relevance and diversity across the fetched candidates.
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": top_k, "fetch_k": fetch_k},
        ),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain
def get_rag_chain(vector_store):
    """Public entry point: build and return the RAG conversational chain.

    Thin alias around :func:`create_conversational_chain` for callers
    outside this module.

    Args:
        vector_store: Vector store used to back the chain's retriever.

    Returns:
        The constructed ``ConversationalRetrievalChain``.
    """
    chain = create_conversational_chain(vector_store)
    return chain
|