# Streamlit RAG demo: answers chat questions using a small 20-newsgroups
# sample indexed in a Redis vector store and an OpenAI-compatible chat model.
#
# Required environment variables:
#   REDIS_PASS      - password for the "default" user of the Redis instance
#   OPENAI_API_KEY  - API key sent to the chat-completions endpoint

import os
from urllib.parse import quote

import streamlit as st
from langchain.docstore.document import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_redis import RedisConfig, RedisVectorStore
from sklearn.datasets import fetch_20newsgroups

REDIS_HOST = "redis-14461.c264.ap-south-1-1.ec2.redns.redis-cloud.com"
REDIS_PORT = 14461
# Only the first MAX_DOCS documents of the shuffled training split are indexed.
MAX_DOCS = 250


def _require_env(name):
    """Return the value of environment variable *name*.

    If the variable is unset or empty, show an error in the page and halt the
    current script run instead of continuing with a ``None`` credential.
    """
    value = os.getenv(name)
    if not value:
        st.error(f"Missing required environment variable: {name}")
        st.stop()
    return value


# Validate both secrets up front so a misconfiguration fails before the
# dataset download and embedding work inside load().
redis_pass = _require_env("REDIS_PASS")
openai_api_key = _require_env("OPENAI_API_KEY")

# Percent-encode the password: characters such as "@", ":" or "/" would
# otherwise be parsed as URL delimiters and corrupt the connection string.
url = f"redis://default:{quote(redis_pass, safe='')}@{REDIS_HOST}:{REDIS_PORT}"


@st.cache_resource
def load():
    """Build the retriever and chat model used by the RAG chain.

    Fetches the "alt.atheism" and "sci.space" training posts, embeds the first
    MAX_DOCS of them, writes them to the "newsgroups" Redis index and returns
    a similarity retriever (k=2) together with the chat model.

    Returns:
        A ``(retriever, llm)`` tuple.
    """
    newsgroups = fetch_20newsgroups(
        subset="train",
        categories=["alt.atheism", "sci.space"],
        shuffle=True,
        random_state=42,
    )

    texts = newsgroups.data[:MAX_DOCS]
    metadata = [
        {"category": newsgroups.target_names[target]}
        for target in newsgroups.target[:MAX_DOCS]
    ]

    embeddings = HuggingFaceEmbeddings(model_name="msmarco-distilbert-base-v4")

    config = RedisConfig(
        index_name="newsgroups",
        redis_url=url,
        metadata_schema=[
            {"name": "category", "type": "tag"},
        ],
    )
    vector_store = RedisVectorStore(embeddings, config=config)

    # The shuffle is seeded, so position i always refers to the same post.
    # Stable keys make re-ingestion overwrite existing entries; without them
    # every process start would add another copy of each document and the
    # retriever could return the same text twice.
    keys = [f"doc-{i}" for i in range(len(texts))]
    vector_store.add_texts(texts, metadata, keys=keys)

    retriever = vector_store.as_retriever(
        search_type="similarity", search_kwargs={"k": 2}
    )

    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        base_url="https://models.inference.ai.azure.com",
        api_key=openai_api_key,
    )
    return retriever, llm


retriever, llm = load()

# Single human-turn prompt; {question} and {context} are filled by the chain.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human",
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use three sentences maximum and keep the answer concise. "
            "Question: {question} Context: {context} Answer:",
        ),
    ]
)


def format_docs(docs):
    """Join the page content of the retrieved documents, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming query is sent to the retriever (for context) and passed through
# unchanged (as the question) before prompting the model.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

if query := st.chat_input("Ask a question"):
    response = rag_chain.invoke(query)
    with st.chat_message("assistant"):
        st.write(response)