mafzaal committed
Commit d1134f7 · 1 Parent(s): 48ec46f

Refactor research agent implementation and remove unused modules


- Removed the LangChainRAG model and related components to streamline the architecture.
- Introduced a new agent-building mechanism via the `build_agent` function (see the usage sketch below).
- Added a `MAX_SEARCH_RESULTS` parameter to the configuration.
- Replaced the RAG prompt template with a more concise version.
- Implemented a new RSS feed tool for fetching articles from specified URLs.
- Consolidated search tools into a single function for easier management.
- Enhanced error handling and logging throughout the agent's processing functions.
- Updated dependencies in `pyproject.toml` (`arxiv`, `feedparser`, `langgraph`) to support the new features.
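
A minimal sketch of the new entry points, based on `py-src/app.py` below; the question text is illustrative, and an OpenAI API key plus a populated Qdrant store are assumed:

```python
from lets_talk.agent import build_agent, parse_output

# build_agent() compiles the LangGraph pipeline: retrieve -> agent -> (action | end)
agent = build_agent()

# The agent accepts {"question": ...}; parse_output extracts the final answer string.
result = agent.invoke({"question": "What's new on the blog?"})
print(parse_output(result))
```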

py-src/app.py CHANGED

@@ -18,44 +18,12 @@ from qdrant_client import QdrantClient
 from qdrant_client.http.models import Distance, VectorParams
 from lets_talk.config import LLM_MODEL, LLM_TEMPERATURE
 import lets_talk.utils.blog as blog
-from lets_talk.models.rag import LangChainRAG
-
-# Load vector store using the utility function
-vector_store = blog.load_vector_store()
-
-# Create a retriever
-retriever = vector_store.as_retriever()
-
-# Set up ChatOpenAI with environment variables
-
-llm = ChatOpenAI(model=LLM_MODEL, temperature=LLM_TEMPERATURE)
-
-# Create RAG prompt template
-rag_prompt_template = """\
-You are a helpful assistant that answers questions based on the context provided.
-Generate a concise answer to the question in markdown format and include a list of relevant links to the context.
-Use links from context to help user to navigate to to find more information.
-You have access to the following information:
-
-Context:
-{context}
-
-Question:
-{question}
-
-If context is unrelated to question, say "I don't know".
-"""
-
-rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template)
-
-# Create chain
-retrieval_augmented_qa_chain = (
-    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-    | RunnablePassthrough.assign(context=itemgetter("context"))
-    | {"response": rag_prompt | llm, "context": itemgetter("context")}
-)
+from lets_talk.agent import build_agent, parse_output
+
+tdg_agent = build_agent()

@@ -74,29 +42,56 @@ async def setup_chain():
     # Set a loading message
     msg = cl.Message(content="Let's talk about [TheDataGuy](https://thedataguy.pro)'s blog posts, how can I help you?", author="System")
     await msg.send()
-
-    #rag_chain = LangChainRAG(llm=llm, retriever=retriever)
 
     # Store the chain in user session
-    cl.user_session.set("chain", retrieval_augmented_qa_chain)
-    #cl.user_session.set("chain", rag_chain)
+    cl.user_session.set("agent", tdg_agent)
+
 
 @cl.on_message
 async def on_message(message: cl.Message):
+    """
+    Handler for user messages. Processes the query through the research agent
+    and streams the response back to the user.
+
+    Args:
+        message: The user's message
+    """
+    agent_executor = cl.user_session.get("agent")
+
+    # Create Chainlit message for streaming
     msg = cl.Message(content="")
-
-    # Get chain from user session
-    chain = cl.user_session.get("chain")
 
-    # Call the chain with the user message
-    response = await chain.ainvoke({"question": message.content})
-    #response = await chain.arun_pipeline(message.content)
+    # Create a parent step for the research process
+    with cl.Step(name="TheDataGuy thinking", type="tool") as step:
+        # Run the agent executor with callbacks to stream the response
+        result = await agent_executor.ainvoke(
+            {"question": message.content},
+            config={
+                "callbacks": [cl.AsyncLangchainCallbackHandler()],
+                "configurable": {"session_id": message.id}  # Add session_id from message
+            }
+        )
+
+        # Add steps from agent's intermediate steps
+        # for i, step_data in enumerate(result.get("intermediate_steps", [])):
+        #     step_name = f"Using: {step_data[0].tool}"
+        #     step_input = str(step_data[0].tool_input)
+        #     step_output = str(step_data[1])
+
+        #     # Create individual steps as children of the main step
+        #     with cl.Step(name=step_name, type="tool") as substep:
+        #         await cl.Message(
+        #             content=f"**Input:** {step_input}\n\n**Output:** {step_output}",
+        #         ).send()
+
+        # Get the final answer
+        final_answer = parse_output(result)
 
-    # Stream tokens from the final_answer
-    await msg.stream_token(response["response"].content)
+    # Stream tokens from the final_answer
+    await msg.stream_token(final_answer)
     await msg.send()
py-src/lets_talk/{models/agent.py → agent.py} RENAMED

@@ -1,6 +1,4 @@
-"""
-LangGraph Agent implementation for the Research Agent.
-"""
+
 from typing import TypedDict, Annotated, Dict, Any, Literal, Union, cast, List, Optional
 from langchain_openai import ChatOpenAI
 from langchain_core.tools import Tool

@@ -9,8 +7,12 @@ from langchain_core.documents import Document
 from langgraph.graph.message import add_messages
 from langgraph.graph import StateGraph, END
 from langgraph.prebuilt import ToolNode
-from lets_talk.models.research_tools import RAGQueryInput
+from lets_talk.models import RAGQueryInput
 from lets_talk.config import LLM_MODEL, LLM_TEMPERATURE
+from lets_talk.tools import create_search_tools
+from datetime import datetime
+import lets_talk.rag as rag
+
 
 class ResearchAgentState(TypedDict):
     """

@@ -23,7 +25,68 @@
     """
     messages: Annotated[list[BaseMessage], add_messages]
     context: str
-    documents: Optional[List[Document]]
+
+
+rag_prompt_template = """\
+You are a helpful assistant that answers questions based on the context provided.
+Generate a concise answer to the question in markdown format and include a list of relevant links to the context.
+Use links from context to help user to navigate to to find more information.
+
+You have access to the following information:
+
+Context:
+{context}
+
+If context is unrelated to question, say "I don't know".
+"""
+
+# Update the call_model function to include current datetime
+def call_model(model, state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
+    """
+    Process the current state through the language model.
+
+    Args:
+        model: Language model with tools bound
+        state: Current state containing messages and context
+
+    Returns:
+        Updated state with model's response added to messages
+    """
+    try:
+        messages = state["messages"]
+        context = state.get("context", "")
+
+        # Get current datetime
+        current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        # Insert system message with context before the latest user message
+        sys_prompt = rag_prompt_template.format(
+            context=context,
+        )
+        sys_prompt = f"Today is: {current_datetime}\n\n" + sys_prompt
+        print(sys_prompt)
+
+        context_message = SystemMessage(content=sys_prompt)
+
+        # Find the position of the last user message
+        for i in range(len(messages)-1, -1, -1):
+            if isinstance(messages[i], HumanMessage):
+                # Insert context right after the last user message
+                enhanced_messages = messages[:i+1] + [context_message] + messages[i+1:]
+                break
+        else:
+            # No user message found, just append context
+            enhanced_messages = messages + [context_message]
+
+        # Get response from the model
+        response = model.invoke(enhanced_messages)
+        return {"messages": [response]}
+    except Exception as e:
+        # Handle exceptions gracefully
+        error_msg = f"Error calling model: {str(e)}"
+        print(error_msg)  # Log the error
+        # Return a fallback response
+        return {"messages": [HumanMessage(content=error_msg)]}
 
 
 def call_model(model, state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:

@@ -44,8 +107,8 @@ def call_model(model, state: Dict[str, Any]) -> Dict[str, list[BaseMessage]]:
     # Add context from documents if available
     if context:
         # Insert system message with context before the latest user message
-        context_message = SystemMessage(content=f"Use the following information from uploaded documents to enhance your response if relevant:\n\n{context}")
-
+        context_message = SystemMessage(content=rag_prompt_template.format(context=context))
+
         # Find the position of the last user message
         for i in range(len(messages)-1, -1, -1):
             if isinstance(messages[i], HumanMessage):

@@ -87,17 +150,8 @@ def should_continue(state: Dict[str, Any]) -> Union[Literal["action"], Literal["end"]]:
     return "end"
 
 
-def retrieve_from_documents(state: Dict[str, Any], retriever) -> Dict[str, str]:
-    """
-    Retrieve relevant context from uploaded documents based on the user query.
-
-    Args:
-        state: Current state containing messages and optional documents
-        retriever: Document retriever to use
-
-    Returns:
-        Updated state with context from document retrieval
-    """
+def retrieve_from_blog(state: Dict[str, Any]) -> Dict[str, str]:
+
     # Get the last user message
     for message in reversed(state["messages"]):
         if isinstance(message, HumanMessage):

@@ -107,48 +161,26 @@ def retrieve_from_documents(state: Dict[str, Any], retriever) -> Dict[str, str]:
         # No user message found
         return {"context": ""}
 
-    # Skip if no documents are uploaded
-    if not retriever:
-        return {"context": ""}
-
     try:
-        # Retrieve relevant documents
-        docs = retriever.invoke(query)
-        if not docs:
-            return {"context": ""}
-
-        # Extract text from documents
-        context = "\n\n".join([f"Document excerpt: {doc.page_content}" for doc in docs])
+        #context = blog_search_tool(query)
+        response = rag.rag_chain.invoke({"question": query})
+
+        context = response["response"].content
+
         return {"context": context}
     except Exception as e:
         print(f"Error retrieving from documents: {str(e)}")
         return {"context": ""}
 
 
-def document_search_tool(retriever, query: str) -> str:
-    """
-    Tool function to search within uploaded documents.
-
-    Args:
-        retriever: Document retriever to use
-        query: Search query string
-
-    Returns:
-        Information retrieved from the documents
-    """
-    if not retriever:
-        return "No documents have been uploaded yet. Please upload a document first."
-
-    docs = retriever.invoke(query)
+def blog_search_tool(query: str) -> str:
+    docs = rag.retriever.invoke(query)
     if not docs:
-        return "No relevant information found in the uploaded documents."
+        return "No relevant documents found."
 
-    # Format the results
-    results = []
-    for i, doc in enumerate(docs):
-        results.append(f"[Document {i+1}] {doc.page_content}")
-
-    return "\n\n".join(results)
+    context = "\n\n---".join([f"link: {doc.metadata['url']}\n\n{doc.page_content}" for doc in docs])
+    return context
+
 
 def convert_inputs(input_object: Dict[str, str]) -> Dict[str, list[BaseMessage]]:

@@ -183,22 +215,10 @@ def parse_output(input_state: Dict[str, Any]) -> str:
     return "I encountered an error while processing your request."
 
 
-def build_agent_chain(tools, retriever) -> StateGraph:
-    """
-    Constructs and returns the research agent execution chain.
-
-    The chain consists of:
-    1. A retrieval node that gets context from documents
-    2. An agent node that processes messages
-    3. A tool node that executes tools when called
-
-    Args:
-        tools: List of tools for the agent
-        retriever: Optional retriever for document search
-
-    Returns:
-        Compiled agent chain ready for execution
-    """
+def build_agent() -> StateGraph:
+
+    tools = create_search_tools(5)
+
     # Create an instance of ChatOpenAI
     model = ChatOpenAI(model=LLM_MODEL, temperature=LLM_TEMPERATURE)
     model = model.bind_tools(tools)

@@ -206,9 +226,9 @@ def build_agent_chain(tools, retriever) -> StateGraph:
     # Create document search tool if retriever is provided
 
     doc_search_tool = Tool(
-        name="DocumentSearch",
-        description="Search within the user's uploaded document. Use this tool when you need information from the specific document that was uploaded.",
-        func=lambda query: document_search_tool(retriever, query),
+        name="TheDataGuy Blog Search",
+        description="Search within blog posts of thedataguy.pro. ALWAYS use this tool to retrieve the context.",
+        func=lambda query: blog_search_tool(query),
         args_schema=RAGQueryInput
     )

@@ -226,17 +246,15 @@
     def call_model_node(state):
         return call_model(model, state)
 
-    # Add nodes
 
     # Define retrieval node factory with bound retriever
     def retrieve_node(state):
-        return retrieve_from_documents(state, retriever)
+        return retrieve_from_blog(state)
 
     uncompiled_graph.add_node("retrieve", retrieve_node)
     uncompiled_graph.set_entry_point("retrieve")
-    uncompiled_graph.add_edge("retrieve", "agent")
-
     uncompiled_graph.add_node("agent", call_model_node)
+    uncompiled_graph.add_edge("retrieve", "agent")
     uncompiled_graph.add_node("action", tool_node)
 
     # Add an end node - this is required for the "end" state to be valid
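
For reference, a minimal self-contained sketch of the graph topology `build_agent` assembles; the node bodies here are stand-ins, not the project's real retrieval or model calls:

```python
from typing import TypedDict, Annotated

from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages

class State(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages]
    context: str

def retrieve(state: State) -> dict:
    # Stand-in for retrieve_from_blog, which calls the RAG chain
    return {"context": "stub context"}

def agent(state: State) -> dict:
    # Stand-in for call_model, which invokes the tool-bound LLM
    return {"messages": [AIMessage(content="stub answer")]}

graph = StateGraph(State)
graph.add_node("retrieve", retrieve)
graph.add_node("agent", agent)
graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "agent")
# In the real graph, should_continue routes to the ToolNode ("action")
# when the model requests a tool call; this stub always ends.
graph.add_conditional_edges("agent", lambda s: "end", {"end": END})

app = graph.compile()
print(app.invoke({"messages": [HumanMessage(content="hi")], "context": ""}))
```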
py-src/lets_talk/config.py CHANGED

@@ -12,11 +12,7 @@ QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "thedataguy_documents")
 BLOG_BASE_URL = os.environ.get("BLOG_BASE_URL", "https://thedataguy.pro/blog/")
 LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini")
 LLM_TEMPERATURE = float(os.environ.get("TEMPERATURE", "0"))
+MAX_SEARCH_RESULTS = int(os.environ.get("MAX_SEARCH_RESULTS", "5"))
 
-SYSTEM_TEMPLATE = """
-You are a helpful assistant that answers questions based on the context provided.
-Generate a concise answer to the question in markdown format and include a list of relevant links to the context.
-Use links from context to help user to navigate to to find more information. If context is unrelated to question, say "I don't know".
-"""
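These settings are plain environment lookups, so they can be overridden without code changes; a small sketch (the value `10` is illustrative):

```python
import os

# Must be set before lets_talk.config is first imported,
# since the module reads the environment at import time.
os.environ["MAX_SEARCH_RESULTS"] = "10"

from lets_talk.config import MAX_SEARCH_RESULTS
print(MAX_SEARCH_RESULTS)  # -> 10
```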
py-src/lets_talk/models.py ADDED

@@ -0,0 +1,29 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional
+
+class ArxivQueryInput(BaseModel):
+    """Input for arXiv query."""
+    query: str = Field(..., description="The search query to find papers on arXiv")
+    max_results: int = Field(default=5, description="The maximum number of results to return")
+
+class RAGQueryInput(BaseModel):
+    """Input for RAG query."""
+    query: str = Field(..., description="The query to search in the uploaded document")
+
+class WebSearchInput(BaseModel):
+    """Input for web search."""
+    query: str = Field(..., description="The search query for web search")
+    max_results: int = Field(default=5, description="The maximum number of results to return")
+
+class DocumentAnalysisInput(BaseModel):
+    """Input for document analysis."""
+    query: str = Field(..., description="The specific question to analyze in the document")
+    include_citations: bool = Field(default=True, description="Whether to include citations in the response")
+
+class RSSFeedInput(BaseModel):
+    """Input for RSS feed tool."""
+    urls: List[str] = Field(..., description="List of RSS feed URLs to fetch articles from")
+    query: Optional[str] = Field(None, description="Optional query to filter articles by relevance")
+    max_results: int = Field(default=5, description="Maximum number of articles to return")
+    nlp: bool = Field(default=True, description="Whether to use NLP processing on articles (extracts keywords and summaries)")
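
These Pydantic schemas are what the tools declare as `args_schema`; a quick sketch of how they validate input (values illustrative):

```python
from pydantic import ValidationError
from lets_talk.models import RSSFeedInput

feed_input = RSSFeedInput(urls=["https://thedataguy.pro/rss.xml"], max_results=3)
print(feed_input.model_dump())
# {'urls': ['https://thedataguy.pro/rss.xml'], 'query': None, 'max_results': 3, 'nlp': True}

try:
    RSSFeedInput()  # 'urls' is required
except ValidationError as e:
    print(e.error_count(), "validation error")
```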
py-src/lets_talk/models/__init__.py DELETED
File without changes
py-src/lets_talk/models/rag.py DELETED

@@ -1,69 +0,0 @@
-"""
-RAG (Retrieval Augmented Generation) model implementation.
-"""
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-
-from lets_talk import config
-
-# Create prompt template
-prompt = ChatPromptTemplate.from_messages([
-    ("system", config.SYSTEM_TEMPLATE),
-    MessagesPlaceholder(variable_name="chat_history"),
-    ("human", "{question}"),
-    ("human", "Context: {context}")
-])
-
-class LangChainRAG:
-    """
-    RAG implementation using LangChain components.
-    """
-    def __init__(self, retriever, llm):
-        """
-        Initialize the RAG model.
-
-        Args:
-            retriever: Document retriever component
-            llm: Language model for generation
-        """
-        self.retriever = retriever
-        self.llm = llm
-        self.chain = self._create_chain()
-
-    def _create_chain(self):
-        """
-        Create the RAG chain.
-
-        Returns:
-            A runnable chain that processes user queries
-        """
-        # Define the RAG chain
-        rag_chain = (
-            {"context": self.retriever, "question": RunnablePassthrough(), "chat_history": lambda _: []}
-            | prompt
-            | self.llm
-            | StrOutputParser()
-        )
-        return rag_chain
-
-    async def arun_pipeline(self, user_query: str):
-        """
-        Run the RAG pipeline with the user query.
-
-        Args:
-            user_query: User's question
-
-        Returns:
-            Dict containing the response generator and context
-        """
-        # Get relevant documents for context
-        docs = self.retriever.invoke(user_query)
-        context_list = [(doc.page_content, doc.metadata) for doc in docs]
-
-        # Create async generator for streaming
-        async def generate_response():
-            async for chunk in self.chain.astream(user_query):
-                yield chunk
-
-        return {"response": generate_response(), "context": context_list}
py-src/lets_talk/rag.py ADDED

@@ -0,0 +1,49 @@
+"""
+RAG (Retrieval Augmented Generation) model implementation.
+"""
+from operator import itemgetter
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema.runnable import RunnablePassthrough
+from langchain_openai.chat_models import ChatOpenAI
+from langchain_qdrant import QdrantVectorStore
+from lets_talk import config
+from lets_talk.utils import blog
+import lets_talk.utils.blog as blog
+
+# Load vector store using the utility function
+vector_store: QdrantVectorStore = blog.load_vector_store()
+
+# Create a retriever
+retriever = vector_store.as_retriever()
+
+llm = ChatOpenAI(model=config.LLM_MODEL, temperature=config.LLM_TEMPERATURE)
+
+# Create RAG prompt template
+rag_prompt_template = """\
+You are a helpful assistant that answers questions based on the context provided.
+Generate a concise answer to the question in markdown format and include a list of relevant links to the context.
+Use links from context to help user to navigate to to find more information.
+You have access to the following information:
+
+Context:
+{context}
+
+Question:
+{question}
+
+If context is unrelated to question, say "I don't know".
+"""
+
+rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template)
+
+# Create chain
+rag_chain = (
+    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+    | RunnablePassthrough.assign(context=itemgetter("context"))
+    | {"response": rag_prompt | llm, "context": itemgetter("context")}
+)
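
A sketch of invoking the module-level chain, mirroring how `retrieve_from_blog` in `agent.py` uses it; it assumes `OPENAI_API_KEY` is set and the Qdrant collection is populated:

```python
import lets_talk.rag as rag

out = rag.rag_chain.invoke({"question": "What is RAGAS?"})  # question is illustrative
print(out["response"].content)  # markdown answer with links
print(len(out["context"]))      # documents retrieved for the answer
```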
py-src/lets_talk/{models/research_tools.py → rss_tool.py} RENAMED

@@ -5,37 +5,10 @@ This module implements input schemas and tools specifically for research purposes.
 """
 from typing import List, Optional, Dict, Any
 from pydantic import BaseModel, Field
-
 from langchain_core.tools import Tool
 from langchain_core.documents import Document
 import feedparser
-import datetime
-
-class ArxivQueryInput(BaseModel):
-    """Input for arXiv query."""
-    query: str = Field(..., description="The search query to find papers on arXiv")
-    max_results: int = Field(default=5, description="The maximum number of results to return")
-
-class RAGQueryInput(BaseModel):
-    """Input for RAG query."""
-    query: str = Field(..., description="The query to search in the uploaded document")
-
-class WebSearchInput(BaseModel):
-    """Input for web search."""
-    query: str = Field(..., description="The search query for web search")
-    max_results: int = Field(default=5, description="The maximum number of results to return")
-
-class DocumentAnalysisInput(BaseModel):
-    """Input for document analysis."""
-    query: str = Field(..., description="The specific question to analyze in the document")
-    include_citations: bool = Field(default=True, description="Whether to include citations in the response")
-
-class RSSFeedInput(BaseModel):
-    """Input for RSS feed tool."""
-    urls: List[str] = Field(..., description="List of RSS feed URLs to fetch articles from")
-    query: Optional[str] = Field(None, description="Optional query to filter articles by relevance")
-    max_results: int = Field(default=5, description="Maximum number of articles to return")
-    nlp: bool = Field(default=True, description="Whether to use NLP processing on articles (extracts keywords and summaries)")
+from .models import RSSFeedInput
 
 
 def rss_feed_tool(urls: List[str], query: Optional[str] = None, max_results: int = 5, nlp: bool = True) -> str:
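
The diff shows only the tool's signature, so a direct call would look roughly like this; the return value is assumed to be a formatted string of matching articles:

```python
from lets_talk.rss_tool import rss_feed_tool

articles = rss_feed_tool(
    urls=["https://thedataguy.pro/rss.xml"],  # the feed the agent's wrapper uses
    query="evaluation",                       # optional relevance filter (illustrative)
    max_results=3,
    nlp=False,                                # skip keyword/summary extraction
)
print(articles)
```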
py-src/lets_talk/{models/search_tools.py → tools.py} RENAMED

@@ -3,8 +3,10 @@ Search tools module containing different search implementations.
 """
 
 from langchain_community.tools.arxiv.tool import ArxivQueryRun
-from langchain_community.tools import DuckDuckGoSearchResults
+#from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_core.tools import Tool
+from .rss_tool import rss_feed_tool
+
 
 def create_search_tools(max_results=5):
     """

@@ -16,13 +18,33 @@ def create_search_tools(max_results=5):
     Returns:
         List of search tools for the agent
     """
+
+    def create_rss_feed_tool() -> Tool:
+        """
+        Create and return an RSS feed tool.
+
+        Returns:
+            Tool object for RSS feed functionality
+        """
+        def _rss_feed_tool_wrapper(*args, **kwargs):
+            return rss_feed_tool(urls=['https://thedataguy.pro/rss.xml'])
+
+        return Tool(
+            name="RSSFeedReader",
+            description="Fetch and read articles from TheDataGuy's RSS feeds. Use this tool when you need the latest blog posts, what's new or latest updates.",
+            func=_rss_feed_tool_wrapper
+        )
+
+
     # Initialize standard search tools
-    #tavily_tool = TavilySearchResults(max_results=max_results)
-    duckduckgo_tool = DuckDuckGoSearchResults(max_results=max_results)
-    arxiv_tool = ArxivQueryRun()
+    #duckduckgo_tool = DuckDuckGoSearchResults(max_results=max_results)
+    #arxiv_tool = ArxivQueryRun()
+    tdg_rss_tool = create_rss_feed_tool()
 
     return [
-        #tavily_tool,
-        duckduckgo_tool,
-        arxiv_tool,
+        tdg_rss_tool,
+        #duckduckgo_tool,
+        #arxiv_tool,
     ]
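
After this change, `create_search_tools` returns only the RSS tool (the web and arXiv tools are commented out), and the `max_results` argument is not consumed by the RSS wrapper, which always reads thedataguy.pro's feed; a quick check:

```python
from lets_talk.tools import create_search_tools

tools = create_search_tools(max_results=5)
print([t.name for t in tools])  # -> ['RSSFeedReader']
```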
pyproject.toml CHANGED

@@ -5,7 +5,9 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "arxiv>=2.2.0",
     "chainlit>=2.5.5",
+    "feedparser>=6.0.11",
     "ipykernel>=6.29.5",
     "ipython>=9.2.0",
     "langchain>=0.3.25",

@@ -15,6 +17,7 @@ dependencies = [
     "langchain-openai>=0.3.16",
     "langchain-qdrant>=0.2.0",
     "langchain-text-splitters>=0.3.8",
+    "langgraph>=0.4.3",
     "pandas>=2.2.3",
     "python-dotenv>=1.1.0",
     "qdrant-client>=1.14.2",