KenTheNoob committed on
Commit
24ff9b2
·
1 Parent(s): 1d1cfc4

Syncing with github

Browse files
project/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
  .gradio
2
  .env
3
  __pycache__
4
- /tool/__pycache__
 
1
  .gradio
2
  .env
3
  __pycache__
4
+ /tool/__pycache__
project/ClearML/DataCollectionPipeline.py CHANGED
@@ -151,7 +151,9 @@ def ETL_Pipeline(links):
151
  if (
152
  subdirectory is not None and
153
  'http' not in subdirectory and
154
- mongoCollection.find_one({"link": newLink}) is None
 
 
155
  ):
156
  links.append(newLink)
157
  except:
 
151
  if (
152
  subdirectory is not None and
153
  'http' not in subdirectory and
154
+ '#' not in subdirectory and
155
+ mongoCollection.find_one({"link": newLink}) is None and
156
+ newLink not in links
157
  ):
158
  links.append(newLink)
159
  except:
project/ClearML/FeaturePipeline.py CHANGED
@@ -79,7 +79,6 @@ def chunkDocuments(texts):
79
 
80
  @PipelineDecorator.component(cache=False, return_values=["embeddings"])
81
  def embedChunks(chunks):
82
- embeddings = []
83
  # Setup the text embedder
84
  MODEL = "llama3.2"
85
  try:
@@ -91,11 +90,10 @@ def embedChunks(chunks):
91
  embeddingsModel = OllamaEmbeddings(model=MODEL, base_url="http://host.docker.internal:11434")
92
  else:
93
  embeddingsModel = OllamaEmbeddings(model=MODEL)
94
- for chunk in chunks:
95
- embeddings.append(embeddingsModel.embed_query(chunk))
96
- return embeddings
97
 
98
 
 
99
  @PipelineDecorator.component(cache=False)
100
  def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
101
  # Create a qdrant connection
@@ -128,8 +126,6 @@ def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
128
  chunkIndex += 1
129
  if chunkNum == 0:
130
  documentIndex += 1
131
- # Store all documents from each MongoDB collection into qdrant
132
- # Create embeddings for each chunk, of length 2048 using the embedding model
133
  # Store the embedding along with some metadata into the Qdrant vector database
134
  qClient.upsert(
135
  collection_name=resultTypes[documentIndex],
 
79
 
80
  @PipelineDecorator.component(cache=False, return_values=["embeddings"])
81
  def embedChunks(chunks):
 
82
  # Setup the text embedder
83
  MODEL = "llama3.2"
84
  try:
 
90
  embeddingsModel = OllamaEmbeddings(model=MODEL, base_url="http://host.docker.internal:11434")
91
  else:
92
  embeddingsModel = OllamaEmbeddings(model=MODEL)
93
+ return embeddingsModel.embed_documents(chunks)
 
 
94
 
95
 
96
+ # Create embeddings for each chunk, of length 3072 using the embedding model
97
  @PipelineDecorator.component(cache=False)
98
  def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
99
  # Create a qdrant connection
 
126
  chunkIndex += 1
127
  if chunkNum == 0:
128
  documentIndex += 1
 
 
129
  # Store the embedding along with some metadata into the Qdrant vector database
130
  qClient.upsert(
131
  collection_name=resultTypes[documentIndex],
project/DataCollectionPipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
project/FeaturePipeline.ipynb CHANGED
@@ -2,33 +2,23 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 15,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
- "ename": "KeyboardInterrupt",
10
- "evalue": "",
11
- "output_type": "error",
12
- "traceback": [
13
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
15
- "Cell \u001b[0;32mIn[15], line 61\u001b[0m\n\u001b[1;32m 58\u001b[0m chunkNum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m chunks:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;66;03m# Create embeddings for each chunk, of length 2048 using the embedding model\u001b[39;00m\n\u001b[0;32m---> 61\u001b[0m embedding \u001b[38;5;241m=\u001b[39m \u001b[43membeddingsModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;66;03m# Store the embedding along with some metadata into the Qdrant vector database\u001b[39;00m\n\u001b[1;32m 63\u001b[0m qClient\u001b[38;5;241m.\u001b[39mupsert(collection_name\u001b[38;5;241m=\u001b[39mresultType, wait\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, points\u001b[38;5;241m=\u001b[39m[PointStruct(\u001b[38;5;28mid\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mid\u001b[39m, vector\u001b[38;5;241m=\u001b[39membedding, payload\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlink\u001b[39m\u001b[38;5;124m\"\u001b[39m: link, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: resultType, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchunk\u001b[39m\u001b[38;5;124m\"\u001b[39m: chunkNum, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: chunk})])\n",
16
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:227\u001b[0m, in \u001b[0;36mOllamaEmbeddings.embed_query\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Embed a query using a Ollama deployed embedding model.\u001b[39;00m\n\u001b[1;32m 219\u001b[0m \n\u001b[1;32m 220\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;124;03m Embeddings for the text.\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 226\u001b[0m instruction_pair \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquery_instruction\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mtext\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 227\u001b[0m embedding \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_embed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43minstruction_pair\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m embedding\n",
17
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:202\u001b[0m, in \u001b[0;36mOllamaEmbeddings._embed\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 201\u001b[0m iter_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28minput\u001b[39m\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_emb_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m prompt \u001b[38;5;129;01min\u001b[39;00m iter_]\n",
18
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:167\u001b[0m, in \u001b[0;36mOllamaEmbeddings._process_emb_response\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 161\u001b[0m headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapplication/json\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 163\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 164\u001b[0m }\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 167\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/api/embeddings\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_default_params\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 171\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError raised by inference endpoint: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
19
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
20
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
21
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
22
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
23
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 670\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 671\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 672\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 673\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 674\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 675\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 676\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 677\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 678\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 679\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
24
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py:789\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 786\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 789\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 790\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 805\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
25
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
26
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connection.py:507\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 506\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 507\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 510\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
27
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:1428\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1427\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1428\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1430\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
28
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:331\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 331\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
29
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:292\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 292\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
30
- "File \u001b[0;32m/usr/local/lib/python3.12/socket.py:720\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 719\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 720\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 722\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
31
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
32
  ]
33
  }
34
  ],
@@ -81,23 +71,27 @@
81
  "for collection in collections:\n",
82
  " mongoCollection = mongoDatabase[collection]\n",
83
  "\n",
84
- " documents = mongoCollection.find()\n",
85
  " id = 0\n",
86
- " for document in documents:\n",
87
- " # For each document, split it into chunks\n",
88
- " link = document[\"link\"]\n",
89
- " resultType = document[\"type\"]\n",
90
- " text = document[\"content\"]\n",
91
- " text = cleanText(text)\n",
92
- " chunks = text_splitter.split_text(text)\n",
93
- " chunkNum = 0\n",
94
- " for chunk in chunks:\n",
95
- " # Create embeddings for each chunk, of length 2048 using the embedding model\n",
96
- " embedding = embeddingsModel.embed_query(chunk)\n",
97
- " # Store the embedding along with some metadata into the Qdrant vector database\n",
98
- " qClient.upsert(collection_name=resultType, wait=True, points=[PointStruct(id=id, vector=embedding, payload={\"link\": link, \"type\": resultType, \"chunk\": chunkNum, \"text\": chunk})])\n",
99
- " chunkNum += 1\n",
100
- " id += 1\n"
 
 
 
 
101
  ]
102
  }
103
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 5,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/usr/local/lib/python3.12/site-packages/pymongo/synchronous/collection.py:1920: UserWarning: use an explicit session with no_cursor_timeout=True otherwise the cursor may still timeout after 30 minutes, for more info see https://mongodb.com/docs/v4.4/reference/method/cursor.noCursorTimeout/#session-idle-timeout-overrides-nocursortimeout\n",
13
+ " return Cursor(self, *args, **kwargs)\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "Stopping document loop\n",
21
+ "Stopping document loop\n"
 
 
 
 
 
 
 
 
 
 
22
  ]
23
  }
24
  ],
 
71
  "for collection in collections:\n",
72
  " mongoCollection = mongoDatabase[collection]\n",
73
  "\n",
74
+ " documents = mongoCollection.find(no_cursor_timeout=True)\n",
75
  " id = 0\n",
76
+ " try:\n",
77
+ " for document in documents:\n",
78
+ " # For each document, split it into chunks\n",
79
+ " link = document[\"link\"]\n",
80
+ " resultType = document[\"type\"]\n",
81
+ " text = document[\"content\"]\n",
82
+ " text = cleanText(text)\n",
83
+ " chunks = text_splitter.split_text(text)\n",
84
+ " chunkNum = 0\n",
85
+ " embeddings = embeddingsModel.embed_documents(chunks)\n",
86
+ " for chunk in chunks:\n",
87
+ " # Create embeddings for each chunk, of length 3072 using the embedding model\n",
88
+ " # Store the embedding along with some metadata into the Qdrant vector database\n",
89
+ " qClient.upsert(collection_name=resultType, wait=True, points=[PointStruct(id=id, vector=embeddings[chunkNum], payload={\"link\": link, \"type\": resultType, \"chunk\": chunkNum, \"text\": chunk})])\n",
90
+ " chunkNum += 1\n",
91
+ " id += 1\n",
92
+ " except:\n",
93
+ " print(\"Stopping document loop\")\n",
94
+ " \n"
95
  ]
96
  }
97
  ],
project/InferencePipeline.ipynb DELETED
@@ -1,181 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stderr",
10
- "output_type": "stream",
11
- "text": [
12
- "/workspaces/RAG_LLM/project/shared.py:57: LangChainDeprecationWarning: The class `OllamaEmbeddings` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaEmbeddings``.\n",
13
- " return OllamaEmbeddings(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n",
14
- "/workspaces/RAG_LLM/project/shared.py:70: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
15
- " return Ollama(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n"
16
- ]
17
- }
18
- ],
19
- "source": [
20
- "# See README for more info on how the DataCollectionPipeline works\n",
21
- "# The retrieval pipeline is part of the DataCollectionPipeline\n",
22
- "from shared import getQdrantClient, getEmbeddingsModel, getModel\n",
23
- "from langchain_community.llms import Ollama\n",
24
- "from langchain.prompts import PromptTemplate\n",
25
- "from operator import itemgetter\n",
26
- "# Create a qdrant connection\n",
27
- "qClient = getQdrantClient()\n",
28
- "\n",
29
- "# Setup the text embedder\n",
30
- "embeddingsModel = getEmbeddingsModel()\n",
31
- "\n",
32
- "# Setup the model\n",
33
- "model = getModel()\n",
34
- "\n",
35
- "# Retrieval Pipeline\n",
36
- "# Retrieve the chunks with the most similar embeddings from Qdrant\n",
37
- "def retriever(text, collection):\n",
38
- " results = qClient.search(\n",
39
- " collection_name=collection,\n",
40
- " query_vector = embeddingsModel.embed_query(text),\n",
41
- " limit=10\n",
42
- " )\n",
43
- " return results"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": 5,
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "name": "stdout",
53
- "output_type": "stream",
54
- "text": [
55
- "Query expansion: Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.\n",
56
- "Coding Question?: 1\n",
57
- "Related Collection: Github\n",
58
- "Top texts: #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\n",
59
- "Link: https://github.com/ros2/ros2/tree/rolling/README.md\n",
60
- "Top texts: type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique_identifier_msgs.git version:rolling ros2/urdf: type:git url:https://github.com/ros2/urdf.git version:rolling ros2/yaml_cpp_vendor: type:git url:https://github.com/ros2/yaml_cpp_vendor.git version:rolling\n",
61
- "Link: https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
62
- "Top texts: *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg)\n",
63
- "Link: https://github.com/ros2/ros2/tree/rolling/README.md\n"
64
- ]
65
- },
66
- {
67
- "data": {
68
- "text/plain": [
69
- "\"Here's an example of what the README file for ROS could look like:\\n\\n**Welcome to the Robot Operating System (ROS)**\\n\\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\\n\\n### Getting Started\\n\\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\\n\\n### What's Included\\n\\nROS includes a range of open-source projects, including:\\n\\n* **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\\n* **tlsf**: A library for secure communication over TLS (Transport Layer Security).\\n* **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\\n* **urdf**: A package for working with URDF (Unified Robot Description Format) files.\\n* **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YAML files.\\n\\n### ROS Releases and Target Platforms\\n\\nFor more information on ROS releases, target platforms, and release notes, check out [REP-2000](https://ros.org/reps/rep-2000.html).\\n\\n### Project Resources\\n\\n* **ROSSwag**: Purchase ROS-related merchandise from our online store.\\n* **ROS Trademark Information**: Learn about the ROS trademark.\\n\\n### Get Involved\\n\\nStay up-to-date with the latest news and developments in ROS:\\n\\n* Follow us on [LinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation)\\n* Join our Twitter community: [OpenRoboticsOrg](https://twitter.com/OpenRoboticsOrg), [ROSOrg](https://twitter.com/ROSOrg)\\n\\n### License and Contributions\\n\\nROS is an open-source project, licensed under the Apache 2.0 license.\\n\\nWe welcome contributions from the ROS community! 
If you have any ideas or bug fixes to contribute, check out our [contribution guidelines](https://ros.org/blog/contribute/).\\n\\n**Thank You**\\n\\nThanks for choosing ROS as your platform for robotics development!\\n\\nYou can modify this README file according to your needs and preferences.\""
70
- ]
71
- },
72
- "execution_count": 5,
73
- "metadata": {},
74
- "output_type": "execute_result"
75
- }
76
- ],
77
- "source": [
78
- "# User query\n",
79
- "query = \"Can you create a README file for ROS\"\n",
80
- "\n",
81
- "# Query expansion(I only generate one additional prompt for simplicity)\n",
82
- "template = \"\"\"\n",
83
- "Rewrite the prompt. The new prompt must offer a different perspective.\n",
84
- "Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
85
- " Prompt: {prompt}\n",
86
- "\"\"\"\n",
87
- "prompt = PromptTemplate.from_template(template)\n",
88
- "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
89
- "queryExpansion = chain.invoke({\"prompt\": query})\n",
90
- "print(\"Query expansion: \", queryExpansion)\n",
91
- "\n",
92
- "# Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
93
- "template = \"\"\"\n",
94
- "You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
95
- "Output a 1 if it is or a 0 if it is not and nothing else.\n",
96
- " Prompt: {prompt}\n",
97
- "\"\"\"\n",
98
- "prompt = PromptTemplate.from_template(template)\n",
99
- "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
100
- "codingQuestion = chain.invoke({\"prompt\": query})\n",
101
- "print(\"Coding Question?: \", codingQuestion)\n",
102
- "\n",
103
- "# Filtered vector search for each of the N queries after expansion\n",
104
- "relatedCollection = 'Document'\n",
105
- "if (codingQuestion == '1'):\n",
106
- " relatedCollection = 'Github'\n",
107
- "print(\"Related Collection: \", relatedCollection)\n",
108
- "results1 = retriever(query, relatedCollection)\n",
109
- "results2 = retriever(queryExpansion, relatedCollection)\n",
110
- "\n",
111
- "# Collecting results\n",
112
- "results = results1+results2\n",
113
- "\n",
114
- "# Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
115
- "ids = [result.id for result in results]\n",
116
- "scores = [result.score for result in results]\n",
117
- "topIds = []\n",
118
- "topIndexes = []\n",
119
- "for x in range(3):\n",
120
- " maxScore = 0\n",
121
- " maxIndex = 0\n",
122
- " for i in range(len(ids)):\n",
123
- " if ids[i] not in topIds and scores[i] > maxScore:\n",
124
- " maxScore = scores[i]\n",
125
- " maxIndex = i\n",
126
- " topIds.append(ids[maxIndex])\n",
127
- " topIndexes.append(maxIndex)\n",
128
- "texts = [result.payload['text'] for result in results]\n",
129
- "links = [result.payload['link'] for result in results]\n",
130
- "topTexts = ''\n",
131
- "for index in topIndexes:\n",
132
- " print(\"Top texts: \", texts[index])\n",
133
- " print(\"Link: \", links[index])\n",
134
- " topTexts += texts[index]\n",
135
- "\n",
136
- "# Building prompt\n",
137
- "if(codingQuestion == '1'):\n",
138
- " template = \"\"\"\n",
139
- " Write code for the following question given the related coding document below.\n",
140
- "\n",
141
- " Document: {document}\n",
142
- " Question: {question}\n",
143
- " \"\"\"\n",
144
- " prompt = PromptTemplate.from_template(template)\n",
145
- "else:\n",
146
- " template = \"\"\"\n",
147
- " Answer the question based on the document below. If you can't answer the question, reply \"I don't know\"\n",
148
- "\n",
149
- " Document: {document}\n",
150
- " Question: {question}\n",
151
- " \"\"\"\n",
152
- " prompt = PromptTemplate.from_template(template)\n",
153
- "\n",
154
- "# Obtaining answer\n",
155
- "chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
156
- "chain.invoke({\"document\": topTexts, \"question\": query})"
157
- ]
158
- }
159
- ],
160
- "metadata": {
161
- "kernelspec": {
162
- "display_name": "Python 3",
163
- "language": "python",
164
- "name": "python3"
165
- },
166
- "language_info": {
167
- "codemirror_mode": {
168
- "name": "ipython",
169
- "version": 3
170
- },
171
- "file_extension": ".py",
172
- "mimetype": "text/x-python",
173
- "name": "python",
174
- "nbconvert_exporter": "python",
175
- "pygments_lexer": "ipython3",
176
- "version": "3.12.7"
177
- }
178
- },
179
- "nbformat": 4,
180
- "nbformat_minor": 2
181
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
project/README.md CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  <h1>Installation:</h1>
2
  <h3>Docker setup(easy):</h3>
3
 
@@ -9,11 +17,13 @@
9
  * Run "docker compose up -d"
10
  * Run "docker exec -it ollama ollama pull llama3.2"
11
  * Select the python 3.12.7 kernels for the notebooks and run DataCollectionPipeline.ipynb and FeaturePipeline.ipynb(to populate the mongodb and qdrant databases)
 
 
12
  * The app is available on localhost:7860
13
 
14
  <h3>Non-Docker(web based) setup:</h3>
15
 
16
- If for some reason the docker setup does not work try connecting to mongodb, qdrant, ollama, and gradio from the web:
17
  * Clone the repository from huggingface or the entire repository from github
18
  * Reopen the repository in a dev container
19
  * Copy the .env.example into a new .env file in the project folder
@@ -29,17 +39,14 @@ If for some reason the docker setup does not work try connecting to mongodb, qdr
29
 
30
  <h1>Project infrastructure</h1>
31
 
32
- Note some files may have similar code with other files, such as the ClearML files containing ipynb files rewritten in python in order to work in ClearML or gradio containing code from InferencePipeline.ipynb. The ipynb file prints output to help see what is happening.
33
-
34
- # app.py
35
- Sends a query to the inference pipeline to generate an answer. The DataCollectionPipeline.ipynb and FeaturePipeline.ipynb files must be run first to populate the databases.
36
 
37
  # Data Collection Pipeline
38
  The Data Collection pipeline takes as input a list of links to domains. The links are fed into the ETL pipeline which Extracts data from the links using a crawler, Transforms the data into a standardized format, and Loads the extracted data into a NoSQL data warehouse, which in this case is MongoDB. The ETL pipeline uses a different method of extracting and transforming based on the link type. In this project, I classify links as either a github repository or document each with their own crawler and cleaner. This raw data is used by the feature pipeline.
39
 
40
  # Feature Pipeline
41
  The Feature pipeline contains the ingestion pipeline.
42
- * The ingestion pipeline extracts documents from MongoDB that were stored by the Data Collection Pipeline. It further cleans the data, breaks it into chunks depending on the data category, passes the chunks through an embedding model to generate embeddings, then loads the embeddings plus their metadata into a vector database, which in this case is Qdrant. The embeddings are passed with additional metadata that contains the document link, type, chunk number, and content.
43
 
44
  # Training Pipeline
45
  The training pipeline performs finetuning. I skipped this step since it was not required.
@@ -47,16 +54,22 @@ The training pipeline performs finetuning. I skipped this step since it was not
47
  # Inference Pipeline
48
  The inference pipeline contains the retrieval client/pipeline.
49
  * The retrieval client takes a prompt as input. It uses the same embedding model as the ingestion pipeline in order to create an embedding for the prompt. It then queries the Qdrant database for the 10 closest embeddings using cosine distance and extracts the text chunk stored in the embeddings' metadata. This returns chunks that are related to the prompt.
50
- * The inference pipeline takes a query as input. It expands the query into N=2 queries using a prompt template, performs self-querying to extract metadata (document type) from the original query, searches the Qdrant for K=10 relevant chunks to each of the N=2 queries plus metadata using the retrieval client, combines the K=10 results from each of the N=2 queries, filters out only the most relevant 3 results, prompts the LLM with the results as context, and generates an answer.
 
 
 
51
 
52
- # ClearML
53
  The ClearML folder contains the notebook (.ipynb) pipeline files rewritten to work with ClearML. It is similar code to the notebooks, however ClearML does not print any output but instead logs all output on the website. The website stores the pipelines which take input and produce output stored in artifacts. These are the differences between the notebook(.ipynb) pipeline files and the ClearML pipeline files(.py):
54
  * The ClearML Data Collection Pipeline works the same way, running the entire ETL pipeline in a single step (I could not split the ETL pipeline into 3 steps (Extract, Transform, Load) since my list of links gets bigger while looping through it (since it also goes through some links inside of the websites crawled). Breaking it into steps would require more HTTP requests which would greatly slow down the pipeline).
55
  * The Feature Pipeline breaks down the notebook's loop (from the ingestion pipeline) into 5 stages: retrieve documents, clean documents, chunk documents, embed chunks, and store embeddings.
56
- * The Inference Pipeline simply puts each step in the notebook's version into a function. These functions are query expansion, self-querying, filtered vector search, collecting results, reranking, building prompt, and obtaining answer.
57
 
58
  # Tools
59
- The tools folder contains code for viewing/deleting what has been stored in MongoDB and Qdrant
 
 
 
60
 
61
  # shared.py
62
  shared.py is in both the project folder and project/Tools folder. It contains functions for setting up the connections with either the docker containers or web services. If you are running into errors connecting to any of the services, consider editing this file or double checking the .env file. Note the ClearML folder hardcodes all functions since it had trouble importing code.
 
1
+ <h3>My Github and Huggingface</h3>
2
+
3
+ * GitHubID: 32941731
4
+ * GitHub username: KenTheNoob
5
+ * GitHub link(private): https://github.com/KenTheNoob/eng-ai-agents
6
+ * Huggingface username: KenTheNoob
7
+ * Huggingface link: https://huggingface.co/KenTheNoob/RAG_LLM
8
+
9
  <h1>Installation:</h1>
10
  <h3>Docker setup(easy):</h3>
11
 
 
17
  * Run "docker compose up -d"
18
  * Run "docker exec -it ollama ollama pull llama3.2"
19
  * Select the python 3.12.7 kernels for the notebooks and run DataCollectionPipeline.ipynb and FeaturePipeline.ipynb(to populate the mongodb and qdrant databases)
20
+ * Note: Consider changing the links in the DataCollectionPipeline file to only the first one if you want to do a quick test, otherwise data collection and featurization will take hours
21
+ * Note: You can use the code in the Tools folder to show what is in the mongo or qdrant database or clear the databases
22
  * The app is available on localhost:7860
23
 
24
  <h3>Non-Docker(web based) setup:</h3>
25
 
26
+ If for some reason the docker setup does not work try connecting to mongodb, qdrant, ollama, and gradio from the web(otherwise ignore this section):
27
  * Clone the repository from huggingface or the entire repository from github
28
  * Reopen the repository in a dev container
29
  * Copy the .env.example into a new .env file in the project folder
 
39
 
40
  <h1>Project infrastructure</h1>
41
 
42
+ Note some files may have similar code with other files, such as the ClearML files containing ipynb files rewritten in python in order to work in ClearML. The ipynb file prints output to help see what is happening unlike the ClearML py files.
 
 
 
43
 
44
  # Data Collection Pipeline
45
  The Data Collection pipeline takes as input a list of links to domains. The links are fed into the ETL pipeline which Extracts data from the links using a crawler, Transforms the data into a standardized format, and Loads the extracted data into a NoSQL data warehouse, which in this case is MongoDB. The ETL pipeline uses a different method of extracting and transforming based on the link type. In this project, I classify links as either a github repository or document each with their own crawler and cleaner. This raw data is used by the feature pipeline.
46
 
47
  # Feature Pipeline
48
  The Feature pipeline contains the ingestion pipeline.
49
+ * The ingestion pipeline extracts documents from MongoDB that were stored by the Data Collection Pipeline. It further cleans the data(remove non-printable characters), breaks it into chunks, passes the chunks through an embedding model to generate embeddings, then loads the embeddings plus their metadata into a vector database, which in this case is Qdrant. The embeddings are passed with additional metadata that contains the document link, type, chunk number, and content.
50
 
51
  # Training Pipeline
52
  The training pipeline performs finetuning. I skipped this step since it was not required.
 
54
  # Inference Pipeline
55
  The inference pipeline contains the retrieval client/pipeline.
56
  * The retrieval client takes a prompt as input. It uses the same embedding model as the ingestion pipeline in order to create an embedding for the prompt. It then queries the Qdrant database for the 10 closest embeddings using cosine distance and extracts the text chunk stored in the embeddings' metadata. This returns chunks that are related to the prompt.
57
+ * The inference pipeline takes a query as input. It expands the query into N=2 queries using a prompt template, performs self-querying to extract metadata (document type) from the original query, searches the Qdrant for K=10 relevant chunks to each of the N=2 queries plus metadata using the retrieval client, combines the K=10 results from each of the N=2 queries, filters out only the most relevant 3 results, prompts the LLM with the results and query metadata as context, and pipes the prompt into the model to generate an answer.
58
+
59
+ # app.py
60
+ Sends a query to the inference pipeline to generate an answer. The DataCollectionPipeline.ipynb and FeaturePipeline.ipynb files must be run first to populate the databases. Note that the docker compose already runs the app in a docker container. The python file allows you to run the app outside a container if you install gradio. When using the gradio app, you can check the useSample box and select an Sample Prompt from the dropdown menu to run the sample prompts, or uncheck the box and run your own custom query.
61
 
62
+ # ClearML(optional setup)
63
  The ClearML folder contains the notebook (.ipynb) pipeline files rewritten to work with ClearML. It is similar code to the notebooks, however ClearML does not print any output but instead logs all output on the website. The website stores the pipelines which take input and produce output stored in artifacts. These are the differences between the notebook(.ipynb) pipeline files and the ClearML pipeline files(.py):
64
  * The ClearML Data Collection Pipeline works the same way, running the entire ETL pipeline in a single step (I could not split the ETL pipeline into 3 steps (Extract, Transform, Load) since my list of links gets bigger while looping through it (since it also goes through some links inside of the websites crawled). Breaking it into steps would require more HTTP requests which would greatly slow down the pipeline).
65
  * The Feature Pipeline breaks down the notebook's loop (from the ingestion pipeline) into 5 stages: retrieve documents, clean documents, chunk documents, embed chunks, and store embeddings.
66
+ * The Inference Pipeline simply puts each step in the gradio app into a function that is tracked by ClearML. These functions are query expansion, self-querying, filtered vector search, collecting results, reranking, building prompt, and obtaining answer.
67
 
68
  # Tools
69
+ The tools folder contains code for viewing/deleting what has been stored in MongoDB and Qdrant(very useful for debugging!).
70
+ * Tools/mongoTools.ipynb can show the amount of documents in the MongoDB database(which consists of two collections), show the full list of links visited, and the first document in each collection(a sample to show what the data stored in MongoDB looks like). The second cell deletes everything in the mongo database if you want to rerun the DataCollection pipeline with fewer links. The DataCollection pipeline will automatically ignore visited links, but if it takes too long, I suggest using the tool to delete everything, then rerunning the pipeline with only the ROS documentation and github links. Nav2 and moveit are massive sites/repositories to crawl.
71
+ * Tools/QdrantTools.ipynb can show the amount of documents in the Qdrant database(which consists of two collections), the first document in each collection(a sample to show what the data stored in Qdrant looks like), and runs a sample search for the closest embeddings/vectors to a sample query(prints out the metadata of the embeddings). Note that the embeddings themselves are not shown because with_vectors=false since normally Qdrant will search for the closest embeddings, but return the payload associated with the embedding(since the embedding itself is useless for generating an answer). The second cell counts how many chunks need to be embedded by the FeaturePipeline and compares it to the total number of chunks from the first cell to give an idea of how close to completion the feature pipeline is(run first cell first). The third cell is an explanation of how Qdrant finds the closest embeddings using cosine distance. The fourth cell allows you to delete everything in the Qdrant database(use with caution!).
72
+ * Tools/InferenceTool.ipynb contains the inference pipeline used by the gradio app. It allows you to generate answers to queries without running the gradio app along with printing out useful debugging information for everything that is being fed into the model. This includes the query expansion(reworded query(s)), whether the query is a coding question(self-querying), which Qdrant collection is being searched, the chunks/text being passed as context, the RAG model's answer, and the original model's answer to compare with the RAG model to see if it performed better.
73
 
74
  # shared.py
75
  shared.py is in both the project folder and project/Tools folder. It contains functions for setting up the connections with either the docker containers or web services. If you are running into errors connecting to any of the services, consider editing this file or double checking the .env file. Note the ClearML folder hardcodes all functions since it had trouble importing code.
project/Tools/InferenceTool.ipynb ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Make sure ollama serve is running(docker or terminal)\n",
10
+ "from operator import itemgetter\n",
11
+ "from langchain.prompts import PromptTemplate\n",
12
+ "from shared import getModel, getEmbeddingsModel, getQdrantClient\n",
13
+ "\n",
14
+ "def answer(query):\n",
15
+ " # Create a qdrant connection\n",
16
+ " qClient = getQdrantClient()\n",
17
+ "\n",
18
+ " # Setup the text embedder\n",
19
+ " embeddingsModel = getEmbeddingsModel()\n",
20
+ "\n",
21
+ " # Setup the model\n",
22
+ " model = getModel()\n",
23
+ "\n",
24
+ " # Retrieval Pipeline\n",
25
+ " # Retrieve the chunks with the most similar embeddings from Qdrant\n",
26
+ " def retriever(text, collection):\n",
27
+ " results = qClient.search(\n",
28
+ " collection_name=collection,\n",
29
+ " query_vector = embeddingsModel.embed_query(text),\n",
30
+ " limit=10\n",
31
+ " )\n",
32
+ " return results\n",
33
+ "\n",
34
+ " # Query expansion(I only generate one additional prompt for simplicity)\n",
35
+ " template = \"\"\"\n",
36
+ " Rewrite the prompt. The new prompt must offer a different perspective.\n",
37
+ " Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
38
+ " Prompt: {prompt}\n",
39
+ " \"\"\"\n",
40
+ " prompt = PromptTemplate.from_template(template)\n",
41
+ " chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
42
+ " queryExpansion = chain.invoke({\"prompt\": query})\n",
43
+ " print(\"Query expansion: \", queryExpansion)\n",
44
+ "\n",
45
+ " # Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
46
+ " template = \"\"\"\n",
47
+ " You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
48
+ " Output a 1 if it is or a 0 if it is not and nothing else.\n",
49
+ " Prompt: {prompt}\n",
50
+ " \"\"\"\n",
51
+ " prompt = PromptTemplate.from_template(template)\n",
52
+ " chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
53
+ " codingQuestion = chain.invoke({\"prompt\": query})\n",
54
+ " print(\"Coding question?: \", codingQuestion)\n",
55
+ "\n",
56
+ " # Filtered vector search for each of the N queries after expansion\n",
57
+ " relatedCollection = 'Document'\n",
58
+ " if (codingQuestion == '1'):\n",
59
+ " relatedCollection = 'Github'\n",
60
+ " results1 = retriever(query, relatedCollection)\n",
61
+ " results2 = retriever(queryExpansion, relatedCollection)\n",
62
+ " print(\"Related collection: \", relatedCollection)\n",
63
+ " \n",
64
+ "\n",
65
+ " # Collecting results\n",
66
+ " results = results1+results2\n",
67
+ "\n",
68
+ " # Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
69
+ " ids = [result.id for result in results]\n",
70
+ " scores = [result.score for result in results]\n",
71
+ " topIds = []\n",
72
+ " topIndexes = []\n",
73
+ " for x in range(3):\n",
74
+ " maxScore = 0\n",
75
+ " maxIndex = 0\n",
76
+ " for i in range(len(ids)):\n",
77
+ " if ids[i] not in topIds and scores[i] > maxScore:\n",
78
+ " maxScore = scores[i]\n",
79
+ " maxIndex = i\n",
80
+ " topIds.append(ids[maxIndex])\n",
81
+ " topIndexes.append(maxIndex)\n",
82
+ " texts = [result.payload['text'] for result in results]\n",
83
+ " links = [result.payload['link'] for result in results]\n",
84
+ " topTexts = ''\n",
85
+ " for index in topIndexes:\n",
86
+ " print(\"Top texts: \", texts[index])\n",
87
+ " print(\"Link: \", links[index])\n",
88
+ " topTexts += texts[index]\n",
89
+ "\n",
90
+ " # Building prompt\n",
91
+ " if(codingQuestion == '1'):\n",
92
+ " template = \"\"\"\n",
93
+ " Write code for the following question given the related coding document below.\n",
94
+ "\n",
95
+ " Document: {document}\n",
96
+ " Question: {question}\n",
97
+ " \"\"\"\n",
98
+ " prompt = PromptTemplate.from_template(template)\n",
99
+ " else:\n",
100
+ " template = \"\"\"\n",
101
+ " You are an AI agent that has retreived a document from the web.\n",
102
+ " If the document is useful for answering the question use it.\n",
103
+ " If the document is not useful, answer normally.\n",
104
+ " Do not mention the document.\n",
105
+ "\n",
106
+ " Document: {document}\n",
107
+ " Question: {question}\n",
108
+ " \"\"\"\n",
109
+ " prompt = PromptTemplate.from_template(template)\n",
110
+ "\n",
111
+ " # Obtaining answer\n",
112
+ " chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
113
+ " print(\"RAG answer: \", chain.invoke({\"document\": topTexts, \"question\": query}))\n",
114
+ " print(\"\\n----------------------------------------------\\n\")\n",
115
+ " baseline = model.invoke(query)\n",
116
+ " print(\"Baseline answer: \", baseline[:500])\n",
117
+ " print(\"\\n----------------------------------------------\\n\")\n"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 5,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "name": "stdout",
127
+ "output_type": "stream",
128
+ "text": [
129
+ "Query expansion: What percentage of global companies have adopted or are utilizing Nav2?\n",
130
+ "Coding question?: 0\n",
131
+ "Related collection: Document\n",
132
+ "Top texts: types of tasks like object following, complete coverage navigation, and more. Nav2 is a production-grade and high-quality navigation framework trusted by 100+ companies worldwide. It provides perception, planning, control, localization, visualization, and much more to build highly reliable autonomous systems. This will compute an environmental model from sensor and semantic data, dynamically path plan, compute velocities for motors, avoid obstacles, and structure higher-level robot behaviors.\n",
133
+ "Link: https://docs.nav2.org/\n",
134
+ "Top texts: not specifically address here. BehaviorTree.CPP upgraded to version 4.5+ Since we migrated from version 3.8 to 4.5, users must upgrade their XML and source code accordingly. You can refer to [this page](https://www.behaviortree.dev/docs/migration) for more details, but the main changes are: XML must be changed. This [python script can help](https://github.com/BehaviorTree/BehaviorTree.CPP/blob/master/convert_v3_to_v4.py). The syntax of SubTrees has changed; the one of SubTreePlus was adopted,\n",
135
+ "Link: https://docs.nav2.org/migration/Iron.html\n",
136
+ "Top texts: September 19, 2015 MoveIt! Upcoming Events - RoboBusiness 2015 Come meet MoveIt! developers and users at RoboBusiness 2015 in San Jose... September 17, 2015 Report on First MoveIt! Community Meeting Watch video of the First MoveIt! Community Meeting in case you missed it. Thank you for coming to the MoveIt! Community Meeting and thanks to the present... July 02, 2015 MoveIt! goes underwater! MoveIt! on an underwater Girona500 AUV robot and 4-DOF arm for autonomous underwater manipulation...\n",
137
+ "Link: https://moveit.ai/blog/\n",
138
+ "RAG answer: Nav2 is trusted by 100+ companies worldwide.\n",
139
+ "\n",
140
+ "----------------------------------------------\n",
141
+ "\n",
142
+ "Baseline answer: I don't have any information about a company called \"Nav2.\" It's possible that it's a small or private company, or it may not be well-known. Can you provide more context or clarify which Nav2 you are referring to?\n",
143
+ "\n",
144
+ "Alternatively, I can suggest some well-known companies that use Nav (a navigation and mapping platform) for their trust services. For example:\n",
145
+ "\n",
146
+ "* Uber uses Nav for its ride-hailing service\n",
147
+ "* Lyft also uses Nav for its service\n",
148
+ "* Pizza Hut uses Nav to help customers navigate to location\n",
149
+ "\n",
150
+ "----------------------------------------------\n",
151
+ "\n"
152
+ ]
153
+ }
154
+ ],
155
+ "source": [
156
+ "#queries = [\"How can I develop the navigation stack of an agent with egomotion?\", \"What is ROS?\", \"How many companies is Nav2 trusted by worldwide?\", \"How would I build a ROS 2 Navigation Framework and System?\", \"Write me code to move a robot using Moveit\"]\n",
157
+ "queries = [\"How many companies is Nav2 trusted by worldwide?\"]\n",
158
+ "for query in queries:\n",
159
+ " answer(query)"
160
+ ]
161
+ }
162
+ ],
163
+ "metadata": {
164
+ "kernelspec": {
165
+ "display_name": "Python 3",
166
+ "language": "python",
167
+ "name": "python3"
168
+ },
169
+ "language_info": {
170
+ "codemirror_mode": {
171
+ "name": "ipython",
172
+ "version": 3
173
+ },
174
+ "file_extension": ".py",
175
+ "mimetype": "text/x-python",
176
+ "name": "python",
177
+ "nbconvert_exporter": "python",
178
+ "pygments_lexer": "ipython3",
179
+ "version": "3.12.7"
180
+ }
181
+ },
182
+ "nbformat": 4,
183
+ "nbformat_minor": 2
184
+ }
project/Tools/QdrantTools.ipynb CHANGED
@@ -2,17 +2,35 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 4,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Number of document chunks: 0\n",
13
- "Number of githb chunks: 0\n",
14
  "\n",
15
- "Sample search result(n=2): \n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  ]
17
  }
18
  ],
@@ -39,7 +57,7 @@
39
  "\n",
40
  "# Show everything in the Github collection\n",
41
  "numGithubChunks = 0\n",
42
- "# Note with_vectors defaults to false, so the vectors are not returned\n",
43
  "chunks = qClient.scroll(collection_name='Github', limit=100)\n",
44
  "while True:\n",
45
  " for chunk in chunks[0]:\n",
@@ -49,34 +67,86 @@
49
  " chunks = qClient.scroll(collection_name='Github', limit=100, with_payload=False, offset=chunks[1])\n",
50
  " if chunks[1] is None:\n",
51
  " break\n",
52
- "print(\"Number of githb chunks: \", numDocumentChunks)\n",
53
  "if numGithubChunks > 0:\n",
54
- " print(\"\\nSample github chunk(metadata not the vector): \")\n",
55
  " print(sampleGithubChunk, '\\n')\n",
56
  "\n",
57
  "# Show a sample search\n",
58
  "embeddingsModel = getEmbeddingsModel()\n",
59
  "results = qClient.search(\n",
60
  " collection_name=\"Document\",\n",
61
- " query_vector = embeddingsModel.embed_query(\"What operating system is ROS made for?\"),\n",
62
  " limit=10\n",
63
  ")\n",
64
- "print(\"\\nSample search result(n=2): \")\n",
65
  "for result in results:\n",
66
  " print(result)"
67
  ]
68
  },
69
  {
70
  "cell_type": "code",
71
- "execution_count": 22,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  "metadata": {},
73
  "outputs": [
74
  {
75
  "name": "stdout",
76
  "output_type": "stream",
77
  "text": [
78
- "Cosine Similarity for related sentences: 0.7035977848391597\n",
79
- "Cosine Similarity for unrelated sentences: 0.3566534327076298\n"
80
  ]
81
  }
82
  ],
@@ -84,23 +154,22 @@
84
  "import numpy as np\n",
85
  "# How cosine distance works\n",
86
  "\n",
87
- "embedding1 = embeddingsModel.embed_query(\"What is the weather like?\")\n",
88
- "embedding2 = embeddingsModel.embed_query(\"It is raining today.\")\n",
89
- "embedding3 = embeddingsModel.embed_query(\"ROS is an open source platform\")\n",
90
  "def cosine_similarity(vec1, vec2):\n",
91
  " dot_product = np.dot(vec1, vec2)\n",
92
  " norm_vec1 = np.linalg.norm(vec1)\n",
93
  " norm_vec2 = np.linalg.norm(vec2)\n",
94
  " return dot_product / (norm_vec1 * norm_vec2)\n",
95
- "similarity1 = cosine_similarity(embedding1, embedding2)\n",
96
- "similarity2 = cosine_similarity(embedding1, embedding3)\n",
97
  "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
98
  "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
99
  ]
100
  },
101
  {
102
  "cell_type": "code",
103
- "execution_count": 3,
104
  "metadata": {},
105
  "outputs": [
106
  {
@@ -109,7 +178,7 @@
109
  "True"
110
  ]
111
  },
112
- "execution_count": 3,
113
  "metadata": {},
114
  "output_type": "execute_result"
115
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 33,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Number of document chunks: 14800\n",
 
13
  "\n",
14
+ "Sample document chunk(metadata not the vector): \n",
15
+ "id=0 payload={'link': 'https://www.ros.org/', 'type': 'Document', 'chunk': 0, 'text': 'ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and'} vector=None shard_key=None order_value=None \n",
16
+ "\n",
17
+ "Number of githb chunks: 3600\n",
18
+ "\n",
19
+ "Sample github chunk(with_vector=false): \n",
20
+ "id=0 payload={'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'chunk': 0, 'text': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\"} vector=None shard_key=None order_value=None \n",
21
+ "\n",
22
+ "\n",
23
+ "Sample search result(n=10): \n",
24
+ "id=45 version=45 score=0.5391361 payload={'link': 'https://docs.nav2.org/', 'type': 'Document', 'chunk': 40, 'text': 'types of tasks like object following, complete coverage navigation, and more. Nav2 is a production-grade and high-quality navigation framework trusted by 100+ companies worldwide. It provides perception, planning, control, localization, visualization, and much more to build highly reliable autonomous systems. This will compute an environmental model from sensor and semantic data, dynamically path plan, compute velocities for motors, avoid obstacles, and structure higher-level robot behaviors.'} vector=None shard_key=None order_value=None\n",
25
+ "id=9180 version=9180 score=0.511093 payload={'link': 'https://docs.nav2.org/migration/Iron.html', 'type': 'Document', 'chunk': 39, 'text': 'not specifically address here. BehaviorTree.CPP upgraded to version 4.5+ Since we migrated from version 3.8 to 4.5, users must upgrade their XML and source code accordingly. You can refer to [this page](https://www.behaviortree.dev/docs/migration) for more details, but the main changes are: XML must be changed. This [python script can help](https://github.com/BehaviorTree/BehaviorTree.CPP/blob/master/convert_v3_to_v4.py). The syntax of SubTrees has changed; the one of SubTreePlus was adopted,'} vector=None shard_key=None order_value=None\n",
26
+ "id=9922 version=9922 score=0.5105795 payload={'link': 'https://moveit.ai/blog/', 'type': 'Document', 'chunk': 31, 'text': 'September 19, 2015 MoveIt! Upcoming Events - RoboBusiness 2015 Come meet MoveIt! developers and users at RoboBusiness 2015 in San Jose... September 17, 2015 Report on First MoveIt! Community Meeting Watch video of the First MoveIt! Community Meeting in case you missed it. Thank you for coming to the MoveIt! Community Meeting and thanks to the present... July 02, 2015 MoveIt! goes underwater! MoveIt! on an underwater Girona500 AUV robot and 4-DOF arm for autonomous underwater manipulation...'} vector=None shard_key=None order_value=None\n",
27
+ "id=540 version=540 score=0.51053035 payload={'link': 'https://docs.nav2.org/concepts/index.html', 'type': 'Document', 'chunk': 56, 'text': 'to their task. When the behavior tree ticks the corresponding BT node, it will call the action server to process its task. The action server callback inside the server will call the chosen algorithm by its name (e.g. FollowPath) that maps to a specific algorithm. This allows a user to abstract the algorithm used in the behavior tree to classes of algorithms. For instance, you can have N plugin controllers to follow paths, dock with charger, avoid dynamic obstacles, or interface with a tool.'} vector=None shard_key=None order_value=None\n",
28
+ "id=7618 version=7618 score=0.50761116 payload={'link': 'https://docs.nav2.org/configuration/packages/configuring-savitzky-golay-smoother.html', 'type': 'Document', 'chunk': 39, 'text': 'plugin that will take in an input path and smooth it using a simple and fast smoothing technique based on Savitzky Golay Filters. It uses a digital signal processing technique designed to reduce noise distorting a reference signal, in this case, a path. It is useful for all types of planners, but particularly in NavFn to remove tiny artifacts that can occur near the end of paths or Theta* to slightly soften the transition between Line of Sight line segments without modifying the primary path.'} vector=None shard_key=None order_value=None\n",
29
+ "id=1067 version=1067 score=0.50312483 payload={'link': 'https://docs.nav2.org/setup_guides/algorithm/select_algorithm.html', 'type': 'Document', 'chunk': 48, 'text': 'not suitable for ackermann and legged robots since they have turning constraints. That being said, these plugins are best used on robots that can drive in any direction or rotate safely in place, such as circular differential and circular omnidirectional robots. Another planner plugin is the Smac Hybrid-A* planner that supports arbitrary shaped ackermann and legged robots. It is a highly optimized and fully reconfigurable Hybrid-A* implementation supporting Dubin and Reeds-Shepp motion models.'} vector=None shard_key=None order_value=None\n",
30
+ "id=60 version=60 score=0.5007378 payload={'link': 'https://moveit.ai/', 'type': 'Document', 'chunk': 2, 'text': 'given pose, even in over-actuated arms Control Execute time-parameterized joint trajectories to low level hardware controllers through common interfaces 3D Perception Connect to depth sensors and point clouds with Octomaps Collision Checking Avoid obstacles using geometric primitives, meshes, or point cloud data Companies using MoveIt Powerful 3D Interactive Visualizer Out-of-the box visual demonstrations in Rviz allow new users experimentation with various planning algorithms around obstacles.'} vector=None shard_key=None order_value=None\n",
31
+ "id=9196 version=9196 score=0.49414897 payload={'link': 'https://docs.nav2.org/migration/Iron.html', 'type': 'Document', 'chunk': 55, 'text': 'planner. When enforce_path_inversion is true, the path handler will prune the path to the first time the directions change to force the controller to plan to the inversion point and then be set the rest of the path, once in tolerance. The Path Align critic also contains a parameter use_path_orientations which can be paired with it to incentivize aligning the path containing orientation information to better attempt to achieve path inversions where requested and not do them when not requested.'} vector=None shard_key=None order_value=None\n",
32
+ "id=404 version=404 score=0.4938618 payload={'link': 'https://docs.nav2.org/development_guides/devcontainer_docs/devcontainer_guide.html', 'type': 'Document', 'chunk': 43, 'text': 'needed for building the project, as reused by the projects CI. For example, the dever stage modifies /etc/bash.bashrc to automatically source install/setup.bash from the underlay workspace, ensuring all VS Code extensions are loaded with the correct environment, while avoiding any race conditions during installation and startup. To speed up the initial build, images layers from this builder stage are cached by pulling the same image tag used by the projects CI, hosted from the image registry.'} vector=None shard_key=None order_value=None\n",
33
+ "id=523 version=523 score=0.48727226 payload={'link': 'https://docs.nav2.org/concepts/index.html', 'type': 'Document', 'chunk': 39, 'text': 'with the concepts required to appreciating and working with this project. ROS 2 ROS 2 is the core middleware used for Nav2. If you are unfamiliar with this, please visit the ROS 2 documentation before continuing. Action Server Just as in ROS, action servers are a common way to control long running tasks like navigation. This stack makes more extensive use of actions, and in some cases, without an easy topic interface. It is more important to understand action servers as a developer in ROS 2.'} vector=None shard_key=None order_value=None\n"
34
  ]
35
  }
36
  ],
 
57
  "\n",
58
  "# Show everything in the Github collection\n",
59
  "numGithubChunks = 0\n",
60
+ "# Note with_vectors defaults to false, so the vectors are not returned(since they are very large)\n",
61
  "chunks = qClient.scroll(collection_name='Github', limit=100)\n",
62
  "while True:\n",
63
  " for chunk in chunks[0]:\n",
 
67
  " chunks = qClient.scroll(collection_name='Github', limit=100, with_payload=False, offset=chunks[1])\n",
68
  " if chunks[1] is None:\n",
69
  " break\n",
70
+ "print(\"Number of githb chunks: \", numGithubChunks)\n",
71
  "if numGithubChunks > 0:\n",
72
+ " print(\"\\nSample github chunk(with_vector=false): \")\n",
73
  " print(sampleGithubChunk, '\\n')\n",
74
  "\n",
75
  "# Show a sample search\n",
76
  "embeddingsModel = getEmbeddingsModel()\n",
77
  "results = qClient.search(\n",
78
  " collection_name=\"Document\",\n",
79
+ " query_vector = embeddingsModel.embed_query(\"How many companies is Nav2 trusted by worldwide?\"),\n",
80
  " limit=10\n",
81
  ")\n",
82
+ "print(\"\\nSample search result(n=10): \")\n",
83
  "for result in results:\n",
84
  " print(result)"
85
  ]
86
  },
87
  {
88
  "cell_type": "code",
89
+ "execution_count": 34,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "name": "stdout",
94
+ "output_type": "stream",
95
+ "text": [
96
+ "Total number of chunks to embed: 285569\n",
97
+ "Chunks currently embedded: 18400\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "# Check how many chunks total will be processed by the FeaturePipeline\n",
103
+ "from shared import getMongoClient\n",
104
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
105
+ "\n",
106
+ "\n",
107
+ "texts = []\n",
108
+ "# Create a mongoDB connection\n",
109
+ "mongoHost = getMongoClient()\n",
110
+ "mongoDatabase = mongoHost[\"twin\"]\n",
111
+ "collections = mongoDatabase.list_collection_names()\n",
112
+ "for collection in collections:\n",
113
+ " mongoCollection = mongoDatabase[collection]\n",
114
+ " results = mongoCollection.find()\n",
115
+ " for result in results:\n",
116
+ " # For each document, split it into chunks\n",
117
+ " texts.append(result[\"content\"])\n",
118
+ "\n",
119
+ "cleanTexts = []\n",
120
+ "for text in texts:\n",
121
+ " cleanTexts.append(\"\".join(char for char in text if 32 <= ord(char) <= 126))\n",
122
+ "\n",
123
+ "numChunks = 0\n",
124
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
125
+ " chunk_size=500,\n",
126
+ " chunk_overlap=20,\n",
127
+ " length_function=len,\n",
128
+ " is_separator_regex=False,\n",
129
+ ")\n",
130
+ "for text in cleanTexts:\n",
131
+ " textChunks = text_splitter.split_text(text)\n",
132
+ " for chunk in textChunks:\n",
133
+ " numChunks += 1\n",
134
+ "\n",
135
+ "print(\"Total number of chunks to embed: \", numChunks)\n",
136
+ "print(\"Chunks currently embedded: \", numDocumentChunks+numGithubChunks)"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 17,
142
  "metadata": {},
143
  "outputs": [
144
  {
145
  "name": "stdout",
146
  "output_type": "stream",
147
  "text": [
148
+ "Cosine Similarity for related sentences: 0.523006986899456\n",
149
+ "Cosine Similarity for unrelated sentences: 0.32259653091273344\n"
150
  ]
151
  }
152
  ],
 
154
  "import numpy as np\n",
155
  "# How cosine distance works\n",
156
  "\n",
157
+ "queryEmbedding = embeddingsModel.embed_query(\"What is the weather like?\")\n",
158
+ "documentEmbedding = embeddingsModel.embed_documents([\"It is raining today.\", \"ROS is an open source platform\"])\n",
 
159
  "def cosine_similarity(vec1, vec2):\n",
160
  " dot_product = np.dot(vec1, vec2)\n",
161
  " norm_vec1 = np.linalg.norm(vec1)\n",
162
  " norm_vec2 = np.linalg.norm(vec2)\n",
163
  " return dot_product / (norm_vec1 * norm_vec2)\n",
164
+ "similarity1 = cosine_similarity(queryEmbedding, documentEmbedding[0])\n",
165
+ "similarity2 = cosine_similarity(queryEmbedding, documentEmbedding[1])\n",
166
  "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
167
  "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
168
  ]
169
  },
170
  {
171
  "cell_type": "code",
172
+ "execution_count": 19,
173
  "metadata": {},
174
  "outputs": [
175
  {
 
178
  "True"
179
  ]
180
  },
181
+ "execution_count": 19,
182
  "metadata": {},
183
  "output_type": "execute_result"
184
  }
project/Tools/mongoTools.ipynb CHANGED
@@ -2,20 +2,469 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 7,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Number of regular documents: 4\n",
13
- "Number of github documents: 6815\n",
14
  "Links crawled: \n",
15
  "https://www.ros.org/\n",
16
  "https://docs.nav2.org/\n",
17
  "https://moveit.ai/\n",
18
  "https://gazebosim.org/home\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "https://github.com/ros2/ros2/tree/rolling/README.md\n",
20
  "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
21
  "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
@@ -6816,23 +7265,8 @@
6816
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.hh\n",
6817
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/gazebo_generator.cc\n",
6818
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.cc\n",
6819
- "https://github.com/ros2/ros2/tree/rolling/README.md\n",
6820
- "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
6821
- "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
6822
- "https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
6823
- "https://github.com/ros2/ros2/tree/rolling/src/.gitkeep\n",
6824
- "https://github.com/ros-navigation/navigation2/tree/main/README.md\n",
6825
- "https://github.com/ros-navigation/navigation2/tree/main/.gitignore\n",
6826
- "https://github.com/ros-navigation/navigation2/tree/main/CODEOWNERS\n",
6827
- "https://github.com/ros-navigation/navigation2/tree/main/ros2.repos\n",
6828
- "https://github.com/ros-navigation/navigation2/tree/main/src/.gitkeep\n",
6829
- "https://github.com/ros2/ros2/tree/rolling/README.md\n",
6830
- "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
6831
- "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
6832
- "https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
6833
- "https://github.com/ros2/ros2/tree/rolling/src/.gitkeep\n",
6834
- "Sample regular document: {'_id': ObjectId('6755a587c408062710d2da36'), 'link': 'https://www.ros.org/', 'type': 'Document', 'content': ' ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and Windows 10, though other systems are supported to varying degrees. Learn More Humble Hawksbill ROS 2 Humble Hawksbill is a slighly older LTS release of ROS 2 targeted at Ubuntu 22.04 (Jammy) and Windows 10. Other systems are supported including tier 3 support for 20.04 for those transitioning from ROS 1. Learn More Support There are several mechanisms in place to support the ROS community, each with its own purpose. Documentation Documentation and tutorials for ROS 2 Stack Exchange Ask questions. Get answers. Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Documentation Documentation and tutorials for ROS 2 Robotics Stack Exchange Ask questions.Get answers.All ROS versions Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Recent Updates and Highlights ROSCon 2024 Videos are Now Available See the ROSCon 2024 website for details 11/18/2024 - Katherine Scott The videos from ROSCon 2024 in Odense are now available on the ROSCon Website (see the program), this Vimeo showcase, and in the ROS documentation. The ROSCon website also includes the slides from all the talks at ROSCon. I have also included a list of all the videos below. I want to thank AMD for being our 2024 ROSCon video sponsor, their generous support makes the ROSCon live stream and videos possible. 
READ MORE Recent ROS Discourse Posts ROS News of the Week 11/22/2024 - ROS Discourse Gazebo Classic and Citadel End of Life 12/2/2024 - ROS Discourse ROS 2 driver for Mitsubishi Melfa RV-FR 10/24/2024 ROS Discourse Home Why ROS? Getting Started Community Ecosystem Q&A Forum Packages Wiki Documentation media Q&A Forum Packages ROSCon Wiki documentation discord Brought to you by Open Robotics | licensed under Creative Commons Attributions 3.0 | ©2021 Open Robotics '}\n",
6835
- "Sample github document {'_id': ObjectId('67559bffc408062710d2bc0f'), 'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'content': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/). Onceyou'veinstalledROSstartbylearningsome[basicconcepts](https://docs.ros.org/en/rolling/Concepts/Basic.html)andtakealookatour[beginnertutorials](https://docs.ros.org/en/rolling/Tutorials/Beginner-CLI-Tools.html). #JointheROSCommunity ##CommunityResources *[ROSDiscussionForum](https://discourse.ros.org/) *[ROSDiscordServer](https://discord.com/servers/open-robotics-1077825543698927656) *[RoboticsStackExchange](https://robotics.stackexchange.com/)(preferredROSsupportforum). *[OfficialROSVideos](https://vimeo.com/osrfoundation) *[ROSCon](https://roscon.ros.org),ouryearlydeveloperconference. 
*CiteROS2inacademicworkusing[DOI:10.1126/scirobotics.abm6074](https://www.science.org/doi/10.1126/scirobotics.abm6074) ##DeveloperResources *[ROS2Documentation](https://docs.ros.org/) *[ROSPackageAPIreference](https://docs.ros.org/en/rolling/p/) *[ROSPackageIndex](https://index.ros.org/) *[ROSonDockerHub](https://hub.docker.com/_/ros/) *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg) ROSismadepossiblethroughthegeneroussupportofopensourcecontributorsandthenon-profit[OpenSourceRoboticsFoundation(OSRF)](https://www.openrobotics.org/). TaxdeductibledonationstotheOSRFcanbe[madehere.](https://donorbox.org/support-open-robotics?utm_medium=qrcode&utm_source=qrcode) \"}\n"
6836
  ]
6837
  }
6838
  ],
@@ -6875,7 +7309,7 @@
6875
  },
6876
  {
6877
  "cell_type": "code",
6878
- "execution_count": 13,
6879
  "metadata": {},
6880
  "outputs": [
6881
  {
@@ -6884,7 +7318,7 @@
6884
  "DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)"
6885
  ]
6886
  },
6887
- "execution_count": 13,
6888
  "metadata": {},
6889
  "output_type": "execute_result"
6890
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Number of regular documents: 453\n",
13
+ "Number of github documents: 6800\n",
14
  "Links crawled: \n",
15
  "https://www.ros.org/\n",
16
  "https://docs.nav2.org/\n",
17
  "https://moveit.ai/\n",
18
  "https://gazebosim.org/home\n",
19
+ "https://www.ros.org/blog/why-ros\n",
20
+ "https://www.ros.org/blog/getting-started\n",
21
+ "https://www.ros.org/blog/community\n",
22
+ "https://www.ros.org/blog/ecosystem\n",
23
+ "https://www.ros.org/blog/getting-started/\n",
24
+ "https://www.ros.org/blog/media\n",
25
+ "https://www.ros.org/blog/discord\n",
26
+ "https://docs.nav2.org/getting_started/index.html\n",
27
+ "https://docs.nav2.org/development_guides/index.html\n",
28
+ "https://docs.nav2.org/development_guides/build_docs/index.html\n",
29
+ "https://docs.nav2.org/development_guides/build_docs/build_troubleshooting_guide.html\n",
30
+ "https://docs.nav2.org/development_guides/devcontainer_docs/index.html\n",
31
+ "https://docs.nav2.org/development_guides/devcontainer_docs/devcontainer_guide.html\n",
32
+ "https://docs.nav2.org/development_guides/involvement_docs/index.html\n",
33
+ "https://docs.nav2.org/concepts/index.html\n",
34
+ "https://docs.nav2.org/setup_guides/index.html\n",
35
+ "https://docs.nav2.org/setup_guides/transformation/setup_transforms.html\n",
36
+ "https://docs.nav2.org/setup_guides/urdf/setup_urdf.html\n",
37
+ "https://docs.nav2.org/setup_guides/odom/setup_odom.html\n",
38
+ "https://docs.nav2.org/setup_guides/sensors/setup_sensors.html\n",
39
+ "https://docs.nav2.org/setup_guides/footprint/setup_footprint.html\n",
40
+ "https://docs.nav2.org/setup_guides/algorithm/select_algorithm.html\n",
41
+ "https://docs.nav2.org/about/robots.html\n",
42
+ "https://docs.nav2.org/tutorials/index.html\n",
43
+ "https://docs.nav2.org/tutorials/docs/navigation2_on_real_turtlebot3.html\n",
44
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_slam.html\n",
45
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_stvl.html\n",
46
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_gps.html\n",
47
+ "https://docs.nav2.org/tutorials/docs/using_groot.html\n",
48
+ "https://docs.nav2.org/tutorials/docs/integrating_vio.html\n",
49
+ "https://docs.nav2.org/tutorials/docs/navigation2_dynamic_point_following.html\n",
50
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_keepout_filter.html\n",
51
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_speed_filter.html\n",
52
+ "https://docs.nav2.org/tutorials/docs/using_docking.html\n",
53
+ "https://docs.nav2.org/tutorials/docs/using_shim_controller.html\n",
54
+ "https://docs.nav2.org/tutorials/docs/adding_smoother.html\n",
55
+ "https://docs.nav2.org/tutorials/docs/using_collision_monitor.html\n",
56
+ "https://docs.nav2.org/tutorials/docs/adding_a_nav2_task_server.html\n",
57
+ "https://docs.nav2.org/tutorials/docs/filtering_of_noise-induced_obstacles.html\n",
58
+ "https://docs.nav2.org/tutorials/docs/camera_calibration.html\n",
59
+ "https://docs.nav2.org/tutorials/docs/get_backtrace.html\n",
60
+ "https://docs.nav2.org/tutorials/docs/get_profile.html\n",
61
+ "https://docs.nav2.org/plugin_tutorials/index.html\n",
62
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_costmap2d_plugin.html\n",
63
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_nav2planner_plugin.html\n",
64
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_nav2controller_plugin.html\n",
65
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_bt_plugin.html\n",
66
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_behavior_plugin.html\n",
67
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_navigator_plugin.html\n",
68
+ "https://docs.nav2.org/configuration/index.html\n",
69
+ "https://docs.nav2.org/configuration/packages/configuring-bt-navigator.html\n",
70
+ "https://docs.nav2.org/configuration/packages/configuring-bt-xml.html\n",
71
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Wait.html\n",
72
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Spin.html\n",
73
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/BackUp.html\n",
74
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/DriveOnHeading.html\n",
75
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/AssistedTeleop.html\n",
76
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputePathToPose.html\n",
77
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/FollowPath.html\n",
78
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/NavigateToPose.html\n",
79
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearEntireCostmap.html\n",
80
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearCostmapExceptRegion.html\n",
81
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearCostmapAroundRobot.html\n",
82
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ReinitializeGlobalLocalization.html\n",
83
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/TruncatePath.html\n",
84
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/TruncatePathLocal.html\n",
85
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/PlannerSelector.html\n",
86
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ControllerSelector.html\n",
87
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/SmootherSelector.html\n",
88
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/GoalCheckerSelector.html\n",
89
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ProgressCheckerSelector.html\n",
90
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/NavigateThroughPoses.html\n",
91
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputePathThroughPoses.html\n",
92
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputeCoveragePath.html\n",
93
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelCoverage.html\n",
94
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/RemovePassedGoals.html\n",
95
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/RemoveInCollisionGoals.html\n",
96
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelControl.html\n",
97
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelBackUp.html\n",
98
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelSpin.html\n",
99
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelWait.html\n",
100
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelDriveOnHeading.html\n",
101
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelAssistedTeleop.html\n",
102
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Smooth.html\n",
103
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/GetPoseFromPath.html\n",
104
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/DockRobot.html\n",
105
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/UndockRobot.html\n",
106
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GoalReached.html\n",
107
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/TransformAvailable.html\n",
108
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/DistanceTraveled.html\n",
109
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GoalUpdated.html\n",
110
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GloballyUpdatedGoal.html\n",
111
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/InitialPoseReceived.html\n",
112
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsStuck.html\n",
113
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsStopped.html\n",
114
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/TimeExpired.html\n",
115
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsBatteryLow.html\n",
116
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsPathValid.html\n",
117
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/PathExpiringTimer.html\n",
118
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/AreErrorCodesPresent.html\n",
119
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldAControllerRecoveryHelp.html\n",
120
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldAPlannerRecoveryHelp.html\n",
121
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldASmootherRecoveryHelp.html\n",
122
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsBatteryCharging.html\n",
123
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/PipelineSequence.html\n",
124
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/RoundRobin.html\n",
125
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/RecoveryNode.html\n",
126
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/RateController.html\n",
127
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/DistanceController.html\n",
128
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/SpeedController.html\n",
129
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/GoalUpdater.html\n",
130
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/PathLongerOnApproach.html\n",
131
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/SingleTrigger.html\n",
132
+ "https://docs.nav2.org/configuration/packages/configuring-costmaps.html\n",
133
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/static.html\n",
134
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/inflation.html\n",
135
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/obstacle.html\n",
136
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/voxel.html\n",
137
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/range.html\n",
138
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/denoise.html\n",
139
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/keepout_filter.html\n",
140
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/speed_filter.html\n",
141
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/binary_filter.html\n",
142
+ "https://docs.nav2.org/configuration/packages/configuring-lifecycle.html\n",
143
+ "https://docs.nav2.org/configuration/packages/configuring-planner-server.html\n",
144
+ "https://docs.nav2.org/configuration/packages/configuring-coverage-server.html\n",
145
+ "https://docs.nav2.org/configuration/packages/configuring-navfn.html\n",
146
+ "https://docs.nav2.org/configuration/packages/configuring-smac-planner.html\n",
147
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-2d.html\n",
148
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-hybrid.html\n",
149
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-lattice.html\n",
150
+ "https://docs.nav2.org/configuration/packages/configuring-thetastar.html\n",
151
+ "https://docs.nav2.org/configuration/packages/configuring-controller-server.html\n",
152
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/simple_progress_checker.html\n",
153
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/pose_progress_checker.html\n",
154
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/simple_goal_checker.html\n",
155
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/stopped_goal_checker.html\n",
156
+ "https://docs.nav2.org/configuration/packages/configuring-dwb-controller.html\n",
157
+ "https://docs.nav2.org/configuration/packages/dwb-params/controller.html\n",
158
+ "https://docs.nav2.org/configuration/packages/dwb-params/iterator.html\n",
159
+ "https://docs.nav2.org/configuration/packages/dwb-params/kinematic.html\n",
160
+ "https://docs.nav2.org/configuration/packages/dwb-params/visualization.html\n",
161
+ "https://docs.nav2.org/configuration/packages/dwb-plugins/limited_accel_generator.html\n",
162
+ "https://docs.nav2.org/configuration/packages/dwb-plugins/standard_traj_generator.html\n",
163
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/base_obstacle.html\n",
164
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/goal_align.html\n",
165
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/goal_dist.html\n",
166
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/obstacle_footprint.html\n",
167
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/oscillation.html\n",
168
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/path_align.html\n",
169
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/path_dist.html\n",
170
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/prefer_forward.html\n",
171
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/rotate_to_goal.html\n",
172
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/twirling.html\n",
173
+ "https://docs.nav2.org/configuration/packages/configuring-regulated-pp.html\n",
174
+ "https://docs.nav2.org/configuration/packages/configuring-mppic.html\n",
175
+ "https://docs.nav2.org/configuration/packages/configuring-rotation-shim-controller.html\n",
176
+ "https://docs.nav2.org/configuration/packages/configuring-graceful-motion-controller.html\n",
177
+ "https://docs.nav2.org/configuration/packages/configuring-map-server.html\n",
178
+ "https://docs.nav2.org/configuration/packages/configuring-amcl.html\n",
179
+ "https://docs.nav2.org/configuration/packages/configuring-behavior-server.html\n",
180
+ "https://docs.nav2.org/configuration/packages/configuring-smoother-server.html\n",
181
+ "https://docs.nav2.org/configuration/packages/configuring-simple-smoother.html\n",
182
+ "https://docs.nav2.org/configuration/packages/configuring-savitzky-golay-smoother.html\n",
183
+ "https://docs.nav2.org/configuration/packages/configuring-constrained-smoother.html\n",
184
+ "https://docs.nav2.org/configuration/packages/configuring-velocity-smoother.html\n",
185
+ "https://docs.nav2.org/configuration/packages/configuring-collision-monitor.html\n",
186
+ "https://docs.nav2.org/configuration/packages/collision_monitor/configuring-collision-monitor-node.html\n",
187
+ "https://docs.nav2.org/configuration/packages/collision_monitor/configuring-collision-detector-node.html\n",
188
+ "https://docs.nav2.org/configuration/packages/configuring-waypoint-follower.html\n",
189
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/wait_at_waypoint.html\n",
190
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/photo_at_waypoint.html\n",
191
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/input_at_waypoint.html\n",
192
+ "https://docs.nav2.org/configuration/packages/configuring-loopback-sim.html\n",
193
+ "https://docs.nav2.org/configuration/packages/configuring-docking-server.html\n",
194
+ "https://docs.nav2.org/tuning/index.html\n",
195
+ "https://docs.nav2.org/behavior_trees/index.html\n",
196
+ "https://docs.nav2.org/behavior_trees/overview/nav2_specific_nodes.html\n",
197
+ "https://docs.nav2.org/behavior_trees/overview/detailed_behavior_tree_walkthrough.html\n",
198
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_recovery.html\n",
199
+ "https://docs.nav2.org/behavior_trees/trees/nav_through_poses_recovery.html\n",
200
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_and_pause_near_goal_obstacle.html\n",
201
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_with_consistent_replanning_and_if_path_becomes_invalid.html\n",
202
+ "https://docs.nav2.org/behavior_trees/trees/follow_point.html\n",
203
+ "https://docs.nav2.org/behavior_trees/trees/odometry_calibration.html\n",
204
+ "https://docs.nav2.org/plugins/index.html\n",
205
+ "https://docs.nav2.org/migration/index.html\n",
206
+ "https://docs.nav2.org/migration/Dashing.html\n",
207
+ "https://docs.nav2.org/migration/Eloquent.html\n",
208
+ "https://docs.nav2.org/migration/Foxy.html\n",
209
+ "https://docs.nav2.org/migration/Galactic.html\n",
210
+ "https://docs.nav2.org/migration/Humble.html\n",
211
+ "https://docs.nav2.org/migration/Iron.html\n",
212
+ "https://docs.nav2.org/migration/Jazzy.html\n",
213
+ "https://docs.nav2.org/commander_api/index.html\n",
214
+ "https://docs.nav2.org/roadmap/roadmap.html\n",
215
+ "https://docs.nav2.org/about/index.html\n",
216
+ "https://docs.nav2.org/about/related_projects.html\n",
217
+ "https://docs.nav2.org/_images/sponsors_feb_2024.png\n",
218
+ "https://docs.nav2.org/_images/nav2_architecture.png\n",
219
+ "https://moveit.ai/install\n",
220
+ "https://moveit.ai/install-moveit2/binary/\n",
221
+ "https://moveit.ai/documentation/applications/\n",
222
+ "https://moveit.ai/documentation/concepts/\n",
223
+ "https://moveit.ai/documentation/related_projects/\n",
224
+ "https://moveit.ai/documentation/plugins/\n",
225
+ "https://moveit.ai/documentation/planners/\n",
226
+ "https://moveit.ai/documentation/source-code-api/\n",
227
+ "https://moveit.ai/about/\n",
228
+ "https://moveit.ai/robots/\n",
229
+ "https://moveit.ai/about/citations/\n",
230
+ "https://moveit.ai/about/distribution/\n",
231
+ "https://moveit.ai/documentation/faqs/\n",
232
+ "https://moveit.ai/support/\n",
233
+ "https://moveit.ai/about/press_kit/\n",
234
+ "https://moveit.ai/documentation/contributing\n",
235
+ "https://moveit.ai/documentation/contributing/\n",
236
+ "https://moveit.ai/events/\n",
237
+ "https://moveit.ai/documentation/contributing/roadmap/\n",
238
+ "https://moveit.ai/events/2024-google-summer-of-code/\n",
239
+ "https://moveit.ai/documentation/contributing/pullrequests/\n",
240
+ "https://moveit.ai/documentation/contributing/code/\n",
241
+ "https://moveit.ai/blog/\n",
242
+ "https://moveit.ai/about/get_involved/\n",
243
+ "https://moveit.ai/install-moveit2/source/\n",
244
+ "https://www.ros.org/blog/discord/\n",
245
+ "https://docs.nav2.org/index.html\n",
246
+ "https://docs.nav2.org/_images/rviz-not-started.png\n",
247
+ "https://docs.nav2.org/_images/gazebo_turtlebot1.png\n",
248
+ "https://docs.nav2.org/_images/rviz_initial.png\n",
249
+ "https://docs.nav2.org/_images/rviz-set-initial-pose.png\n",
250
+ "https://docs.nav2.org/_images/navstack-ready.png\n",
251
+ "https://docs.nav2.org/_images/navigate-to-pose.png\n",
252
+ "https://docs.nav2.org/_images/navigation_with_recovery_behaviours.gif\n",
253
+ "https://docs.nav2.org/_images/base-bot_1.png\n",
254
+ "https://docs.nav2.org/_images/base-bot_2.png\n",
255
+ "https://docs.nav2.org/_images/gazebo_sam_bot.png\n",
256
+ "https://docs.nav2.org/_images/rviz.png\n",
257
+ "https://docs.nav2.org/_images/add_topic_laserscan.png\n",
258
+ "https://docs.nav2.org/_images/add_my_marker.png\n",
259
+ "https://docs.nav2.org/_images/add_topic_local_costmap.png\n",
260
+ "https://docs.nav2.org/_images/add_topic_global_costmap.png\n",
261
+ "https://docs.nav2.org/_images/rviz_after_launch_view.png\n",
262
+ "https://docs.nav2.org/_images/rviz_slam_map_view.png\n",
263
+ "https://docs.nav2.org/_images/rviz_set_initial_pose.png\n",
264
+ "https://docs.nav2.org/_images/rviz_send_goal.png\n",
265
+ "https://docs.nav2.org/_images/rviz_robot_navigating.png\n",
266
+ "https://docs.nav2.org/_images/WGS_84_reference_frame.svg\n",
267
+ "https://docs.nav2.org/_images/South-America-UTM-zones.png\n",
268
+ "https://docs.nav2.org/_images/gazebo_sonoma_raceway.png\n",
269
+ "https://docs.nav2.org/_images/mapviz_init.png\n",
270
+ "https://docs.nav2.org/_images/localization_check.gif\n",
271
+ "https://docs.nav2.org/_images/navigation_check.gif\n",
272
+ "https://docs.nav2.org/_images/interactive_wpf.gif\n",
273
+ "https://docs.nav2.org/_images/groot_export_new_node.png\n",
274
+ "https://docs.nav2.org/_images/vio.png\n",
275
+ "https://docs.nav2.org/_images/main_diagram.png\n",
276
+ "https://docs.nav2.org/_images/drawing_keepout_mask.png\n",
277
+ "https://docs.nav2.org/_images/keepout_global.gif\n",
278
+ "https://docs.nav2.org/_images/keepout_mask.png\n",
279
+ "https://docs.nav2.org/_images/drawing_speed_mask.png\n",
280
+ "https://docs.nav2.org/_images/speed_global.gif\n",
281
+ "https://docs.nav2.org/_images/speed_mask.png\n",
282
+ "https://docs.nav2.org/_images/smoothing_path.png\n",
283
+ "https://docs.nav2.org/_images/polygons.png\n",
284
+ "https://docs.nav2.org/_images/holonomic_direction.png\n",
285
+ "https://docs.nav2.org/_images/holonomic_examples1.png\n",
286
+ "https://docs.nav2.org/_images/polygons_visualization.png\n",
287
+ "https://docs.nav2.org/_images/collision.png\n",
288
+ "https://docs.nav2.org/_images/title.png\n",
289
+ "https://docs.nav2.org/_images/3x3_kernels.png\n",
290
+ "https://docs.nav2.org/_images/dilate.gif\n",
291
+ "https://docs.nav2.org/_images/connected_components.gif\n",
292
+ "https://docs.nav2.org/_images/ROS2_topic_hz.png\n",
293
+ "https://docs.nav2.org/_images/window1.png\n",
294
+ "https://docs.nav2.org/_images/calibration.jpg\n",
295
+ "https://docs.nav2.org/_images/greenbars.png\n",
296
+ "https://docs.nav2.org/_images/calibration_complete.png\n",
297
+ "https://docs.nav2.org/_images/calibration_parameters.png\n",
298
+ "https://docs.nav2.org/_images/kcachegrind.png\n",
299
+ "https://docs.nav2.org/_images/call_graph.png\n",
300
+ "https://docs.nav2.org/_images/gradient_layer_preview.gif\n",
301
+ "https://docs.nav2.org/_images/gradient_layer_run.png\n",
302
+ "https://docs.nav2.org/_images/nav2_straightline_gif.gif\n",
303
+ "https://docs.nav2.org/_images/nav2_pure_pursuit_gif.gif\n",
304
+ "https://docs.nav2.org/_images/2d_test.png\n",
305
+ "https://docs.nav2.org/_images/hybrid_144.png\n",
306
+ "https://docs.nav2.org/_images/state_reverse.png\n",
307
+ "https://docs.nav2.org/_images/00-37.png\n",
308
+ "https://docs.nav2.org/_images/constrained_smoother.png\n",
309
+ "https://docs.nav2.org/_images/w_cost_cusp_multiplier.png\n",
310
+ "https://docs.nav2.org/_images/cost_check_points.png\n",
311
+ "https://docs.nav2.org/_images/holonomic_examples.png\n",
312
+ "https://docs.nav2.org/_images/odometry_calibration.gif\n",
313
+ "https://docs.nav2.org/_images/panel-feedback.gif\n",
314
+ "https://docs.nav2.org/_images/use_final_approach_orientation_false.gif\n",
315
+ "https://docs.nav2.org/_images/use_final_approach_orientation_true.gif\n",
316
+ "https://docs.nav2.org/_images/rpp_goal_lookahead_interpolate.gif\n",
317
+ "https://moveit.ai/install/\n",
318
+ "https://moveit.ai/install/source/\n",
319
+ "https://moveit.ai/install/source-windows/\n",
320
+ "https://moveit.ai/install/docker/\n",
321
+ "https://moveit.ai/install-moveit2/binary-windows/\n",
322
+ "https://moveit.ai/install-moveit2/source-windows/\n",
323
+ "https://moveit.ai/documentation/contributing/releases/\n",
324
+ "https://moveit.ai/documentation/concepts/developer_concepts/\n",
325
+ "https://moveit.ai/about/maintainer_policy\n",
326
+ "https://moveit.ai/about/citations\n",
327
+ "https://moveit.ai/blog\n",
328
+ "https://moveit.ai/about/press_kit\n",
329
+ "https://moveit.ai/documentation/faqs\n",
330
+ "https://moveit.ai/documentation/contributing/pullrequests\n",
331
+ "https://moveit.ai/events/moveit/mentor/google/2022/05/20/2022-google-summer-of-code-students.html\n",
332
+ "https://moveit.ai/events/moveit/mentor/google/2022/03/15/gsoc.html\n",
333
+ "https://moveit.ai/ros/moveit/events/2021/10/29/rosworld-moveit-workshop.html\n",
334
+ "https://moveit.ai/events/world-moveit-day/ros/moveit/2021/03/22/world-moveit-day-2021-lighting-talks.html\n",
335
+ "https://moveit.ai/events/moveit/mentor/google/2021/03/11/gsof-mentor.html\n",
336
+ "https://moveit.ai/events/world-moveit-day/2021/01/15/world-moveit-day-2021.html\n",
337
+ "https://moveit.ai/events/world-moveit-day/2020/04/28/world-moveit-day-2020.html\n",
338
+ "https://moveit.ai/feed.xml\n",
339
+ "https://moveit.ai/documentation/contributing/future_projects/\n",
340
+ "https://moveit.ai/documentation/contributing/maintainer_pr\n",
341
+ "https://moveit.ai/documentation/contributing/continuous_integration/\n",
342
+ "https://moveit.ai/documentation/contributing/syncing_backporting/\n",
343
+ "https://moveit.ai/moveit/gsoc/2024/08/22/GSoC-2024-mujoco-support-for-ros2-moveit.html\n",
344
+ "https://moveit.ai/moveit/gsoc/2024/08/19/GSoC-2024-Zenoh-Support-and-Benchmarking.html\n",
345
+ "https://moveit.ai/release/jazzy/rolling/2024/06/30/New-MoveIt-LTS-release-for-ROS-2-Jazzy.html\n",
346
+ "https://moveit.ai/moveit/gsoc/2024/06/07/Google-Summer-of-Code-Contributor-Introductions.html\n",
347
+ "https://moveit.ai/planning%20pipeline/moveit2/motion%20planning/2024/03/25/MoveIt-Planning-Pipeline-Refactoring.html\n",
348
+ "https://moveit.ai/open%20source/open%20core%20software/2024/02/22/MoveIt-Pro-Open-Core.html\n",
349
+ "https://moveit.ai/2024/02/20/Introduciong-MoveIt-Pro-Rapid-Robotics-Application-Development-for-Unstructured-Environments.html\n",
350
+ "https://moveit.ai/bin%20picking/grasping/segmentation/manipulation/2024/01/31/Bin-Picking-Flexible-&-Fast-For-Any-Brand-of-Robot-Arm.html\n",
351
+ "https://moveit.ai/moveit/roscon/2023/11/29/MoveItCon-2023-Recap.html\n",
352
+ "https://moveit.ai/moveit/benchmarking/inverse%20kinematics/servo/2023/11/21/GSoC-2023-MoveIt-Servo-and-IK-Benchmarking.html\n",
353
+ "https://moveit.ai/moveit%202/ros/2023/05/31/balancing-stability-and-development.html\n",
354
+ "https://moveit.ai/moveit%202/ros/2023/05/19/optimization-based-planning-with-stomp.html\n",
355
+ "https://moveit.ai/moveit%202/ros/2023/05/03/google-summer-of-code-participants.html\n",
356
+ "https://moveit.ai/moveit%202/ros/2023/03/20/google-summer-of-code-2023.html\n",
357
+ "https://moveit.ai/moveit/ros/python/google/2023/02/15/MoveIt-Humble-Release.html\n",
358
+ "https://moveit.ai/moveit%202/parallel%20planning/motion%20planning/2023/02/15/parallel-planning-with-MoveIt-2.html\n",
359
+ "https://moveit.ai/moveit/google/2023/01/12/gsoc-simultaneous-trajectory-execution.html\n",
360
+ "https://moveit.ai/picknik/moveit/2022/08/16/Announcing-MoveIt-Studio.html\n",
361
+ "https://moveit.ai/ros/2022/07/22/Declarative-ROS-2-Parameters.html\n",
362
+ "https://moveit.ai/moveit/ros/2022/07/22/MoveIt-Servo-Inverse-Kinematics.html\n",
363
+ "https://moveit.ai/moveit/ros/ros2/humble/rolling/2022/07/15/MoveIt-2.5.2.html\n",
364
+ "https://moveit.ai/moveit/ros/humble/2022/06/02/MoveIt-Humble-Release.html\n",
365
+ "https://moveit.ai/robowflex/moveit/automation/2022/05/05/zak-kingston.html\n",
366
+ "https://moveit.ai/areospace/moveit/automation/2022/04/20/moveit-for-areospace.html\n",
367
+ "https://moveit.ai/moveit/ros/2022/03/02/2022-community-meeting.html\n",
368
+ "https://moveit.ai/moveit/ros/2022/02/15/2022-community-meeting.html\n",
369
+ "https://moveit.ai/moveit/ros/2022/01/20/2022-doc-a-thon.html\n",
370
+ "https://moveit.ai/moveit/ros/2021/12/17/sprint-report-3.html\n",
371
+ "https://moveit.ai/moveit/ros/2021/11/30/sprint-report-2.html\n",
372
+ "https://moveit.ai/ros/moveit/galactic/2021/07/08/moveit-galactic.html\n",
373
+ "https://moveit.ai/ros2/moveit/2021/06/08/moveit-vs-moveit2.html\n",
374
+ "https://moveit.ai/ros/moveit/noetic/2021/05/13/noetic-release.html\n",
375
+ "https://moveit.ai/ros/moveit/noetic/2021/04/15/noetic-update.html\n",
376
+ "https://moveit.ai/ros/moveit/2021/04/04/WMD-2021-results.html\n",
377
+ "https://moveit.ai/ros/moveit/2021/04/01/moveit3_release.html\n",
378
+ "https://moveit.ai/moveit/2021/01/14/moveitcon-2019-macau.html\n",
379
+ "https://moveit.ai/moveit/ros%202/tsc/2020/12/21/moveit-for-ros-2-migration-update.html\n",
380
+ "https://moveit.ai/moveit/pilz/motion%20planner/2020/12/17/Pilz-Plugin-for-MoveIt.html\n",
381
+ "https://moveit.ai/ros/ros%20world/moveit/2020/11/24/ros-world-2020.html\n",
382
+ "https://moveit.ai/bullet/collision%20detection/moveit/2020/11/18/bullet-collision.html\n",
383
+ "https://moveit.ai/moveit/ros/noetic/2020/10/13/announcing-moveit-1-1-1-release-for-ros-noetic.html\n",
384
+ "https://moveit.ai/moveit/ros/noetic/2020/09/28/moveit-noetic.html\n",
385
+ "https://moveit.ai/deep%20learning/grasping/moveit/3d%20perception/2020/09/28/grasp-deep-learning.html\n",
386
+ "https://moveit.ai/moveit/2020/09/10/ompl-constrained-planning-gsoc.html\n",
387
+ "https://moveit.ai/moveit/ros2/servo/jog/2020/09/09/moveit2-servo.html\n",
388
+ "https://moveit.ai/moveit2/ros2/foxy/release/2020/09/04/moveit2-foxy-release.html\n",
389
+ "https://moveit.ai/moveit/ros/2020/08/26/moveit-calibration.html\n",
390
+ "https://moveit.ai/moveit/ros/2020/08/04/moveit-melodic-release.html\n",
391
+ "https://moveit.ai/moveit/ros/2020/07/24/moveit-research-roundup.html\n",
392
+ "https://moveit.ai/moveit/ros/microsoft/windows/2020/07/14/moveit-on-windows.html\n",
393
+ "https://moveit.ai/moveit/ros/2020/06/26/world-moveit-day-2020-recap.html\n",
394
+ "https://moveit.ai/industrial/moveit2/ur5/2020/06/09/moveit2-robotic-application.html\n",
395
+ "https://moveit.ai/planning/ompl/2020/06/05/ompl-1-5-0-released.html\n",
396
+ "https://moveit.ai/moveit/gsoc/code/students/2020/05/05/gsoc-2020-projects.html\n",
397
+ "https://moveit.ai/moveit/ros/2020/04/22/moveitcon-2020.html\n",
398
+ "https://moveit.ai/moveit/ros/2020/02/25/mtc.html\n",
399
+ "https://moveit.ai/moveit/ros2/2020/02/18/moveit-2-beta-feature-list.html\n",
400
+ "https://moveit.ai/moveit/ros/2019/12/26/world-moveit-day-2019-recap.html\n",
401
+ "https://moveit.ai/moveit/ros/2019/11/18/moveit-grasps.html\n",
402
+ "https://moveit.ai/moveit/ros/2019/11/13/world-moveit-day-2019.html\n",
403
+ "https://moveit.ai/moveit/ros/2019/09/19/moveit-workshop-macau.html\n",
404
+ "https://moveit.ai/moveit!/ros/2019/06/12/google-summer-of-code.html\n",
405
+ "https://moveit.ai/moveit!/ros/2019/05/31/moveit2-alpha-release.html\n",
406
+ "https://moveit.ai/moveit!/ros/2019/05/28/moveit-survey-results.html\n",
407
+ "https://moveit.ai/moveit!/ros/2019/04/16/realtime-robotics.html\n",
408
+ "https://moveit.ai/moveit!/ros/descartes/2019/04/12/moveit-descartes.html\n",
409
+ "https://moveit.ai/moveit!/ros/2019/04/08/moveit-survey.html\n",
410
+ "https://moveit.ai/moveit!/ros/2019/03/08/announcing-the-moveit-1-release.html\n",
411
+ "https://moveit.ai/moveit!/ros/2019/03/01/announcing-the-moveit-2-port.html\n",
412
+ "https://moveit.ai/moveit!/ros/2019/02/11/china-developer-workshop-report.html\n",
413
+ "https://moveit.ai/moveit!/ros/2019/01/04/china-developer-workshop.html\n",
414
+ "https://moveit.ai/moveit!/ros/2018/12/11/gsoc-2018-perception-pipeline.html\n",
415
+ "https://moveit.ai/moveit!/ros/2018/11/20/wordlmoveitdayreport3.html\n",
416
+ "https://moveit.ai/moveit!/ros/2018/10/25/gsoc-motion-planning-support.html\n",
417
+ "https://moveit.ai/moveit!/ros/2018/10/23/gsoc-2018-setup-assistant-v2.html\n",
418
+ "https://moveit.ai/moveit!/ros/2018/09/26/moveit-at-roscon-2018.html\n",
419
+ "https://moveit.ai/moveit!/ros/2018/08/03/save-the-date-world-moveit-day.html\n",
420
+ "https://moveit.ai/moveit!/ros/2018/05/23/firstmelodicrelease.html\n",
421
+ "https://moveit.ai/moveit!/ros/2018/05/08/google-summer-of-code.html\n",
422
+ "https://moveit.ai/moveit!/ros/2018/04/16/moveit-on-discourse.html\n",
423
+ "https://moveit.ai/moveit!/ros/2018/02/26/tutorials-documentation-codesprint.html\n",
424
+ "https://moveit.ai/moveit!/ros/2018/02/01/wordlmoveitdayreport2.html\n",
425
+ "https://moveit.ai/moveit!/ros/2017/09/20/wordlmoveitday.html\n",
426
+ "https://moveit.ai/moveit!/ros/2017/06/20/videoMontage2017.html\n",
427
+ "https://moveit.ai/moveit!/ros/2017/03/15/videoMontageRequest.html\n",
428
+ "https://moveit.ai/moveit!/ros/2017/01/03/firstIndigoRelease.html\n",
429
+ "https://moveit.ai/moveit!/ros/2016/12/15/firstkineticrelease.html\n",
430
+ "https://moveit.ai/moveit!/ros/2016/11/01/moveit-community.html\n",
431
+ "https://moveit.ai/moveit!/ros/2016/10/11/moveit_community_meeting.html\n",
432
+ "https://moveit.ai/moveit!/ros/2016/09/02/firstjaderelease.html\n",
433
+ "https://moveit.ai/moveit!/ros/2016/08/28/wordlmoveitdayreport.html\n",
434
+ "https://moveit.ai/moveit!/ros/2016/08/22/teamdelftamazon.html\n",
435
+ "https://moveit.ai/moveit!/ros/2016/08/05/wordlmoveitday.html\n",
436
+ "https://moveit.ai/moveit!/ros/2015/09/28/iros.html\n",
437
+ "https://moveit.ai/moveit!/ros/2015/09/19/robobusiness.html\n",
438
+ "https://moveit.ai/moveit!/ros/2015/09/17/moveit-community.html\n",
439
+ "https://moveit.ai/moveit!/ros/2015/07/02/going-underwater.html\n",
440
+ "https://moveit.ai/moveit!/ros/2015/06/03/icra-2015-update.html\n",
441
+ "https://moveit.ai/moveit!/ros/2015/05/24/see-you-at-icra-2015.html\n",
442
+ "https://moveit.ai/moveit!/ros/2015/03/17/versatile-manipulation-baxter-robot-with-moveit-used-to-teach-robotics-fundamentals-at-columbia-university.html\n",
443
+ "https://moveit.ai/moveit!/ros/2015/02/17/new-pal-mobile-manipulation-robot-tiago-runs-moveit.html\n",
444
+ "https://moveit.ai/moveit!/ros/2015/01/29/update-on-moveit-in-ros-i-community-meeting.html\n",
445
+ "https://moveit.ai/moveit!/ros/2015/01/29/the-amazon-picking-challenge.html\n",
446
+ "https://moveit.ai/moveit!/ros/2015/01/29/alten-mechatronics-applies-robotic-technology-in-fei-transmission-electron-microscopes-tem.html\n",
447
+ "https://moveit.ai/moveit!/ros/2015/01/29/new-capabilities-in-moveit-the-cartesian-path-planner-plugin.html\n",
448
+ "https://moveit.ai/moveit!/ros/2014/08/05/ronex-and-moveit.html\n",
449
+ "https://moveit.ai/moveit!/ros/2014/05/08/ckbot-whole-arm-grasping-and-moveit.html\n",
450
+ "https://moveit.ai/moveit!/ros/2014/04/03/rossurvey.html\n",
451
+ "https://moveit.ai/general/2014/02/20/first-new-robot-2014-hollie.html\n",
452
+ "https://moveit.ai/moveit!/ros/2014/01/07/a-new-year-a-new-website-a-new-movie.html\n",
453
+ "https://moveit.ai/moveit!/ros/2013/11/05/moveit-survey-results.html\n",
454
+ "https://moveit.ai/moveit!/ros/2013/10/22/robobusiness-2013.html\n",
455
+ "https://moveit.ai/moveit!/ros/2013/10/08/moveit-survey.html\n",
456
+ "https://moveit.ai/moveit!/ros/2013/08/20/moveit-pick-place-pr2.html\n",
457
+ "https://moveit.ai/moveit!/ros/2013/05/07/icra-motion-planning-tutorial.html\n",
458
+ "https://moveit.ai/moveit!/ros/2013/05/06/icra-roscon-trip-report.html\n",
459
+ "https://moveit.ai/install/source/dependencies/\n",
460
+ "https://moveit.ai/events/2022-google-summer-of-code/\n",
461
+ "https://moveit.ai/events/rosworld-2021-workshop/\n",
462
+ "https://moveit.ai/events/2022-moveit-community-meeting/\n",
463
+ "https://moveit.ai/assets/pdfs/2019/moveit_2019_survey.pdf\n",
464
+ "https://moveit.ai/events/world-moveit-day-2018/\n",
465
+ "https://moveit.ai/events/world-moveit-day-2017/\n",
466
+ "https://moveit.ai/events/world-moveit-day/\n",
467
+ "https://moveit.ai/events/rosworld-2021-workshop/Stretch%20Payload%20&%20Pulling%20Force.pdf\n",
468
  "https://github.com/ros2/ros2/tree/rolling/README.md\n",
469
  "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
470
  "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
 
7265
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.hh\n",
7266
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/gazebo_generator.cc\n",
7267
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.cc\n",
7268
+ "Sample regular document: {'_id': ObjectId('6755e9baa1014826dc57c074'), 'link': 'https://www.ros.org/', 'type': 'Document', 'content': ' ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and Windows 10, though other systems are supported to varying degrees. Learn More Humble Hawksbill ROS 2 Humble Hawksbill is a slighly older LTS release of ROS 2 targeted at Ubuntu 22.04 (Jammy) and Windows 10. Other systems are supported including tier 3 support for 20.04 for those transitioning from ROS 1. Learn More Support There are several mechanisms in place to support the ROS community, each with its own purpose. Documentation Documentation and tutorials for ROS 2 Stack Exchange Ask questions. Get answers. Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Documentation Documentation and tutorials for ROS 2 Robotics Stack Exchange Ask questions.Get answers.All ROS versions Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Recent Updates and Highlights ROSCon 2024 Videos are Now Available See the ROSCon 2024 website for details 11/18/2024 - Katherine Scott The videos from ROSCon 2024 in Odense are now available on the ROSCon Website (see the program), this Vimeo showcase, and in the ROS documentation. The ROSCon website also includes the slides from all the talks at ROSCon. I have also included a list of all the videos below. I want to thank AMD for being our 2024 ROSCon video sponsor, their generous support makes the ROSCon live stream and videos possible. 
READ MORE Recent ROS Discourse Posts ROS News of the Week 11/22/2024 - ROS Discourse Gazebo Classic and Citadel End of Life 12/2/2024 - ROS Discourse ROS 2 driver for Mitsubishi Melfa RV-FR 10/24/2024 ROS Discourse Home Why ROS? Getting Started Community Ecosystem Q&A Forum Packages Wiki Documentation media Q&A Forum Packages ROSCon Wiki documentation discord Brought to you by Open Robotics | licensed under Creative Commons Attributions 3.0 | ©2021 Open Robotics '}\n",
7269
+ "Sample github document {'_id': ObjectId('6755e9bda1014826dc57c078'), 'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'content': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/). Onceyou'veinstalledROSstartbylearningsome[basicconcepts](https://docs.ros.org/en/rolling/Concepts/Basic.html)andtakealookatour[beginnertutorials](https://docs.ros.org/en/rolling/Tutorials/Beginner-CLI-Tools.html). #JointheROSCommunity ##CommunityResources *[ROSDiscussionForum](https://discourse.ros.org/) *[ROSDiscordServer](https://discord.com/servers/open-robotics-1077825543698927656) *[RoboticsStackExchange](https://robotics.stackexchange.com/)(preferredROSsupportforum). *[OfficialROSVideos](https://vimeo.com/osrfoundation) *[ROSCon](https://roscon.ros.org),ouryearlydeveloperconference. 
*CiteROS2inacademicworkusing[DOI:10.1126/scirobotics.abm6074](https://www.science.org/doi/10.1126/scirobotics.abm6074) ##DeveloperResources *[ROS2Documentation](https://docs.ros.org/) *[ROSPackageAPIreference](https://docs.ros.org/en/rolling/p/) *[ROSPackageIndex](https://index.ros.org/) *[ROSonDockerHub](https://hub.docker.com/_/ros/) *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg) ROSismadepossiblethroughthegeneroussupportofopensourcecontributorsandthenon-profit[OpenSourceRoboticsFoundation(OSRF)](https://www.openrobotics.org/). TaxdeductibledonationstotheOSRFcanbe[madehere.](https://donorbox.org/support-open-robotics?utm_medium=qrcode&utm_source=qrcode) \"}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7270
  ]
7271
  }
7272
  ],
 
7309
  },
7310
  {
7311
  "cell_type": "code",
7312
+ "execution_count": 9,
7313
  "metadata": {},
7314
  "outputs": [
7315
  {
 
7318
  "DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)"
7319
  ]
7320
  },
7321
+ "execution_count": 9,
7322
  "metadata": {},
7323
  "output_type": "execute_result"
7324
  }
project/TrainingPipeline.ipynb CHANGED
@@ -12,7 +12,7 @@
12
  ],
13
  "metadata": {
14
  "kernelspec": {
15
- "display_name": ".venv",
16
  "language": "python",
17
  "name": "python3"
18
  },
@@ -26,7 +26,7 @@
26
  "name": "python",
27
  "nbconvert_exporter": "python",
28
  "pygments_lexer": "ipython3",
29
- "version": "3.11.9"
30
  }
31
  },
32
  "nbformat": 4,
 
12
  ],
13
  "metadata": {
14
  "kernelspec": {
15
+ "display_name": "Python 3",
16
  "language": "python",
17
  "name": "python3"
18
  },
 
26
  "name": "python",
27
  "nbconvert_exporter": "python",
28
  "pygments_lexer": "ipython3",
29
+ "version": "3.12.7"
30
  }
31
  },
32
  "nbformat": 4,
project/app.py CHANGED
@@ -1,15 +1,8 @@
1
- # Make sure you have run "ollama serve"
2
  # This is the same code as ClearML
3
- import os
4
- import sys
5
  from operator import itemgetter
6
-
7
  import gradio as gr
8
- from dotenv import load_dotenv
9
  from langchain.prompts import PromptTemplate
10
- from langchain_community.embeddings import OllamaEmbeddings
11
- from langchain_community.llms import Ollama
12
- from qdrant_client import QdrantClient
13
  from shared import getModel, getEmbeddingsModel, getQdrantClient
14
 
15
  def answer(samplePrompt, useSample, Query):
@@ -84,8 +77,6 @@ def answer(samplePrompt, useSample, Query):
84
  links = [result.payload['link'] for result in results]
85
  topTexts = ''
86
  for index in topIndexes:
87
- print("Top texts: ", texts[index])
88
- print("Link: ", links[index])
89
  topTexts += texts[index]
90
 
91
  # Building prompt
@@ -99,7 +90,10 @@ def answer(samplePrompt, useSample, Query):
99
  prompt = PromptTemplate.from_template(template)
100
  else:
101
  template = """
102
- Answer the question based on the document below. If you can't answer the question, reply "I don't know"
 
 
 
103
 
104
  Document: {document}
105
  Question: {question}
@@ -115,7 +109,10 @@ demo = gr.Interface(
115
  fn=answer,
116
  inputs=[
117
  gr.Dropdown(
118
- ["What is ROS?", "Write me code to move a robot"], label="Sample Prompt"
 
 
 
119
  ),
120
  "checkbox",
121
  "text",
 
1
+ # Make sure ollama serve is running(docker or terminal)
2
  # This is the same code as ClearML
 
 
3
  from operator import itemgetter
 
4
  import gradio as gr
 
5
  from langchain.prompts import PromptTemplate
 
 
 
6
  from shared import getModel, getEmbeddingsModel, getQdrantClient
7
 
8
  def answer(samplePrompt, useSample, Query):
 
77
  links = [result.payload['link'] for result in results]
78
  topTexts = ''
79
  for index in topIndexes:
 
 
80
  topTexts += texts[index]
81
 
82
  # Building prompt
 
90
  prompt = PromptTemplate.from_template(template)
91
  else:
92
  template = """
93
+ You are an AI agent that has retreived a document from the web.
94
+ If the document is useful for answering the question use it.
95
+ If the document is not useful, answer normally.
96
+ Do not mention the document.
97
 
98
  Document: {document}
99
  Question: {question}
 
109
  fn=answer,
110
  inputs=[
111
  gr.Dropdown(
112
+ ["How can I develop the navigation stack of an agent with egomotion?",
113
+ "What is ROS?", "How many companies is Nav2 trusted by worldwide?",
114
+ "How would I build a ROS 2 Navigation Framework and System?",
115
+ "Write me code to move a robot using Moveit"], label="Sample Prompt"
116
  ),
117
  "checkbox",
118
  "text",