KenTheNoob committed on
Commit
24ff9b2
·
1 Parent(s): 1d1cfc4

Syncing with github

Browse files
project/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
  .gradio
2
  .env
3
  __pycache__
4
- /tool/__pycache__
 
1
  .gradio
2
  .env
3
  __pycache__
4
+ /tool/__pycache__
project/ClearML/DataCollectionPipeline.py CHANGED
@@ -151,7 +151,9 @@ def ETL_Pipeline(links):
151
  if (
152
  subdirectory is not None and
153
  'http' not in subdirectory and
154
- mongoCollection.find_one({"link": newLink}) is None
 
 
155
  ):
156
  links.append(newLink)
157
  except:
 
151
  if (
152
  subdirectory is not None and
153
  'http' not in subdirectory and
154
+ '#' not in subdirectory and
155
+ mongoCollection.find_one({"link": newLink}) is None and
156
+ newLink not in links
157
  ):
158
  links.append(newLink)
159
  except:
project/ClearML/FeaturePipeline.py CHANGED
@@ -79,7 +79,6 @@ def chunkDocuments(texts):
79
 
80
  @PipelineDecorator.component(cache=False, return_values=["embeddings"])
81
  def embedChunks(chunks):
82
- embeddings = []
83
  # Setup the text embedder
84
  MODEL = "llama3.2"
85
  try:
@@ -91,11 +90,10 @@ def embedChunks(chunks):
91
  embeddingsModel = OllamaEmbeddings(model=MODEL, base_url="http://host.docker.internal:11434")
92
  else:
93
  embeddingsModel = OllamaEmbeddings(model=MODEL)
94
- for chunk in chunks:
95
- embeddings.append(embeddingsModel.embed_query(chunk))
96
- return embeddings
97
 
98
 
 
99
  @PipelineDecorator.component(cache=False)
100
  def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
101
  # Create a qdrant connection
@@ -128,8 +126,6 @@ def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
128
  chunkIndex += 1
129
  if chunkNum == 0:
130
  documentIndex += 1
131
- # Store all documents from each MongoDB collection into qdrant
132
- # Create embeddings for each chunk, of length 2048 using the embedding model
133
  # Store the embedding along with some metadata into the Qdrant vector database
134
  qClient.upsert(
135
  collection_name=resultTypes[documentIndex],
 
79
 
80
  @PipelineDecorator.component(cache=False, return_values=["embeddings"])
81
  def embedChunks(chunks):
 
82
  # Setup the text embedder
83
  MODEL = "llama3.2"
84
  try:
 
90
  embeddingsModel = OllamaEmbeddings(model=MODEL, base_url="http://host.docker.internal:11434")
91
  else:
92
  embeddingsModel = OllamaEmbeddings(model=MODEL)
93
+ return embeddingsModel.embed_documents(chunks)
 
 
94
 
95
 
96
+ # Create embeddings for each chunk, of length 3072 using the embedding model
97
  @PipelineDecorator.component(cache=False)
98
  def storeEmbeddings(embeddings, links, resultTypes, chunks, chunkNums):
99
  # Create a qdrant connection
 
126
  chunkIndex += 1
127
  if chunkNum == 0:
128
  documentIndex += 1
 
 
129
  # Store the embedding along with some metadata into the Qdrant vector database
130
  qClient.upsert(
131
  collection_name=resultTypes[documentIndex],
project/DataCollectionPipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
project/FeaturePipeline.ipynb CHANGED
@@ -2,33 +2,23 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 15,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
- "ename": "KeyboardInterrupt",
10
- "evalue": "",
11
- "output_type": "error",
12
- "traceback": [
13
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
15
- "Cell \u001b[0;32mIn[15], line 61\u001b[0m\n\u001b[1;32m 58\u001b[0m chunkNum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m chunks:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;66;03m# Create embeddings for each chunk, of length 2048 using the embedding model\u001b[39;00m\n\u001b[0;32m---> 61\u001b[0m embedding \u001b[38;5;241m=\u001b[39m \u001b[43membeddingsModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;66;03m# Store the embedding along with some metadata into the Qdrant vector database\u001b[39;00m\n\u001b[1;32m 63\u001b[0m qClient\u001b[38;5;241m.\u001b[39mupsert(collection_name\u001b[38;5;241m=\u001b[39mresultType, wait\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, points\u001b[38;5;241m=\u001b[39m[PointStruct(\u001b[38;5;28mid\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mid\u001b[39m, vector\u001b[38;5;241m=\u001b[39membedding, payload\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlink\u001b[39m\u001b[38;5;124m\"\u001b[39m: link, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: resultType, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchunk\u001b[39m\u001b[38;5;124m\"\u001b[39m: chunkNum, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m: chunk})])\n",
16
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:227\u001b[0m, in \u001b[0;36mOllamaEmbeddings.embed_query\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Embed a query using a Ollama deployed embedding model.\u001b[39;00m\n\u001b[1;32m 219\u001b[0m \n\u001b[1;32m 220\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;124;03m Embeddings for the text.\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 226\u001b[0m instruction_pair \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquery_instruction\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mtext\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 227\u001b[0m embedding \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_embed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43minstruction_pair\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m embedding\n",
17
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:202\u001b[0m, in \u001b[0;36mOllamaEmbeddings._embed\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 201\u001b[0m iter_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28minput\u001b[39m\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_emb_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m prompt \u001b[38;5;129;01min\u001b[39;00m iter_]\n",
18
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/langchain_community/embeddings/ollama.py:167\u001b[0m, in \u001b[0;36mOllamaEmbeddings._process_emb_response\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 161\u001b[0m headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapplication/json\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 163\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 164\u001b[0m }\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 167\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 168\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/api/embeddings\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprompt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_default_params\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 171\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mRequestException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError raised by inference endpoint: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
19
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
20
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
21
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
22
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
23
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/requests/adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 670\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 671\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 672\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 673\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 674\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 675\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 676\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 677\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 678\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 679\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
24
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py:789\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 786\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 789\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 790\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 805\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
25
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
26
- "File \u001b[0;32m/usr/local/lib/python3.12/site-packages/urllib3/connection.py:507\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 506\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 507\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 510\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
27
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:1428\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1427\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1428\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1430\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
28
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:331\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 331\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
29
- "File \u001b[0;32m/usr/local/lib/python3.12/http/client.py:292\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 292\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
30
- "File \u001b[0;32m/usr/local/lib/python3.12/socket.py:720\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 719\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 720\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 722\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
31
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
32
  ]
33
  }
34
  ],
@@ -81,23 +71,27 @@
81
  "for collection in collections:\n",
82
  " mongoCollection = mongoDatabase[collection]\n",
83
  "\n",
84
- " documents = mongoCollection.find()\n",
85
  " id = 0\n",
86
- " for document in documents:\n",
87
- " # For each document, split it into chunks\n",
88
- " link = document[\"link\"]\n",
89
- " resultType = document[\"type\"]\n",
90
- " text = document[\"content\"]\n",
91
- " text = cleanText(text)\n",
92
- " chunks = text_splitter.split_text(text)\n",
93
- " chunkNum = 0\n",
94
- " for chunk in chunks:\n",
95
- " # Create embeddings for each chunk, of length 2048 using the embedding model\n",
96
- " embedding = embeddingsModel.embed_query(chunk)\n",
97
- " # Store the embedding along with some metadata into the Qdrant vector database\n",
98
- " qClient.upsert(collection_name=resultType, wait=True, points=[PointStruct(id=id, vector=embedding, payload={\"link\": link, \"type\": resultType, \"chunk\": chunkNum, \"text\": chunk})])\n",
99
- " chunkNum += 1\n",
100
- " id += 1\n"
 
 
 
 
101
  ]
102
  }
103
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 5,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/usr/local/lib/python3.12/site-packages/pymongo/synchronous/collection.py:1920: UserWarning: use an explicit session with no_cursor_timeout=True otherwise the cursor may still timeout after 30 minutes, for more info see https://mongodb.com/docs/v4.4/reference/method/cursor.noCursorTimeout/#session-idle-timeout-overrides-nocursortimeout\n",
13
+ " return Cursor(self, *args, **kwargs)\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "Stopping document loop\n",
21
+ "Stopping document loop\n"
 
 
 
 
 
 
 
 
 
 
22
  ]
23
  }
24
  ],
 
71
  "for collection in collections:\n",
72
  " mongoCollection = mongoDatabase[collection]\n",
73
  "\n",
74
+ " documents = mongoCollection.find(no_cursor_timeout=True)\n",
75
  " id = 0\n",
76
+ " try:\n",
77
+ " for document in documents:\n",
78
+ " # For each document, split it into chunks\n",
79
+ " link = document[\"link\"]\n",
80
+ " resultType = document[\"type\"]\n",
81
+ " text = document[\"content\"]\n",
82
+ " text = cleanText(text)\n",
83
+ " chunks = text_splitter.split_text(text)\n",
84
+ " chunkNum = 0\n",
85
+ " embeddings = embeddingsModel.embed_documents(chunks)\n",
86
+ " for chunk in chunks:\n",
87
+ " # Create embeddings for each chunk, of length 3072 using the embedding model\n",
88
+ " # Store the embedding along with some metadata into the Qdrant vector database\n",
89
+ " qClient.upsert(collection_name=resultType, wait=True, points=[PointStruct(id=id, vector=embeddings[chunkNum], payload={\"link\": link, \"type\": resultType, \"chunk\": chunkNum, \"text\": chunk})])\n",
90
+ " chunkNum += 1\n",
91
+ " id += 1\n",
92
+ " except:\n",
93
+ " print(\"Stopping document loop\")\n",
94
+ " \n"
95
  ]
96
  }
97
  ],
project/InferencePipeline.ipynb DELETED
@@ -1,181 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stderr",
10
- "output_type": "stream",
11
- "text": [
12
- "/workspaces/RAG_LLM/project/shared.py:57: LangChainDeprecationWarning: The class `OllamaEmbeddings` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaEmbeddings``.\n",
13
- " return OllamaEmbeddings(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n",
14
- "/workspaces/RAG_LLM/project/shared.py:70: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
15
- " return Ollama(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n"
16
- ]
17
- }
18
- ],
19
- "source": [
20
- "# See README for more info on how the DataCollectionPipeline works\n",
21
- "# The retrieval pipeline is part of the DataCollectionPipeline\n",
22
- "from shared import getQdrantClient, getEmbeddingsModel, getModel\n",
23
- "from langchain_community.llms import Ollama\n",
24
- "from langchain.prompts import PromptTemplate\n",
25
- "from operator import itemgetter\n",
26
- "# Create a qdrant connection\n",
27
- "qClient = getQdrantClient()\n",
28
- "\n",
29
- "# Setup the text embedder\n",
30
- "embeddingsModel = getEmbeddingsModel()\n",
31
- "\n",
32
- "# Setup the model\n",
33
- "model = getModel()\n",
34
- "\n",
35
- "# Retrieval Pipeline\n",
36
- "# Retrieve the chunks with the most similar embeddings from Qdrant\n",
37
- "def retriever(text, collection):\n",
38
- " results = qClient.search(\n",
39
- " collection_name=collection,\n",
40
- " query_vector = embeddingsModel.embed_query(text),\n",
41
- " limit=10\n",
42
- " )\n",
43
- " return results"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": 5,
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "name": "stdout",
53
- "output_type": "stream",
54
- "text": [
55
- "Query expansion: Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.\n",
56
- "Coding Question?: 1\n",
57
- "Related Collection: Github\n",
58
- "Top texts: #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\n",
59
- "Link: https://github.com/ros2/ros2/tree/rolling/README.md\n",
60
- "Top texts: type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique_identifier_msgs.git version:rolling ros2/urdf: type:git url:https://github.com/ros2/urdf.git version:rolling ros2/yaml_cpp_vendor: type:git url:https://github.com/ros2/yaml_cpp_vendor.git version:rolling\n",
61
- "Link: https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
62
- "Top texts: *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg)\n",
63
- "Link: https://github.com/ros2/ros2/tree/rolling/README.md\n"
64
- ]
65
- },
66
- {
67
- "data": {
68
- "text/plain": [
69
- "\"Here's an example of what the README file for ROS could look like:\\n\\n**Welcome to the Robot Operating System (ROS)**\\n\\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\\n\\n### Getting Started\\n\\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\\n\\n### What's Included\\n\\nROS includes a range of open-source projects, including:\\n\\n* **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\\n* **tlsf**: A library for secure communication over TLS (Transport Layer Security).\\n* **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\\n* **urdf**: A package for working with URDF (Unified Robot Description Format) files.\\n* **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YAML files.\\n\\n### ROS Releases and Target Platforms\\n\\nFor more information on ROS releases, target platforms, and release notes, check out [REP-2000](https://ros.org/reps/rep-2000.html).\\n\\n### Project Resources\\n\\n* **ROSSwag**: Purchase ROS-related merchandise from our online store.\\n* **ROS Trademark Information**: Learn about the ROS trademark.\\n\\n### Get Involved\\n\\nStay up-to-date with the latest news and developments in ROS:\\n\\n* Follow us on [LinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation)\\n* Join our Twitter community: [OpenRoboticsOrg](https://twitter.com/OpenRoboticsOrg), [ROSOrg](https://twitter.com/ROSOrg)\\n\\n### License and Contributions\\n\\nROS is an open-source project, licensed under the Apache 2.0 license.\\n\\nWe welcome contributions from the ROS community! 
If you have any ideas or bug fixes to contribute, check out our [contribution guidelines](https://ros.org/blog/contribute/).\\n\\n**Thank You**\\n\\nThanks for choosing ROS as your platform for robotics development!\\n\\nYou can modify this README file according to your needs and preferences.\""
70
- ]
71
- },
72
- "execution_count": 5,
73
- "metadata": {},
74
- "output_type": "execute_result"
75
- }
76
- ],
77
- "source": [
78
- "# User query\n",
79
- "query = \"Can you create a README file for ROS\"\n",
80
- "\n",
81
- "# Query expansion(I only generate one additional prompt for simplicity)\n",
82
- "template = \"\"\"\n",
83
- "Rewrite the prompt. The new prompt must offer a different perspective.\n",
84
- "Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
85
- " Prompt: {prompt}\n",
86
- "\"\"\"\n",
87
- "prompt = PromptTemplate.from_template(template)\n",
88
- "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
89
- "queryExpansion = chain.invoke({\"prompt\": query})\n",
90
- "print(\"Query expansion: \", queryExpansion)\n",
91
- "\n",
92
- "# Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
93
- "template = \"\"\"\n",
94
- "You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
95
- "Output a 1 if it is or a 0 if it is not and nothing else.\n",
96
- " Prompt: {prompt}\n",
97
- "\"\"\"\n",
98
- "prompt = PromptTemplate.from_template(template)\n",
99
- "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
100
- "codingQuestion = chain.invoke({\"prompt\": query})\n",
101
- "print(\"Coding Question?: \", codingQuestion)\n",
102
- "\n",
103
- "# Filtered vector search for each of the N queries after expansion\n",
104
- "relatedCollection = 'Document'\n",
105
- "if (codingQuestion == '1'):\n",
106
- " relatedCollection = 'Github'\n",
107
- "print(\"Related Collection: \", relatedCollection)\n",
108
- "results1 = retriever(query, relatedCollection)\n",
109
- "results2 = retriever(queryExpansion, relatedCollection)\n",
110
- "\n",
111
- "# Collecting results\n",
112
- "results = results1+results2\n",
113
- "\n",
114
- "# Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
115
- "ids = [result.id for result in results]\n",
116
- "scores = [result.score for result in results]\n",
117
- "topIds = []\n",
118
- "topIndexes = []\n",
119
- "for x in range(3):\n",
120
- " maxScore = 0\n",
121
- " maxIndex = 0\n",
122
- " for i in range(len(ids)):\n",
123
- " if ids[i] not in topIds and scores[i] > maxScore:\n",
124
- " maxScore = scores[i]\n",
125
- " maxIndex = i\n",
126
- " topIds.append(ids[maxIndex])\n",
127
- " topIndexes.append(maxIndex)\n",
128
- "texts = [result.payload['text'] for result in results]\n",
129
- "links = [result.payload['link'] for result in results]\n",
130
- "topTexts = ''\n",
131
- "for index in topIndexes:\n",
132
- " print(\"Top texts: \", texts[index])\n",
133
- " print(\"Link: \", links[index])\n",
134
- " topTexts += texts[index]\n",
135
- "\n",
136
- "# Building prompt\n",
137
- "if(codingQuestion == '1'):\n",
138
- " template = \"\"\"\n",
139
- " Write code for the following question given the related coding document below.\n",
140
- "\n",
141
- " Document: {document}\n",
142
- " Question: {question}\n",
143
- " \"\"\"\n",
144
- " prompt = PromptTemplate.from_template(template)\n",
145
- "else:\n",
146
- " template = \"\"\"\n",
147
- " Answer the question based on the document below. If you can't answer the question, reply \"I don't know\"\n",
148
- "\n",
149
- " Document: {document}\n",
150
- " Question: {question}\n",
151
- " \"\"\"\n",
152
- " prompt = PromptTemplate.from_template(template)\n",
153
- "\n",
154
- "# Obtaining answer\n",
155
- "chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
156
- "chain.invoke({\"document\": topTexts, \"question\": query})"
157
- ]
158
- }
159
- ],
160
- "metadata": {
161
- "kernelspec": {
162
- "display_name": "Python 3",
163
- "language": "python",
164
- "name": "python3"
165
- },
166
- "language_info": {
167
- "codemirror_mode": {
168
- "name": "ipython",
169
- "version": 3
170
- },
171
- "file_extension": ".py",
172
- "mimetype": "text/x-python",
173
- "name": "python",
174
- "nbconvert_exporter": "python",
175
- "pygments_lexer": "ipython3",
176
- "version": "3.12.7"
177
- }
178
- },
179
- "nbformat": 4,
180
- "nbformat_minor": 2
181
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
project/README.md CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  <h1>Installation:</h1>
2
  <h3>Docker setup(easy):</h3>
3
 
@@ -9,11 +17,13 @@
9
  * Run "docker compose up -d"
10
  * Run "docker exec -it ollama ollama pull llama3.2"
11
  * Select the python 3.12.7 kernels for the notebooks and run DataCollectionPipeline.ipynb and FeaturePipeline.ipynb(to populate the mongodb and qdrant databases)
 
 
12
  * The app is available on localhost:7860
13
 
14
  <h3>Non-Docker(web based) setup:</h3>
15
 
16
- If for some reason the docker setup does not work try connecting to mongodb, qdrant, ollama, and gradio from the web:
17
  * Clone the repository from huggingface or the entire repository from github
18
  * Reopen the repository in a dev container
19
  * Copy the .env.example into a new .env file in the project folder
@@ -29,17 +39,14 @@ If for some reason the docker setup does not work try connecting to mongodb, qdr
29
 
30
  <h1>Project infrastructure</h1>
31
 
32
- Note some files may have similar code with other files, such as the ClearML files containing ipynb files rewritten in python in order to work in ClearML or gradio containing code from InferencePipeline.ipynb. The ipynb file prints output to help see what is happening.
33
-
34
- # app.py
35
- Sends a query to the inference pipeline to generate an answer. The DataCollectionPipeline.ipynb and FeaturePipeline.ipynb files must be run first to populate the databases.
36
 
37
  # Data Collection Pipeline
38
  The Data Collection pipeline takes as input a list of links to domains. The links are fed into the ETL pipeline which Extracts data from the links using a crawler, Transforms the data into a standardized format, and Loads the extracted data into a NoSQL data warehouse, which in this case is MongoDB. The ETL pipeline uses a different method of extracting and transforming based on the link type. In this project, I classify links as either a github repository or document each with their own crawler and cleaner. This raw data is used by the feature pipeline.
39
 
40
  # Feature Pipeline
41
  The Feature pipeline contains the ingestion pipeline.
42
- * The ingestion pipeline extracts documents from MongoDB that were stored by the Data Collection Pipeline. It further cleans the data, breaks it into chunks depending on the data category, passes the chunks through an embedding model to generate embeddings, then loads the embeddings plus their metadata into a vector database, which in this case is Qdrant. The embeddings are passed with additional metadata that contains the document link, type, chunk number, and content.
43
 
44
  # Training Pipeline
45
  The training pipeline performs finetuning. I skipped this step since it was not required.
@@ -47,16 +54,22 @@ The training pipeline performs finetuning. I skipped this step since it was not
47
  # Inference Pipeline
48
  The inference pipeline contains the retrieval client/pipeline.
49
  * The retrieval client takes a prompt as input. It uses the same embedding model as the ingestion pipeline in order to create an embedding for the prompt. It then queries the Qdrant database for the 10 closest embeddings using cosine distance and extracts the text chunk stored in the embeddings' metadata. This returns chunks that are related to the prompt.
50
- * The inference pipeline takes a query as input. It expands the query into N=2 queries using a prompt template, performs self-querying to extract metadata (document type) from the original query, searches the Qdrant for K=10 relevant chunks to each of the N=2 queries plus metadata using the retrieval client, combines the K=10 results from each of the N=2 queries, filters out only the most relevant 3 results, prompts the LLM with the results as context, and generates an answer.
 
 
 
51
 
52
- # ClearML
53
  The ClearML folder contains the notebook (.ipynb) pipeline files rewritten to work with ClearML. It is similar code to the notebooks, however ClearML does not print any output but instead logs all output on the website. The website stores the pipelines which take input and produce output stored in artifacts. These are the differences between the notebook(.ipynb) pipeline files and the ClearML pipeline files(.py):
54
  * The ClearML Data Collection Pipeline works the same way, running the entire ETL pipeline in a single step (I could not split the ETL pipeline into 3 steps (Extract, Transform, Load) since my list of links gets bigger while looping through it (since it also goes through some links inside of the websites crawled). Breaking it into steps would require more HTTP requests which would greatly slow down the pipeline).
55
  * The Feature Pipeline breaks down the notebook's loop (from the ingestion pipeline) into 5 stages: retrieve documents, clean documents, chunk documents, embed chunks, and store embeddings.
56
- * The Inference Pipeline simply puts each step in the notebook's version into a function. These functions are query expansion, self-querying, filtered vector search, collecting results, reranking, building prompt, and obtaining answer.
57
 
58
  # Tools
59
- The tools folder contains code for viewing/deleting what has been stored in MongoDB and Qdrant
 
 
 
60
 
61
  # shared.py
62
  shared.py is in both the project folder and project/Tools folder. It contains functions for setting up the connections with either the docker containers or web services. If you are running into errors connecting to any of the services, consider editing this file or double checking the .env file. Note the ClearML folder hardcodes all functions since it had trouble importing code.
 
1
+ <h3>My Github and Huggingface</h3>
2
+
3
+ * GitHubID: 32941731
4
+ * GitHub username: KenTheNoob
5
+ * GitHub link(private): https://github.com/KenTheNoob/eng-ai-agents
6
+ * Huggingface username: KenTheNoob
7
+ * Huggingface link: https://huggingface.co/KenTheNoob/RAG_LLM
8
+
9
  <h1>Installation:</h1>
10
  <h3>Docker setup(easy):</h3>
11
 
 
17
  * Run "docker compose up -d"
18
  * Run "docker exec -it ollama ollama pull llama3.2"
19
  * Select the python 3.12.7 kernels for the notebooks and run DataCollectionPipeline.ipynb and FeaturePipeline.ipynb(to populate the mongodb and qdrant databases)
20
+ * Note: Consider changing the links in the DataCollectionPipeline file to only the first one if you want to do a quick test, otherwise data collection and featurization will take hours
21
+ * Note: You can use the code in the Tools folder to show what is in the mongo or qdrant database or clear the databases
22
  * The app is available on localhost:7860
23
 
24
  <h3>Non-Docker(web based) setup:</h3>
25
 
26
+ If for some reason the docker setup does not work try connecting to mongodb, qdrant, ollama, and gradio from the web(otherwise ignore this section):
27
  * Clone the repository from huggingface or the entire repository from github
28
  * Reopen the repository in a dev container
29
  * Copy the .env.example into a new .env file in the project folder
 
39
 
40
  <h1>Project infrastructure</h1>
41
 
42
+ Note some files may have similar code with other files, such as the ClearML files containing ipynb files rewritten in python in order to work in ClearML. The ipynb file prints output to help see what is happening unlike the ClearML py files.
 
 
 
43
 
44
  # Data Collection Pipeline
45
  The Data Collection pipeline takes as input a list of links to domains. The links are fed into the ETL pipeline which Extracts data from the links using a crawler, Transforms the data into a standardized format, and Loads the extracted data into a NoSQL data warehouse, which in this case is MongoDB. The ETL pipeline uses a different method of extracting and transforming based on the link type. In this project, I classify links as either a github repository or document each with their own crawler and cleaner. This raw data is used by the feature pipeline.
46
 
47
  # Feature Pipeline
48
  The Feature pipeline contains the ingestion pipeline.
49
+ * The ingestion pipeline extracts documents from MongoDB that were stored by the Data Collection Pipeline. It further cleans the data(remove non-printable characters), breaks it into chunks, passes the chunks through an embedding model to generate embeddings, then loads the embeddings plus their metadata into a vector database, which in this case is Qdrant. The embeddings are passed with additional metadata that contains the document link, type, chunk number, and content.
50
 
51
  # Training Pipeline
52
  The training pipeline performs finetuning. I skipped this step since it was not required.
 
54
  # Inference Pipeline
55
  The inference pipeline contains the retrieval client/pipeline.
56
  * The retrieval client takes a prompt as input. It uses the same embedding model as the ingestion pipeline in order to create an embedding for the prompt. It then queries the Qdrant database for the 10 closest embeddings using cosine distance and extracts the text chunk stored in the embeddings' metadata. This returns chunks that are related to the prompt.
57
+ * The inference pipeline takes a query as input. It expands the query into N=2 queries using a prompt template, performs self-querying to extract metadata (document type) from the original query, searches the Qdrant for K=10 relevant chunks to each of the N=2 queries plus metadata using the retrieval client, combines the K=10 results from each of the N=2 queries, filters out only the most relevant 3 results, prompts the LLM with the results and query metadata as context, and pipes the prompt into the model to generate an answer.
58
+
59
+ # app.py
60
+ Sends a query to the inference pipeline to generate an answer. The DataCollectionPipeline.ipynb and FeaturePipeline.ipynb files must be run first to populate the databases. Note that the docker compose already runs the app in a docker container. The python file allows you to run the app outside a container if you install gradio. When using the gradio app, you can check the useSample box and select an Sample Prompt from the dropdown menu to run the sample prompts, or uncheck the box and run your own custom query.
61
 
62
+ # ClearML(optional setup)
63
  The ClearML folder contains the notebook (.ipynb) pipeline files rewritten to work with ClearML. It is similar code to the notebooks, however ClearML does not print any output but instead logs all output on the website. The website stores the pipelines which take input and produce output stored in artifacts. These are the differences between the notebook(.ipynb) pipeline files and the ClearML pipeline files(.py):
64
  * The ClearML Data Collection Pipeline works the same way, running the entire ETL pipeline in a single step (I could not split the ETL pipeline into 3 steps (Extract, Transform, Load) since my list of links gets bigger while looping through it (since it also goes through some links inside of the websites crawled). Breaking it into steps would require more HTTP requests which would greatly slow down the pipeline).
65
  * The Feature Pipeline breaks down the notebook's loop (from the ingestion pipeline) into 5 stages: retrieve documents, clean documents, chunk documents, embed chunks, and store embeddings.
66
+ * The Inference Pipeline simply puts each step in the gradio app into a function that is tracked by ClearML. These functions are query expansion, self-querying, filtered vector search, collecting results, reranking, building prompt, and obtaining answer.
67
 
68
  # Tools
69
+ The tools folder contains code for viewing/deleting what has been stored in MongoDB and Qdrant(very useful for debugging!).
70
+ * Tools/mongoTools.ipynb can show the amount of documents in the MongoDB database(which consists of two collections), show the full list of links visited, and the first document in each collection(a sample to show what the data stored in MongoDB looks like). The second cell deletes everything in the mongo database if you want to rerun the DataCollection pipeline with fewer links. The DataCollection pipeline will automatically ignore visited links, but if it takes too long, I suggest using the tool to delete everything, then rerunning the pipeline with only the ROS documentation and github links. Nav2 and moveit are massive sites/repositories to crawl.
71
+ * Tools/QdrantTools.ipynb can show the amount of documents in the Qdrant database(which consists of two collections), the first document in each collection(a sample to show what the data stored in Qdrant looks like), and runs a sample search for the closest embeddings/vectors to a sample query(prints out the metadata of the embeddings). Note that the embeddings themselves are not shown because with_vectors=false since normally Qdrant will search for the closest embeddings, but return the payload associated with the embedding(since the embedding itself is useless for generating an answer). The second cell counts how many chunks need to be embedded by the FeaturePipeline and compares it to the total number of chunks from the first cell to give an idea of how close to completion the feature pipeline is(run first cell first). The third cell is an explanation of how Qdrant finds the closest embeddings using cosine distance. The fourth cell allows you to delete everything in the Qdrant database(use with caution!).
72
+ * Tools/InferenceTool.ipynb contains the inference pipeline used by the gradio app. It allows you to generate answers to queries without running the gradio app along with printing out useful debugging information for everything that is being fed into the model. This includes the query expansion(reworded query(s)), whether the query is a coding question(self-querying), which Qdrant collection is being searched, the chunks/text being passed as context, the RAG model's answer, and the original model's answer to compare with the RAG model to see if it performed better.
73
 
74
  # shared.py
75
  shared.py is in both the project folder and project/Tools folder. It contains functions for setting up the connections with either the docker containers or web services. If you are running into errors connecting to any of the services, consider editing this file or double checking the .env file. Note the ClearML folder hardcodes all functions since it had trouble importing code.
project/Tools/InferenceTool.ipynb ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Make sure ollama serve is running(docker or terminal)\n",
10
+ "from operator import itemgetter\n",
11
+ "from langchain.prompts import PromptTemplate\n",
12
+ "from shared import getModel, getEmbeddingsModel, getQdrantClient\n",
13
+ "\n",
14
+ "def answer(query):\n",
15
+ " # Create a qdrant connection\n",
16
+ " qClient = getQdrantClient()\n",
17
+ "\n",
18
+ " # Setup the text embedder\n",
19
+ " embeddingsModel = getEmbeddingsModel()\n",
20
+ "\n",
21
+ " # Setup the model\n",
22
+ " model = getModel()\n",
23
+ "\n",
24
+ " # Retrieval Pipeline\n",
25
+ " # Retrieve the chunks with the most similar embeddings from Qdrant\n",
26
+ " def retriever(text, collection):\n",
27
+ " results = qClient.search(\n",
28
+ " collection_name=collection,\n",
29
+ " query_vector = embeddingsModel.embed_query(text),\n",
30
+ " limit=10\n",
31
+ " )\n",
32
+ " return results\n",
33
+ "\n",
34
+ " # Query expansion(I only generate one additional prompt for simplicity)\n",
35
+ " template = \"\"\"\n",
36
+ " Rewrite the prompt. The new prompt must offer a different perspective.\n",
37
+ " Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
38
+ " Prompt: {prompt}\n",
39
+ " \"\"\"\n",
40
+ " prompt = PromptTemplate.from_template(template)\n",
41
+ " chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
42
+ " queryExpansion = chain.invoke({\"prompt\": query})\n",
43
+ " print(\"Query expansion: \", queryExpansion)\n",
44
+ "\n",
45
+ " # Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
46
+ " template = \"\"\"\n",
47
+ " You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
48
+ " Output a 1 if it is or a 0 if it is not and nothing else.\n",
49
+ " Prompt: {prompt}\n",
50
+ " \"\"\"\n",
51
+ " prompt = PromptTemplate.from_template(template)\n",
52
+ " chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
53
+ " codingQuestion = chain.invoke({\"prompt\": query})\n",
54
+ " print(\"Coding question?: \", codingQuestion)\n",
55
+ "\n",
56
+ " # Filtered vector search for each of the N queries after expansion\n",
57
+ " relatedCollection = 'Document'\n",
58
+ " if (codingQuestion == '1'):\n",
59
+ " relatedCollection = 'Github'\n",
60
+ " results1 = retriever(query, relatedCollection)\n",
61
+ " results2 = retriever(queryExpansion, relatedCollection)\n",
62
+ " print(\"Related collection: \", relatedCollection)\n",
63
+ " \n",
64
+ "\n",
65
+ " # Collecting results\n",
66
+ " results = results1+results2\n",
67
+ "\n",
68
+ " # Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
69
+ " ids = [result.id for result in results]\n",
70
+ " scores = [result.score for result in results]\n",
71
+ " topIds = []\n",
72
+ " topIndexes = []\n",
73
+ " for x in range(3):\n",
74
+ " maxScore = 0\n",
75
+ " maxIndex = 0\n",
76
+ " for i in range(len(ids)):\n",
77
+ " if ids[i] not in topIds and scores[i] > maxScore:\n",
78
+ " maxScore = scores[i]\n",
79
+ " maxIndex = i\n",
80
+ " topIds.append(ids[maxIndex])\n",
81
+ " topIndexes.append(maxIndex)\n",
82
+ " texts = [result.payload['text'] for result in results]\n",
83
+ " links = [result.payload['link'] for result in results]\n",
84
+ " topTexts = ''\n",
85
+ " for index in topIndexes:\n",
86
+ " print(\"Top texts: \", texts[index])\n",
87
+ " print(\"Link: \", links[index])\n",
88
+ " topTexts += texts[index]\n",
89
+ "\n",
90
+ " # Building prompt\n",
91
+ " if(codingQuestion == '1'):\n",
92
+ " template = \"\"\"\n",
93
+ " Write code for the following question given the related coding document below.\n",
94
+ "\n",
95
+ " Document: {document}\n",
96
+ " Question: {question}\n",
97
+ " \"\"\"\n",
98
+ " prompt = PromptTemplate.from_template(template)\n",
99
+ " else:\n",
100
+ " template = \"\"\"\n",
101
+ " You are an AI agent that has retreived a document from the web.\n",
102
+ " If the document is useful for answering the question use it.\n",
103
+ " If the document is not useful, answer normally.\n",
104
+ " Do not mention the document.\n",
105
+ "\n",
106
+ " Document: {document}\n",
107
+ " Question: {question}\n",
108
+ " \"\"\"\n",
109
+ " prompt = PromptTemplate.from_template(template)\n",
110
+ "\n",
111
+ " # Obtaining answer\n",
112
+ " chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
113
+ " print(\"RAG answer: \", chain.invoke({\"document\": topTexts, \"question\": query}))\n",
114
+ " print(\"\\n----------------------------------------------\\n\")\n",
115
+ " baseline = model.invoke(query)\n",
116
+ " print(\"Baseline answer: \", baseline[:500])\n",
117
+ " print(\"\\n----------------------------------------------\\n\")\n"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 5,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "name": "stdout",
127
+ "output_type": "stream",
128
+ "text": [
129
+ "Query expansion: What percentage of global companies have adopted or are utilizing Nav2?\n",
130
+ "Coding question?: 0\n",
131
+ "Related collection: Document\n",
132
+ "Top texts: types of tasks like object following, complete coverage navigation, and more. Nav2 is a production-grade and high-quality navigation framework trusted by 100+ companies worldwide. It provides perception, planning, control, localization, visualization, and much more to build highly reliable autonomous systems. This will compute an environmental model from sensor and semantic data, dynamically path plan, compute velocities for motors, avoid obstacles, and structure higher-level robot behaviors.\n",
133
+ "Link: https://docs.nav2.org/\n",
134
+ "Top texts: not specifically address here. BehaviorTree.CPP upgraded to version 4.5+ Since we migrated from version 3.8 to 4.5, users must upgrade their XML and source code accordingly. You can refer to [this page](https://www.behaviortree.dev/docs/migration) for more details, but the main changes are: XML must be changed. This [python script can help](https://github.com/BehaviorTree/BehaviorTree.CPP/blob/master/convert_v3_to_v4.py). The syntax of SubTrees has changed; the one of SubTreePlus was adopted,\n",
135
+ "Link: https://docs.nav2.org/migration/Iron.html\n",
136
+ "Top texts: September 19, 2015 MoveIt! Upcoming Events - RoboBusiness 2015 Come meet MoveIt! developers and users at RoboBusiness 2015 in San Jose... September 17, 2015 Report on First MoveIt! Community Meeting Watch video of the First MoveIt! Community Meeting in case you missed it. Thank you for coming to the MoveIt! Community Meeting and thanks to the present... July 02, 2015 MoveIt! goes underwater! MoveIt! on an underwater Girona500 AUV robot and 4-DOF arm for autonomous underwater manipulation...\n",
137
+ "Link: https://moveit.ai/blog/\n",
138
+ "RAG answer: Nav2 is trusted by 100+ companies worldwide.\n",
139
+ "\n",
140
+ "----------------------------------------------\n",
141
+ "\n",
142
+ "Baseline answer: I don't have any information about a company called \"Nav2.\" It's possible that it's a small or private company, or it may not be well-known. Can you provide more context or clarify which Nav2 you are referring to?\n",
143
+ "\n",
144
+ "Alternatively, I can suggest some well-known companies that use Nav (a navigation and mapping platform) for their trust services. For example:\n",
145
+ "\n",
146
+ "* Uber uses Nav for its ride-hailing service\n",
147
+ "* Lyft also uses Nav for its service\n",
148
+ "* Pizza Hut uses Nav to help customers navigate to location\n",
149
+ "\n",
150
+ "----------------------------------------------\n",
151
+ "\n"
152
+ ]
153
+ }
154
+ ],
155
+ "source": [
156
+ "#queries = [\"How can I develop the navigation stack of an agent with egomotion?\", \"What is ROS?\", \"How many companies is Nav2 trusted by worldwide?\", \"How would I build a ROS 2 Navigation Framework and System?\", \"Write me code to move a robot using Moveit\"]\n",
157
+ "queries = [\"How many companies is Nav2 trusted by worldwide?\"]\n",
158
+ "for query in queries:\n",
159
+ " answer(query)"
160
+ ]
161
+ }
162
+ ],
163
+ "metadata": {
164
+ "kernelspec": {
165
+ "display_name": "Python 3",
166
+ "language": "python",
167
+ "name": "python3"
168
+ },
169
+ "language_info": {
170
+ "codemirror_mode": {
171
+ "name": "ipython",
172
+ "version": 3
173
+ },
174
+ "file_extension": ".py",
175
+ "mimetype": "text/x-python",
176
+ "name": "python",
177
+ "nbconvert_exporter": "python",
178
+ "pygments_lexer": "ipython3",
179
+ "version": "3.12.7"
180
+ }
181
+ },
182
+ "nbformat": 4,
183
+ "nbformat_minor": 2
184
+ }
project/Tools/QdrantTools.ipynb CHANGED
@@ -2,17 +2,35 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 4,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Number of document chunks: 0\n",
13
- "Number of githb chunks: 0\n",
14
  "\n",
15
- "Sample search result(n=2): \n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  ]
17
  }
18
  ],
@@ -39,7 +57,7 @@
39
  "\n",
40
  "# Show everything in the Github collection\n",
41
  "numGithubChunks = 0\n",
42
- "# Note with_vectors defaults to false, so the vectors are not returned\n",
43
  "chunks = qClient.scroll(collection_name='Github', limit=100)\n",
44
  "while True:\n",
45
  " for chunk in chunks[0]:\n",
@@ -49,34 +67,86 @@
49
  " chunks = qClient.scroll(collection_name='Github', limit=100, with_payload=False, offset=chunks[1])\n",
50
  " if chunks[1] is None:\n",
51
  " break\n",
52
- "print(\"Number of githb chunks: \", numDocumentChunks)\n",
53
  "if numGithubChunks > 0:\n",
54
- " print(\"\\nSample github chunk(metadata not the vector): \")\n",
55
  " print(sampleGithubChunk, '\\n')\n",
56
  "\n",
57
  "# Show a sample search\n",
58
  "embeddingsModel = getEmbeddingsModel()\n",
59
  "results = qClient.search(\n",
60
  " collection_name=\"Document\",\n",
61
- " query_vector = embeddingsModel.embed_query(\"What operating system is ROS made for?\"),\n",
62
  " limit=10\n",
63
  ")\n",
64
- "print(\"\\nSample search result(n=2): \")\n",
65
  "for result in results:\n",
66
  " print(result)"
67
  ]
68
  },
69
  {
70
  "cell_type": "code",
71
- "execution_count": 22,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  "metadata": {},
73
  "outputs": [
74
  {
75
  "name": "stdout",
76
  "output_type": "stream",
77
  "text": [
78
- "Cosine Similarity for related sentences: 0.7035977848391597\n",
79
- "Cosine Similarity for unrelated sentences: 0.3566534327076298\n"
80
  ]
81
  }
82
  ],
@@ -84,23 +154,22 @@
84
  "import numpy as np\n",
85
  "# How cosine distance works\n",
86
  "\n",
87
- "embedding1 = embeddingsModel.embed_query(\"What is the weather like?\")\n",
88
- "embedding2 = embeddingsModel.embed_query(\"It is raining today.\")\n",
89
- "embedding3 = embeddingsModel.embed_query(\"ROS is an open source platform\")\n",
90
  "def cosine_similarity(vec1, vec2):\n",
91
  " dot_product = np.dot(vec1, vec2)\n",
92
  " norm_vec1 = np.linalg.norm(vec1)\n",
93
  " norm_vec2 = np.linalg.norm(vec2)\n",
94
  " return dot_product / (norm_vec1 * norm_vec2)\n",
95
- "similarity1 = cosine_similarity(embedding1, embedding2)\n",
96
- "similarity2 = cosine_similarity(embedding1, embedding3)\n",
97
  "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
98
  "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
99
  ]
100
  },
101
  {
102
  "cell_type": "code",
103
- "execution_count": 3,
104
  "metadata": {},
105
  "outputs": [
106
  {
@@ -109,7 +178,7 @@
109
  "True"
110
  ]
111
  },
112
- "execution_count": 3,
113
  "metadata": {},
114
  "output_type": "execute_result"
115
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 33,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Number of document chunks: 14800\n",
 
13
  "\n",
14
+ "Sample document chunk(metadata not the vector): \n",
15
+ "id=0 payload={'link': 'https://www.ros.org/', 'type': 'Document', 'chunk': 0, 'text': 'ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and'} vector=None shard_key=None order_value=None \n",
16
+ "\n",
17
+ "Number of githb chunks: 3600\n",
18
+ "\n",
19
+ "Sample github chunk(with_vector=false): \n",
20
+ "id=0 payload={'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'chunk': 0, 'text': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\"} vector=None shard_key=None order_value=None \n",
21
+ "\n",
22
+ "\n",
23
+ "Sample search result(n=10): \n",
24
+ "id=45 version=45 score=0.5391361 payload={'link': 'https://docs.nav2.org/', 'type': 'Document', 'chunk': 40, 'text': 'types of tasks like object following, complete coverage navigation, and more. Nav2 is a production-grade and high-quality navigation framework trusted by 100+ companies worldwide. It provides perception, planning, control, localization, visualization, and much more to build highly reliable autonomous systems. This will compute an environmental model from sensor and semantic data, dynamically path plan, compute velocities for motors, avoid obstacles, and structure higher-level robot behaviors.'} vector=None shard_key=None order_value=None\n",
25
+ "id=9180 version=9180 score=0.511093 payload={'link': 'https://docs.nav2.org/migration/Iron.html', 'type': 'Document', 'chunk': 39, 'text': 'not specifically address here. BehaviorTree.CPP upgraded to version 4.5+ Since we migrated from version 3.8 to 4.5, users must upgrade their XML and source code accordingly. You can refer to [this page](https://www.behaviortree.dev/docs/migration) for more details, but the main changes are: XML must be changed. This [python script can help](https://github.com/BehaviorTree/BehaviorTree.CPP/blob/master/convert_v3_to_v4.py). The syntax of SubTrees has changed; the one of SubTreePlus was adopted,'} vector=None shard_key=None order_value=None\n",
26
+ "id=9922 version=9922 score=0.5105795 payload={'link': 'https://moveit.ai/blog/', 'type': 'Document', 'chunk': 31, 'text': 'September 19, 2015 MoveIt! Upcoming Events - RoboBusiness 2015 Come meet MoveIt! developers and users at RoboBusiness 2015 in San Jose... September 17, 2015 Report on First MoveIt! Community Meeting Watch video of the First MoveIt! Community Meeting in case you missed it. Thank you for coming to the MoveIt! Community Meeting and thanks to the present... July 02, 2015 MoveIt! goes underwater! MoveIt! on an underwater Girona500 AUV robot and 4-DOF arm for autonomous underwater manipulation...'} vector=None shard_key=None order_value=None\n",
27
+ "id=540 version=540 score=0.51053035 payload={'link': 'https://docs.nav2.org/concepts/index.html', 'type': 'Document', 'chunk': 56, 'text': 'to their task. When the behavior tree ticks the corresponding BT node, it will call the action server to process its task. The action server callback inside the server will call the chosen algorithm by its name (e.g. FollowPath) that maps to a specific algorithm. This allows a user to abstract the algorithm used in the behavior tree to classes of algorithms. For instance, you can have N plugin controllers to follow paths, dock with charger, avoid dynamic obstacles, or interface with a tool.'} vector=None shard_key=None order_value=None\n",
28
+ "id=7618 version=7618 score=0.50761116 payload={'link': 'https://docs.nav2.org/configuration/packages/configuring-savitzky-golay-smoother.html', 'type': 'Document', 'chunk': 39, 'text': 'plugin that will take in an input path and smooth it using a simple and fast smoothing technique based on Savitzky Golay Filters. It uses a digital signal processing technique designed to reduce noise distorting a reference signal, in this case, a path. It is useful for all types of planners, but particularly in NavFn to remove tiny artifacts that can occur near the end of paths or Theta* to slightly soften the transition between Line of Sight line segments without modifying the primary path.'} vector=None shard_key=None order_value=None\n",
29
+ "id=1067 version=1067 score=0.50312483 payload={'link': 'https://docs.nav2.org/setup_guides/algorithm/select_algorithm.html', 'type': 'Document', 'chunk': 48, 'text': 'not suitable for ackermann and legged robots since they have turning constraints. That being said, these plugins are best used on robots that can drive in any direction or rotate safely in place, such as circular differential and circular omnidirectional robots. Another planner plugin is the Smac Hybrid-A* planner that supports arbitrary shaped ackermann and legged robots. It is a highly optimized and fully reconfigurable Hybrid-A* implementation supporting Dubin and Reeds-Shepp motion models.'} vector=None shard_key=None order_value=None\n",
30
+ "id=60 version=60 score=0.5007378 payload={'link': 'https://moveit.ai/', 'type': 'Document', 'chunk': 2, 'text': 'given pose, even in over-actuated arms Control Execute time-parameterized joint trajectories to low level hardware controllers through common interfaces 3D Perception Connect to depth sensors and point clouds with Octomaps Collision Checking Avoid obstacles using geometric primitives, meshes, or point cloud data Companies using MoveIt Powerful 3D Interactive Visualizer Out-of-the box visual demonstrations in Rviz allow new users experimentation with various planning algorithms around obstacles.'} vector=None shard_key=None order_value=None\n",
31
+ "id=9196 version=9196 score=0.49414897 payload={'link': 'https://docs.nav2.org/migration/Iron.html', 'type': 'Document', 'chunk': 55, 'text': 'planner. When enforce_path_inversion is true, the path handler will prune the path to the first time the directions change to force the controller to plan to the inversion point and then be set the rest of the path, once in tolerance. The Path Align critic also contains a parameter use_path_orientations which can be paired with it to incentivize aligning the path containing orientation information to better attempt to achieve path inversions where requested and not do them when not requested.'} vector=None shard_key=None order_value=None\n",
32
+ "id=404 version=404 score=0.4938618 payload={'link': 'https://docs.nav2.org/development_guides/devcontainer_docs/devcontainer_guide.html', 'type': 'Document', 'chunk': 43, 'text': 'needed for building the project, as reused by the projects CI. For example, the dever stage modifies /etc/bash.bashrc to automatically source install/setup.bash from the underlay workspace, ensuring all VS Code extensions are loaded with the correct environment, while avoiding any race conditions during installation and startup. To speed up the initial build, images layers from this builder stage are cached by pulling the same image tag used by the projects CI, hosted from the image registry.'} vector=None shard_key=None order_value=None\n",
33
+ "id=523 version=523 score=0.48727226 payload={'link': 'https://docs.nav2.org/concepts/index.html', 'type': 'Document', 'chunk': 39, 'text': 'with the concepts required to appreciating and working with this project. ROS 2 ROS 2 is the core middleware used for Nav2. If you are unfamiliar with this, please visit the ROS 2 documentation before continuing. Action Server Just as in ROS, action servers are a common way to control long running tasks like navigation. This stack makes more extensive use of actions, and in some cases, without an easy topic interface. It is more important to understand action servers as a developer in ROS 2.'} vector=None shard_key=None order_value=None\n"
34
  ]
35
  }
36
  ],
 
57
  "\n",
58
  "# Show everything in the Github collection\n",
59
  "numGithubChunks = 0\n",
60
+ "# Note with_vectors defaults to false, so the vectors are not returned(since they are very large)\n",
61
  "chunks = qClient.scroll(collection_name='Github', limit=100)\n",
62
  "while True:\n",
63
  " for chunk in chunks[0]:\n",
 
67
  " chunks = qClient.scroll(collection_name='Github', limit=100, with_payload=False, offset=chunks[1])\n",
68
  " if chunks[1] is None:\n",
69
  " break\n",
70
+ "print(\"Number of githb chunks: \", numGithubChunks)\n",
71
  "if numGithubChunks > 0:\n",
72
+ " print(\"\\nSample github chunk(with_vector=false): \")\n",
73
  " print(sampleGithubChunk, '\\n')\n",
74
  "\n",
75
  "# Show a sample search\n",
76
  "embeddingsModel = getEmbeddingsModel()\n",
77
  "results = qClient.search(\n",
78
  " collection_name=\"Document\",\n",
79
+ " query_vector = embeddingsModel.embed_query(\"How many companies is Nav2 trusted by worldwide?\"),\n",
80
  " limit=10\n",
81
  ")\n",
82
+ "print(\"\\nSample search result(n=10): \")\n",
83
  "for result in results:\n",
84
  " print(result)"
85
  ]
86
  },
87
  {
88
  "cell_type": "code",
89
+ "execution_count": 34,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "name": "stdout",
94
+ "output_type": "stream",
95
+ "text": [
96
+ "Total number of chunks to embed: 285569\n",
97
+ "Chunks currently embedded: 18400\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "# Check how many chunks total will be processed by the FeaturePipeline\n",
103
+ "from shared import getMongoClient\n",
104
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
105
+ "\n",
106
+ "\n",
107
+ "texts = []\n",
108
+ "# Create a mongoDB connection\n",
109
+ "mongoHost = getMongoClient()\n",
110
+ "mongoDatabase = mongoHost[\"twin\"]\n",
111
+ "collections = mongoDatabase.list_collection_names()\n",
112
+ "for collection in collections:\n",
113
+ " mongoCollection = mongoDatabase[collection]\n",
114
+ " results = mongoCollection.find()\n",
115
+ " for result in results:\n",
116
+ " # For each document, split it into chunks\n",
117
+ " texts.append(result[\"content\"])\n",
118
+ "\n",
119
+ "cleanTexts = []\n",
120
+ "for text in texts:\n",
121
+ " cleanTexts.append(\"\".join(char for char in text if 32 <= ord(char) <= 126))\n",
122
+ "\n",
123
+ "numChunks = 0\n",
124
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
125
+ " chunk_size=500,\n",
126
+ " chunk_overlap=20,\n",
127
+ " length_function=len,\n",
128
+ " is_separator_regex=False,\n",
129
+ ")\n",
130
+ "for text in cleanTexts:\n",
131
+ " textChunks = text_splitter.split_text(text)\n",
132
+ " for chunk in textChunks:\n",
133
+ " numChunks += 1\n",
134
+ "\n",
135
+ "print(\"Total number of chunks to embed: \", numChunks)\n",
136
+ "print(\"Chunks currently embedded: \", numDocumentChunks+numGithubChunks)"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 17,
142
  "metadata": {},
143
  "outputs": [
144
  {
145
  "name": "stdout",
146
  "output_type": "stream",
147
  "text": [
148
+ "Cosine Similarity for related sentences: 0.523006986899456\n",
149
+ "Cosine Similarity for unrelated sentences: 0.32259653091273344\n"
150
  ]
151
  }
152
  ],
 
154
  "import numpy as np\n",
155
  "# How cosine distance works\n",
156
  "\n",
157
+ "queryEmbedding = embeddingsModel.embed_query(\"What is the weather like?\")\n",
158
+ "documentEmbedding = embeddingsModel.embed_documents([\"It is raining today.\", \"ROS is an open source platform\"])\n",
 
159
  "def cosine_similarity(vec1, vec2):\n",
160
  " dot_product = np.dot(vec1, vec2)\n",
161
  " norm_vec1 = np.linalg.norm(vec1)\n",
162
  " norm_vec2 = np.linalg.norm(vec2)\n",
163
  " return dot_product / (norm_vec1 * norm_vec2)\n",
164
+ "similarity1 = cosine_similarity(queryEmbedding, documentEmbedding[0])\n",
165
+ "similarity2 = cosine_similarity(queryEmbedding, documentEmbedding[1])\n",
166
  "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
167
  "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
168
  ]
169
  },
170
  {
171
  "cell_type": "code",
172
+ "execution_count": 19,
173
  "metadata": {},
174
  "outputs": [
175
  {
 
178
  "True"
179
  ]
180
  },
181
+ "execution_count": 19,
182
  "metadata": {},
183
  "output_type": "execute_result"
184
  }
project/Tools/mongoTools.ipynb CHANGED
@@ -2,20 +2,469 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 7,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "Number of regular documents: 4\n",
13
- "Number of github documents: 6815\n",
14
  "Links crawled: \n",
15
  "https://www.ros.org/\n",
16
  "https://docs.nav2.org/\n",
17
  "https://moveit.ai/\n",
18
  "https://gazebosim.org/home\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "https://github.com/ros2/ros2/tree/rolling/README.md\n",
20
  "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
21
  "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
@@ -6816,23 +7265,8 @@
6816
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.hh\n",
6817
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/gazebo_generator.cc\n",
6818
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.cc\n",
6819
- "https://github.com/ros2/ros2/tree/rolling/README.md\n",
6820
- "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
6821
- "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
6822
- "https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
6823
- "https://github.com/ros2/ros2/tree/rolling/src/.gitkeep\n",
6824
- "https://github.com/ros-navigation/navigation2/tree/main/README.md\n",
6825
- "https://github.com/ros-navigation/navigation2/tree/main/.gitignore\n",
6826
- "https://github.com/ros-navigation/navigation2/tree/main/CODEOWNERS\n",
6827
- "https://github.com/ros-navigation/navigation2/tree/main/ros2.repos\n",
6828
- "https://github.com/ros-navigation/navigation2/tree/main/src/.gitkeep\n",
6829
- "https://github.com/ros2/ros2/tree/rolling/README.md\n",
6830
- "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
6831
- "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
6832
- "https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
6833
- "https://github.com/ros2/ros2/tree/rolling/src/.gitkeep\n",
6834
- "Sample regular document: {'_id': ObjectId('6755a587c408062710d2da36'), 'link': 'https://www.ros.org/', 'type': 'Document', 'content': ' ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and Windows 10, though other systems are supported to varying degrees. Learn More Humble Hawksbill ROS 2 Humble Hawksbill is a slighly older LTS release of ROS 2 targeted at Ubuntu 22.04 (Jammy) and Windows 10. Other systems are supported including tier 3 support for 20.04 for those transitioning from ROS 1. Learn More Support There are several mechanisms in place to support the ROS community, each with its own purpose. Documentation Documentation and tutorials for ROS 2 Stack Exchange Ask questions. Get answers. Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Documentation Documentation and tutorials for ROS 2 Robotics Stack Exchange Ask questions.Get answers.All ROS versions Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Recent Updates and Highlights ROSCon 2024 Videos are Now Available See the ROSCon 2024 website for details 11/18/2024 - Katherine Scott The videos from ROSCon 2024 in Odense are now available on the ROSCon Website (see the program), this Vimeo showcase, and in the ROS documentation. The ROSCon website also includes the slides from all the talks at ROSCon. I have also included a list of all the videos below. I want to thank AMD for being our 2024 ROSCon video sponsor, their generous support makes the ROSCon live stream and videos possible. 
READ MORE Recent ROS Discourse Posts ROS News of the Week 11/22/2024 - ROS Discourse Gazebo Classic and Citadel End of Life 12/2/2024 - ROS Discourse ROS 2 driver for Mitsubishi Melfa RV-FR 10/24/2024 ROS Discourse Home Why ROS? Getting Started Community Ecosystem Q&A Forum Packages Wiki Documentation media Q&A Forum Packages ROSCon Wiki documentation discord Brought to you by Open Robotics | licensed under Creative Commons Attributions 3.0 | ©2021 Open Robotics '}\n",
6835
- "Sample github document {'_id': ObjectId('67559bffc408062710d2bc0f'), 'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'content': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/). Onceyou'veinstalledROSstartbylearningsome[basicconcepts](https://docs.ros.org/en/rolling/Concepts/Basic.html)andtakealookatour[beginnertutorials](https://docs.ros.org/en/rolling/Tutorials/Beginner-CLI-Tools.html). #JointheROSCommunity ##CommunityResources *[ROSDiscussionForum](https://discourse.ros.org/) *[ROSDiscordServer](https://discord.com/servers/open-robotics-1077825543698927656) *[RoboticsStackExchange](https://robotics.stackexchange.com/)(preferredROSsupportforum). *[OfficialROSVideos](https://vimeo.com/osrfoundation) *[ROSCon](https://roscon.ros.org),ouryearlydeveloperconference. 
*CiteROS2inacademicworkusing[DOI:10.1126/scirobotics.abm6074](https://www.science.org/doi/10.1126/scirobotics.abm6074) ##DeveloperResources *[ROS2Documentation](https://docs.ros.org/) *[ROSPackageAPIreference](https://docs.ros.org/en/rolling/p/) *[ROSPackageIndex](https://index.ros.org/) *[ROSonDockerHub](https://hub.docker.com/_/ros/) *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg) ROSismadepossiblethroughthegeneroussupportofopensourcecontributorsandthenon-profit[OpenSourceRoboticsFoundation(OSRF)](https://www.openrobotics.org/). TaxdeductibledonationstotheOSRFcanbe[madehere.](https://donorbox.org/support-open-robotics?utm_medium=qrcode&utm_source=qrcode) \"}\n"
6836
  ]
6837
  }
6838
  ],
@@ -6875,7 +7309,7 @@
6875
  },
6876
  {
6877
  "cell_type": "code",
6878
- "execution_count": 13,
6879
  "metadata": {},
6880
  "outputs": [
6881
  {
@@ -6884,7 +7318,7 @@
6884
  "DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)"
6885
  ]
6886
  },
6887
- "execution_count": 13,
6888
  "metadata": {},
6889
  "output_type": "execute_result"
6890
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "Number of regular documents: 453\n",
13
+ "Number of github documents: 6800\n",
14
  "Links crawled: \n",
15
  "https://www.ros.org/\n",
16
  "https://docs.nav2.org/\n",
17
  "https://moveit.ai/\n",
18
  "https://gazebosim.org/home\n",
19
+ "https://www.ros.org/blog/why-ros\n",
20
+ "https://www.ros.org/blog/getting-started\n",
21
+ "https://www.ros.org/blog/community\n",
22
+ "https://www.ros.org/blog/ecosystem\n",
23
+ "https://www.ros.org/blog/getting-started/\n",
24
+ "https://www.ros.org/blog/media\n",
25
+ "https://www.ros.org/blog/discord\n",
26
+ "https://docs.nav2.org/getting_started/index.html\n",
27
+ "https://docs.nav2.org/development_guides/index.html\n",
28
+ "https://docs.nav2.org/development_guides/build_docs/index.html\n",
29
+ "https://docs.nav2.org/development_guides/build_docs/build_troubleshooting_guide.html\n",
30
+ "https://docs.nav2.org/development_guides/devcontainer_docs/index.html\n",
31
+ "https://docs.nav2.org/development_guides/devcontainer_docs/devcontainer_guide.html\n",
32
+ "https://docs.nav2.org/development_guides/involvement_docs/index.html\n",
33
+ "https://docs.nav2.org/concepts/index.html\n",
34
+ "https://docs.nav2.org/setup_guides/index.html\n",
35
+ "https://docs.nav2.org/setup_guides/transformation/setup_transforms.html\n",
36
+ "https://docs.nav2.org/setup_guides/urdf/setup_urdf.html\n",
37
+ "https://docs.nav2.org/setup_guides/odom/setup_odom.html\n",
38
+ "https://docs.nav2.org/setup_guides/sensors/setup_sensors.html\n",
39
+ "https://docs.nav2.org/setup_guides/footprint/setup_footprint.html\n",
40
+ "https://docs.nav2.org/setup_guides/algorithm/select_algorithm.html\n",
41
+ "https://docs.nav2.org/about/robots.html\n",
42
+ "https://docs.nav2.org/tutorials/index.html\n",
43
+ "https://docs.nav2.org/tutorials/docs/navigation2_on_real_turtlebot3.html\n",
44
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_slam.html\n",
45
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_stvl.html\n",
46
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_gps.html\n",
47
+ "https://docs.nav2.org/tutorials/docs/using_groot.html\n",
48
+ "https://docs.nav2.org/tutorials/docs/integrating_vio.html\n",
49
+ "https://docs.nav2.org/tutorials/docs/navigation2_dynamic_point_following.html\n",
50
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_keepout_filter.html\n",
51
+ "https://docs.nav2.org/tutorials/docs/navigation2_with_speed_filter.html\n",
52
+ "https://docs.nav2.org/tutorials/docs/using_docking.html\n",
53
+ "https://docs.nav2.org/tutorials/docs/using_shim_controller.html\n",
54
+ "https://docs.nav2.org/tutorials/docs/adding_smoother.html\n",
55
+ "https://docs.nav2.org/tutorials/docs/using_collision_monitor.html\n",
56
+ "https://docs.nav2.org/tutorials/docs/adding_a_nav2_task_server.html\n",
57
+ "https://docs.nav2.org/tutorials/docs/filtering_of_noise-induced_obstacles.html\n",
58
+ "https://docs.nav2.org/tutorials/docs/camera_calibration.html\n",
59
+ "https://docs.nav2.org/tutorials/docs/get_backtrace.html\n",
60
+ "https://docs.nav2.org/tutorials/docs/get_profile.html\n",
61
+ "https://docs.nav2.org/plugin_tutorials/index.html\n",
62
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_costmap2d_plugin.html\n",
63
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_nav2planner_plugin.html\n",
64
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_nav2controller_plugin.html\n",
65
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_bt_plugin.html\n",
66
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_behavior_plugin.html\n",
67
+ "https://docs.nav2.org/plugin_tutorials/docs/writing_new_navigator_plugin.html\n",
68
+ "https://docs.nav2.org/configuration/index.html\n",
69
+ "https://docs.nav2.org/configuration/packages/configuring-bt-navigator.html\n",
70
+ "https://docs.nav2.org/configuration/packages/configuring-bt-xml.html\n",
71
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Wait.html\n",
72
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Spin.html\n",
73
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/BackUp.html\n",
74
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/DriveOnHeading.html\n",
75
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/AssistedTeleop.html\n",
76
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputePathToPose.html\n",
77
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/FollowPath.html\n",
78
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/NavigateToPose.html\n",
79
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearEntireCostmap.html\n",
80
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearCostmapExceptRegion.html\n",
81
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ClearCostmapAroundRobot.html\n",
82
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ReinitializeGlobalLocalization.html\n",
83
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/TruncatePath.html\n",
84
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/TruncatePathLocal.html\n",
85
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/PlannerSelector.html\n",
86
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ControllerSelector.html\n",
87
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/SmootherSelector.html\n",
88
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/GoalCheckerSelector.html\n",
89
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ProgressCheckerSelector.html\n",
90
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/NavigateThroughPoses.html\n",
91
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputePathThroughPoses.html\n",
92
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/ComputeCoveragePath.html\n",
93
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelCoverage.html\n",
94
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/RemovePassedGoals.html\n",
95
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/RemoveInCollisionGoals.html\n",
96
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelControl.html\n",
97
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelBackUp.html\n",
98
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelSpin.html\n",
99
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelWait.html\n",
100
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelDriveOnHeading.html\n",
101
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/CancelAssistedTeleop.html\n",
102
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/Smooth.html\n",
103
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/GetPoseFromPath.html\n",
104
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/DockRobot.html\n",
105
+ "https://docs.nav2.org/configuration/packages/bt-plugins/actions/UndockRobot.html\n",
106
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GoalReached.html\n",
107
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/TransformAvailable.html\n",
108
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/DistanceTraveled.html\n",
109
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GoalUpdated.html\n",
110
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/GloballyUpdatedGoal.html\n",
111
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/InitialPoseReceived.html\n",
112
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsStuck.html\n",
113
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsStopped.html\n",
114
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/TimeExpired.html\n",
115
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsBatteryLow.html\n",
116
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsPathValid.html\n",
117
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/PathExpiringTimer.html\n",
118
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/AreErrorCodesPresent.html\n",
119
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldAControllerRecoveryHelp.html\n",
120
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldAPlannerRecoveryHelp.html\n",
121
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/WouldASmootherRecoveryHelp.html\n",
122
+ "https://docs.nav2.org/configuration/packages/bt-plugins/conditions/IsBatteryCharging.html\n",
123
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/PipelineSequence.html\n",
124
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/RoundRobin.html\n",
125
+ "https://docs.nav2.org/configuration/packages/bt-plugins/controls/RecoveryNode.html\n",
126
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/RateController.html\n",
127
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/DistanceController.html\n",
128
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/SpeedController.html\n",
129
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/GoalUpdater.html\n",
130
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/PathLongerOnApproach.html\n",
131
+ "https://docs.nav2.org/configuration/packages/bt-plugins/decorators/SingleTrigger.html\n",
132
+ "https://docs.nav2.org/configuration/packages/configuring-costmaps.html\n",
133
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/static.html\n",
134
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/inflation.html\n",
135
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/obstacle.html\n",
136
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/voxel.html\n",
137
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/range.html\n",
138
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/denoise.html\n",
139
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/keepout_filter.html\n",
140
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/speed_filter.html\n",
141
+ "https://docs.nav2.org/configuration/packages/costmap-plugins/binary_filter.html\n",
142
+ "https://docs.nav2.org/configuration/packages/configuring-lifecycle.html\n",
143
+ "https://docs.nav2.org/configuration/packages/configuring-planner-server.html\n",
144
+ "https://docs.nav2.org/configuration/packages/configuring-coverage-server.html\n",
145
+ "https://docs.nav2.org/configuration/packages/configuring-navfn.html\n",
146
+ "https://docs.nav2.org/configuration/packages/configuring-smac-planner.html\n",
147
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-2d.html\n",
148
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-hybrid.html\n",
149
+ "https://docs.nav2.org/configuration/packages/smac/configuring-smac-lattice.html\n",
150
+ "https://docs.nav2.org/configuration/packages/configuring-thetastar.html\n",
151
+ "https://docs.nav2.org/configuration/packages/configuring-controller-server.html\n",
152
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/simple_progress_checker.html\n",
153
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/pose_progress_checker.html\n",
154
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/simple_goal_checker.html\n",
155
+ "https://docs.nav2.org/configuration/packages/nav2_controller-plugins/stopped_goal_checker.html\n",
156
+ "https://docs.nav2.org/configuration/packages/configuring-dwb-controller.html\n",
157
+ "https://docs.nav2.org/configuration/packages/dwb-params/controller.html\n",
158
+ "https://docs.nav2.org/configuration/packages/dwb-params/iterator.html\n",
159
+ "https://docs.nav2.org/configuration/packages/dwb-params/kinematic.html\n",
160
+ "https://docs.nav2.org/configuration/packages/dwb-params/visualization.html\n",
161
+ "https://docs.nav2.org/configuration/packages/dwb-plugins/limited_accel_generator.html\n",
162
+ "https://docs.nav2.org/configuration/packages/dwb-plugins/standard_traj_generator.html\n",
163
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/base_obstacle.html\n",
164
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/goal_align.html\n",
165
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/goal_dist.html\n",
166
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/obstacle_footprint.html\n",
167
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/oscillation.html\n",
168
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/path_align.html\n",
169
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/path_dist.html\n",
170
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/prefer_forward.html\n",
171
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/rotate_to_goal.html\n",
172
+ "https://docs.nav2.org/configuration/packages/trajectory_critics/twirling.html\n",
173
+ "https://docs.nav2.org/configuration/packages/configuring-regulated-pp.html\n",
174
+ "https://docs.nav2.org/configuration/packages/configuring-mppic.html\n",
175
+ "https://docs.nav2.org/configuration/packages/configuring-rotation-shim-controller.html\n",
176
+ "https://docs.nav2.org/configuration/packages/configuring-graceful-motion-controller.html\n",
177
+ "https://docs.nav2.org/configuration/packages/configuring-map-server.html\n",
178
+ "https://docs.nav2.org/configuration/packages/configuring-amcl.html\n",
179
+ "https://docs.nav2.org/configuration/packages/configuring-behavior-server.html\n",
180
+ "https://docs.nav2.org/configuration/packages/configuring-smoother-server.html\n",
181
+ "https://docs.nav2.org/configuration/packages/configuring-simple-smoother.html\n",
182
+ "https://docs.nav2.org/configuration/packages/configuring-savitzky-golay-smoother.html\n",
183
+ "https://docs.nav2.org/configuration/packages/configuring-constrained-smoother.html\n",
184
+ "https://docs.nav2.org/configuration/packages/configuring-velocity-smoother.html\n",
185
+ "https://docs.nav2.org/configuration/packages/configuring-collision-monitor.html\n",
186
+ "https://docs.nav2.org/configuration/packages/collision_monitor/configuring-collision-monitor-node.html\n",
187
+ "https://docs.nav2.org/configuration/packages/collision_monitor/configuring-collision-detector-node.html\n",
188
+ "https://docs.nav2.org/configuration/packages/configuring-waypoint-follower.html\n",
189
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/wait_at_waypoint.html\n",
190
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/photo_at_waypoint.html\n",
191
+ "https://docs.nav2.org/configuration/packages/nav2_waypoint_follower-plugins/input_at_waypoint.html\n",
192
+ "https://docs.nav2.org/configuration/packages/configuring-loopback-sim.html\n",
193
+ "https://docs.nav2.org/configuration/packages/configuring-docking-server.html\n",
194
+ "https://docs.nav2.org/tuning/index.html\n",
195
+ "https://docs.nav2.org/behavior_trees/index.html\n",
196
+ "https://docs.nav2.org/behavior_trees/overview/nav2_specific_nodes.html\n",
197
+ "https://docs.nav2.org/behavior_trees/overview/detailed_behavior_tree_walkthrough.html\n",
198
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_recovery.html\n",
199
+ "https://docs.nav2.org/behavior_trees/trees/nav_through_poses_recovery.html\n",
200
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_and_pause_near_goal_obstacle.html\n",
201
+ "https://docs.nav2.org/behavior_trees/trees/nav_to_pose_with_consistent_replanning_and_if_path_becomes_invalid.html\n",
202
+ "https://docs.nav2.org/behavior_trees/trees/follow_point.html\n",
203
+ "https://docs.nav2.org/behavior_trees/trees/odometry_calibration.html\n",
204
+ "https://docs.nav2.org/plugins/index.html\n",
205
+ "https://docs.nav2.org/migration/index.html\n",
206
+ "https://docs.nav2.org/migration/Dashing.html\n",
207
+ "https://docs.nav2.org/migration/Eloquent.html\n",
208
+ "https://docs.nav2.org/migration/Foxy.html\n",
209
+ "https://docs.nav2.org/migration/Galactic.html\n",
210
+ "https://docs.nav2.org/migration/Humble.html\n",
211
+ "https://docs.nav2.org/migration/Iron.html\n",
212
+ "https://docs.nav2.org/migration/Jazzy.html\n",
213
+ "https://docs.nav2.org/commander_api/index.html\n",
214
+ "https://docs.nav2.org/roadmap/roadmap.html\n",
215
+ "https://docs.nav2.org/about/index.html\n",
216
+ "https://docs.nav2.org/about/related_projects.html\n",
217
+ "https://docs.nav2.org/_images/sponsors_feb_2024.png\n",
218
+ "https://docs.nav2.org/_images/nav2_architecture.png\n",
219
+ "https://moveit.ai/install\n",
220
+ "https://moveit.ai/install-moveit2/binary/\n",
221
+ "https://moveit.ai/documentation/applications/\n",
222
+ "https://moveit.ai/documentation/concepts/\n",
223
+ "https://moveit.ai/documentation/related_projects/\n",
224
+ "https://moveit.ai/documentation/plugins/\n",
225
+ "https://moveit.ai/documentation/planners/\n",
226
+ "https://moveit.ai/documentation/source-code-api/\n",
227
+ "https://moveit.ai/about/\n",
228
+ "https://moveit.ai/robots/\n",
229
+ "https://moveit.ai/about/citations/\n",
230
+ "https://moveit.ai/about/distribution/\n",
231
+ "https://moveit.ai/documentation/faqs/\n",
232
+ "https://moveit.ai/support/\n",
233
+ "https://moveit.ai/about/press_kit/\n",
234
+ "https://moveit.ai/documentation/contributing\n",
235
+ "https://moveit.ai/documentation/contributing/\n",
236
+ "https://moveit.ai/events/\n",
237
+ "https://moveit.ai/documentation/contributing/roadmap/\n",
238
+ "https://moveit.ai/events/2024-google-summer-of-code/\n",
239
+ "https://moveit.ai/documentation/contributing/pullrequests/\n",
240
+ "https://moveit.ai/documentation/contributing/code/\n",
241
+ "https://moveit.ai/blog/\n",
242
+ "https://moveit.ai/about/get_involved/\n",
243
+ "https://moveit.ai/install-moveit2/source/\n",
244
+ "https://www.ros.org/blog/discord/\n",
245
+ "https://docs.nav2.org/index.html\n",
246
+ "https://docs.nav2.org/_images/rviz-not-started.png\n",
247
+ "https://docs.nav2.org/_images/gazebo_turtlebot1.png\n",
248
+ "https://docs.nav2.org/_images/rviz_initial.png\n",
249
+ "https://docs.nav2.org/_images/rviz-set-initial-pose.png\n",
250
+ "https://docs.nav2.org/_images/navstack-ready.png\n",
251
+ "https://docs.nav2.org/_images/navigate-to-pose.png\n",
252
+ "https://docs.nav2.org/_images/navigation_with_recovery_behaviours.gif\n",
253
+ "https://docs.nav2.org/_images/base-bot_1.png\n",
254
+ "https://docs.nav2.org/_images/base-bot_2.png\n",
255
+ "https://docs.nav2.org/_images/gazebo_sam_bot.png\n",
256
+ "https://docs.nav2.org/_images/rviz.png\n",
257
+ "https://docs.nav2.org/_images/add_topic_laserscan.png\n",
258
+ "https://docs.nav2.org/_images/add_my_marker.png\n",
259
+ "https://docs.nav2.org/_images/add_topic_local_costmap.png\n",
260
+ "https://docs.nav2.org/_images/add_topic_global_costmap.png\n",
261
+ "https://docs.nav2.org/_images/rviz_after_launch_view.png\n",
262
+ "https://docs.nav2.org/_images/rviz_slam_map_view.png\n",
263
+ "https://docs.nav2.org/_images/rviz_set_initial_pose.png\n",
264
+ "https://docs.nav2.org/_images/rviz_send_goal.png\n",
265
+ "https://docs.nav2.org/_images/rviz_robot_navigating.png\n",
266
+ "https://docs.nav2.org/_images/WGS_84_reference_frame.svg\n",
267
+ "https://docs.nav2.org/_images/South-America-UTM-zones.png\n",
268
+ "https://docs.nav2.org/_images/gazebo_sonoma_raceway.png\n",
269
+ "https://docs.nav2.org/_images/mapviz_init.png\n",
270
+ "https://docs.nav2.org/_images/localization_check.gif\n",
271
+ "https://docs.nav2.org/_images/navigation_check.gif\n",
272
+ "https://docs.nav2.org/_images/interactive_wpf.gif\n",
273
+ "https://docs.nav2.org/_images/groot_export_new_node.png\n",
274
+ "https://docs.nav2.org/_images/vio.png\n",
275
+ "https://docs.nav2.org/_images/main_diagram.png\n",
276
+ "https://docs.nav2.org/_images/drawing_keepout_mask.png\n",
277
+ "https://docs.nav2.org/_images/keepout_global.gif\n",
278
+ "https://docs.nav2.org/_images/keepout_mask.png\n",
279
+ "https://docs.nav2.org/_images/drawing_speed_mask.png\n",
280
+ "https://docs.nav2.org/_images/speed_global.gif\n",
281
+ "https://docs.nav2.org/_images/speed_mask.png\n",
282
+ "https://docs.nav2.org/_images/smoothing_path.png\n",
283
+ "https://docs.nav2.org/_images/polygons.png\n",
284
+ "https://docs.nav2.org/_images/holonomic_direction.png\n",
285
+ "https://docs.nav2.org/_images/holonomic_examples1.png\n",
286
+ "https://docs.nav2.org/_images/polygons_visualization.png\n",
287
+ "https://docs.nav2.org/_images/collision.png\n",
288
+ "https://docs.nav2.org/_images/title.png\n",
289
+ "https://docs.nav2.org/_images/3x3_kernels.png\n",
290
+ "https://docs.nav2.org/_images/dilate.gif\n",
291
+ "https://docs.nav2.org/_images/connected_components.gif\n",
292
+ "https://docs.nav2.org/_images/ROS2_topic_hz.png\n",
293
+ "https://docs.nav2.org/_images/window1.png\n",
294
+ "https://docs.nav2.org/_images/calibration.jpg\n",
295
+ "https://docs.nav2.org/_images/greenbars.png\n",
296
+ "https://docs.nav2.org/_images/calibration_complete.png\n",
297
+ "https://docs.nav2.org/_images/calibration_parameters.png\n",
298
+ "https://docs.nav2.org/_images/kcachegrind.png\n",
299
+ "https://docs.nav2.org/_images/call_graph.png\n",
300
+ "https://docs.nav2.org/_images/gradient_layer_preview.gif\n",
301
+ "https://docs.nav2.org/_images/gradient_layer_run.png\n",
302
+ "https://docs.nav2.org/_images/nav2_straightline_gif.gif\n",
303
+ "https://docs.nav2.org/_images/nav2_pure_pursuit_gif.gif\n",
304
+ "https://docs.nav2.org/_images/2d_test.png\n",
305
+ "https://docs.nav2.org/_images/hybrid_144.png\n",
306
+ "https://docs.nav2.org/_images/state_reverse.png\n",
307
+ "https://docs.nav2.org/_images/00-37.png\n",
308
+ "https://docs.nav2.org/_images/constrained_smoother.png\n",
309
+ "https://docs.nav2.org/_images/w_cost_cusp_multiplier.png\n",
310
+ "https://docs.nav2.org/_images/cost_check_points.png\n",
311
+ "https://docs.nav2.org/_images/holonomic_examples.png\n",
312
+ "https://docs.nav2.org/_images/odometry_calibration.gif\n",
313
+ "https://docs.nav2.org/_images/panel-feedback.gif\n",
314
+ "https://docs.nav2.org/_images/use_final_approach_orientation_false.gif\n",
315
+ "https://docs.nav2.org/_images/use_final_approach_orientation_true.gif\n",
316
+ "https://docs.nav2.org/_images/rpp_goal_lookahead_interpolate.gif\n",
317
+ "https://moveit.ai/install/\n",
318
+ "https://moveit.ai/install/source/\n",
319
+ "https://moveit.ai/install/source-windows/\n",
320
+ "https://moveit.ai/install/docker/\n",
321
+ "https://moveit.ai/install-moveit2/binary-windows/\n",
322
+ "https://moveit.ai/install-moveit2/source-windows/\n",
323
+ "https://moveit.ai/documentation/contributing/releases/\n",
324
+ "https://moveit.ai/documentation/concepts/developer_concepts/\n",
325
+ "https://moveit.ai/about/maintainer_policy\n",
326
+ "https://moveit.ai/about/citations\n",
327
+ "https://moveit.ai/blog\n",
328
+ "https://moveit.ai/about/press_kit\n",
329
+ "https://moveit.ai/documentation/faqs\n",
330
+ "https://moveit.ai/documentation/contributing/pullrequests\n",
331
+ "https://moveit.ai/events/moveit/mentor/google/2022/05/20/2022-google-summer-of-code-students.html\n",
332
+ "https://moveit.ai/events/moveit/mentor/google/2022/03/15/gsoc.html\n",
333
+ "https://moveit.ai/ros/moveit/events/2021/10/29/rosworld-moveit-workshop.html\n",
334
+ "https://moveit.ai/events/world-moveit-day/ros/moveit/2021/03/22/world-moveit-day-2021-lighting-talks.html\n",
335
+ "https://moveit.ai/events/moveit/mentor/google/2021/03/11/gsof-mentor.html\n",
336
+ "https://moveit.ai/events/world-moveit-day/2021/01/15/world-moveit-day-2021.html\n",
337
+ "https://moveit.ai/events/world-moveit-day/2020/04/28/world-moveit-day-2020.html\n",
338
+ "https://moveit.ai/feed.xml\n",
339
+ "https://moveit.ai/documentation/contributing/future_projects/\n",
340
+ "https://moveit.ai/documentation/contributing/maintainer_pr\n",
341
+ "https://moveit.ai/documentation/contributing/continuous_integration/\n",
342
+ "https://moveit.ai/documentation/contributing/syncing_backporting/\n",
343
+ "https://moveit.ai/moveit/gsoc/2024/08/22/GSoC-2024-mujoco-support-for-ros2-moveit.html\n",
344
+ "https://moveit.ai/moveit/gsoc/2024/08/19/GSoC-2024-Zenoh-Support-and-Benchmarking.html\n",
345
+ "https://moveit.ai/release/jazzy/rolling/2024/06/30/New-MoveIt-LTS-release-for-ROS-2-Jazzy.html\n",
346
+ "https://moveit.ai/moveit/gsoc/2024/06/07/Google-Summer-of-Code-Contributor-Introductions.html\n",
347
+ "https://moveit.ai/planning%20pipeline/moveit2/motion%20planning/2024/03/25/MoveIt-Planning-Pipeline-Refactoring.html\n",
348
+ "https://moveit.ai/open%20source/open%20core%20software/2024/02/22/MoveIt-Pro-Open-Core.html\n",
349
+ "https://moveit.ai/2024/02/20/Introduciong-MoveIt-Pro-Rapid-Robotics-Application-Development-for-Unstructured-Environments.html\n",
350
+ "https://moveit.ai/bin%20picking/grasping/segmentation/manipulation/2024/01/31/Bin-Picking-Flexible-&-Fast-For-Any-Brand-of-Robot-Arm.html\n",
351
+ "https://moveit.ai/moveit/roscon/2023/11/29/MoveItCon-2023-Recap.html\n",
352
+ "https://moveit.ai/moveit/benchmarking/inverse%20kinematics/servo/2023/11/21/GSoC-2023-MoveIt-Servo-and-IK-Benchmarking.html\n",
353
+ "https://moveit.ai/moveit%202/ros/2023/05/31/balancing-stability-and-development.html\n",
354
+ "https://moveit.ai/moveit%202/ros/2023/05/19/optimization-based-planning-with-stomp.html\n",
355
+ "https://moveit.ai/moveit%202/ros/2023/05/03/google-summer-of-code-participants.html\n",
356
+ "https://moveit.ai/moveit%202/ros/2023/03/20/google-summer-of-code-2023.html\n",
357
+ "https://moveit.ai/moveit/ros/python/google/2023/02/15/MoveIt-Humble-Release.html\n",
358
+ "https://moveit.ai/moveit%202/parallel%20planning/motion%20planning/2023/02/15/parallel-planning-with-MoveIt-2.html\n",
359
+ "https://moveit.ai/moveit/google/2023/01/12/gsoc-simultaneous-trajectory-execution.html\n",
360
+ "https://moveit.ai/picknik/moveit/2022/08/16/Announcing-MoveIt-Studio.html\n",
361
+ "https://moveit.ai/ros/2022/07/22/Declarative-ROS-2-Parameters.html\n",
362
+ "https://moveit.ai/moveit/ros/2022/07/22/MoveIt-Servo-Inverse-Kinematics.html\n",
363
+ "https://moveit.ai/moveit/ros/ros2/humble/rolling/2022/07/15/MoveIt-2.5.2.html\n",
364
+ "https://moveit.ai/moveit/ros/humble/2022/06/02/MoveIt-Humble-Release.html\n",
365
+ "https://moveit.ai/robowflex/moveit/automation/2022/05/05/zak-kingston.html\n",
366
+ "https://moveit.ai/areospace/moveit/automation/2022/04/20/moveit-for-areospace.html\n",
367
+ "https://moveit.ai/moveit/ros/2022/03/02/2022-community-meeting.html\n",
368
+ "https://moveit.ai/moveit/ros/2022/02/15/2022-community-meeting.html\n",
369
+ "https://moveit.ai/moveit/ros/2022/01/20/2022-doc-a-thon.html\n",
370
+ "https://moveit.ai/moveit/ros/2021/12/17/sprint-report-3.html\n",
371
+ "https://moveit.ai/moveit/ros/2021/11/30/sprint-report-2.html\n",
372
+ "https://moveit.ai/ros/moveit/galactic/2021/07/08/moveit-galactic.html\n",
373
+ "https://moveit.ai/ros2/moveit/2021/06/08/moveit-vs-moveit2.html\n",
374
+ "https://moveit.ai/ros/moveit/noetic/2021/05/13/noetic-release.html\n",
375
+ "https://moveit.ai/ros/moveit/noetic/2021/04/15/noetic-update.html\n",
376
+ "https://moveit.ai/ros/moveit/2021/04/04/WMD-2021-results.html\n",
377
+ "https://moveit.ai/ros/moveit/2021/04/01/moveit3_release.html\n",
378
+ "https://moveit.ai/moveit/2021/01/14/moveitcon-2019-macau.html\n",
379
+ "https://moveit.ai/moveit/ros%202/tsc/2020/12/21/moveit-for-ros-2-migration-update.html\n",
380
+ "https://moveit.ai/moveit/pilz/motion%20planner/2020/12/17/Pilz-Plugin-for-MoveIt.html\n",
381
+ "https://moveit.ai/ros/ros%20world/moveit/2020/11/24/ros-world-2020.html\n",
382
+ "https://moveit.ai/bullet/collision%20detection/moveit/2020/11/18/bullet-collision.html\n",
383
+ "https://moveit.ai/moveit/ros/noetic/2020/10/13/announcing-moveit-1-1-1-release-for-ros-noetic.html\n",
384
+ "https://moveit.ai/moveit/ros/noetic/2020/09/28/moveit-noetic.html\n",
385
+ "https://moveit.ai/deep%20learning/grasping/moveit/3d%20perception/2020/09/28/grasp-deep-learning.html\n",
386
+ "https://moveit.ai/moveit/2020/09/10/ompl-constrained-planning-gsoc.html\n",
387
+ "https://moveit.ai/moveit/ros2/servo/jog/2020/09/09/moveit2-servo.html\n",
388
+ "https://moveit.ai/moveit2/ros2/foxy/release/2020/09/04/moveit2-foxy-release.html\n",
389
+ "https://moveit.ai/moveit/ros/2020/08/26/moveit-calibration.html\n",
390
+ "https://moveit.ai/moveit/ros/2020/08/04/moveit-melodic-release.html\n",
391
+ "https://moveit.ai/moveit/ros/2020/07/24/moveit-research-roundup.html\n",
392
+ "https://moveit.ai/moveit/ros/microsoft/windows/2020/07/14/moveit-on-windows.html\n",
393
+ "https://moveit.ai/moveit/ros/2020/06/26/world-moveit-day-2020-recap.html\n",
394
+ "https://moveit.ai/industrial/moveit2/ur5/2020/06/09/moveit2-robotic-application.html\n",
395
+ "https://moveit.ai/planning/ompl/2020/06/05/ompl-1-5-0-released.html\n",
396
+ "https://moveit.ai/moveit/gsoc/code/students/2020/05/05/gsoc-2020-projects.html\n",
397
+ "https://moveit.ai/moveit/ros/2020/04/22/moveitcon-2020.html\n",
398
+ "https://moveit.ai/moveit/ros/2020/02/25/mtc.html\n",
399
+ "https://moveit.ai/moveit/ros2/2020/02/18/moveit-2-beta-feature-list.html\n",
400
+ "https://moveit.ai/moveit/ros/2019/12/26/world-moveit-day-2019-recap.html\n",
401
+ "https://moveit.ai/moveit/ros/2019/11/18/moveit-grasps.html\n",
402
+ "https://moveit.ai/moveit/ros/2019/11/13/world-moveit-day-2019.html\n",
403
+ "https://moveit.ai/moveit/ros/2019/09/19/moveit-workshop-macau.html\n",
404
+ "https://moveit.ai/moveit!/ros/2019/06/12/google-summer-of-code.html\n",
405
+ "https://moveit.ai/moveit!/ros/2019/05/31/moveit2-alpha-release.html\n",
406
+ "https://moveit.ai/moveit!/ros/2019/05/28/moveit-survey-results.html\n",
407
+ "https://moveit.ai/moveit!/ros/2019/04/16/realtime-robotics.html\n",
408
+ "https://moveit.ai/moveit!/ros/descartes/2019/04/12/moveit-descartes.html\n",
409
+ "https://moveit.ai/moveit!/ros/2019/04/08/moveit-survey.html\n",
410
+ "https://moveit.ai/moveit!/ros/2019/03/08/announcing-the-moveit-1-release.html\n",
411
+ "https://moveit.ai/moveit!/ros/2019/03/01/announcing-the-moveit-2-port.html\n",
412
+ "https://moveit.ai/moveit!/ros/2019/02/11/china-developer-workshop-report.html\n",
413
+ "https://moveit.ai/moveit!/ros/2019/01/04/china-developer-workshop.html\n",
414
+ "https://moveit.ai/moveit!/ros/2018/12/11/gsoc-2018-perception-pipeline.html\n",
415
+ "https://moveit.ai/moveit!/ros/2018/11/20/wordlmoveitdayreport3.html\n",
416
+ "https://moveit.ai/moveit!/ros/2018/10/25/gsoc-motion-planning-support.html\n",
417
+ "https://moveit.ai/moveit!/ros/2018/10/23/gsoc-2018-setup-assistant-v2.html\n",
418
+ "https://moveit.ai/moveit!/ros/2018/09/26/moveit-at-roscon-2018.html\n",
419
+ "https://moveit.ai/moveit!/ros/2018/08/03/save-the-date-world-moveit-day.html\n",
420
+ "https://moveit.ai/moveit!/ros/2018/05/23/firstmelodicrelease.html\n",
421
+ "https://moveit.ai/moveit!/ros/2018/05/08/google-summer-of-code.html\n",
422
+ "https://moveit.ai/moveit!/ros/2018/04/16/moveit-on-discourse.html\n",
423
+ "https://moveit.ai/moveit!/ros/2018/02/26/tutorials-documentation-codesprint.html\n",
424
+ "https://moveit.ai/moveit!/ros/2018/02/01/wordlmoveitdayreport2.html\n",
425
+ "https://moveit.ai/moveit!/ros/2017/09/20/wordlmoveitday.html\n",
426
+ "https://moveit.ai/moveit!/ros/2017/06/20/videoMontage2017.html\n",
427
+ "https://moveit.ai/moveit!/ros/2017/03/15/videoMontageRequest.html\n",
428
+ "https://moveit.ai/moveit!/ros/2017/01/03/firstIndigoRelease.html\n",
429
+ "https://moveit.ai/moveit!/ros/2016/12/15/firstkineticrelease.html\n",
430
+ "https://moveit.ai/moveit!/ros/2016/11/01/moveit-community.html\n",
431
+ "https://moveit.ai/moveit!/ros/2016/10/11/moveit_community_meeting.html\n",
432
+ "https://moveit.ai/moveit!/ros/2016/09/02/firstjaderelease.html\n",
433
+ "https://moveit.ai/moveit!/ros/2016/08/28/wordlmoveitdayreport.html\n",
434
+ "https://moveit.ai/moveit!/ros/2016/08/22/teamdelftamazon.html\n",
435
+ "https://moveit.ai/moveit!/ros/2016/08/05/wordlmoveitday.html\n",
436
+ "https://moveit.ai/moveit!/ros/2015/09/28/iros.html\n",
437
+ "https://moveit.ai/moveit!/ros/2015/09/19/robobusiness.html\n",
438
+ "https://moveit.ai/moveit!/ros/2015/09/17/moveit-community.html\n",
439
+ "https://moveit.ai/moveit!/ros/2015/07/02/going-underwater.html\n",
440
+ "https://moveit.ai/moveit!/ros/2015/06/03/icra-2015-update.html\n",
441
+ "https://moveit.ai/moveit!/ros/2015/05/24/see-you-at-icra-2015.html\n",
442
+ "https://moveit.ai/moveit!/ros/2015/03/17/versatile-manipulation-baxter-robot-with-moveit-used-to-teach-robotics-fundamentals-at-columbia-university.html\n",
443
+ "https://moveit.ai/moveit!/ros/2015/02/17/new-pal-mobile-manipulation-robot-tiago-runs-moveit.html\n",
444
+ "https://moveit.ai/moveit!/ros/2015/01/29/update-on-moveit-in-ros-i-community-meeting.html\n",
445
+ "https://moveit.ai/moveit!/ros/2015/01/29/the-amazon-picking-challenge.html\n",
446
+ "https://moveit.ai/moveit!/ros/2015/01/29/alten-mechatronics-applies-robotic-technology-in-fei-transmission-electron-microscopes-tem.html\n",
447
+ "https://moveit.ai/moveit!/ros/2015/01/29/new-capabilities-in-moveit-the-cartesian-path-planner-plugin.html\n",
448
+ "https://moveit.ai/moveit!/ros/2014/08/05/ronex-and-moveit.html\n",
449
+ "https://moveit.ai/moveit!/ros/2014/05/08/ckbot-whole-arm-grasping-and-moveit.html\n",
450
+ "https://moveit.ai/moveit!/ros/2014/04/03/rossurvey.html\n",
451
+ "https://moveit.ai/general/2014/02/20/first-new-robot-2014-hollie.html\n",
452
+ "https://moveit.ai/moveit!/ros/2014/01/07/a-new-year-a-new-website-a-new-movie.html\n",
453
+ "https://moveit.ai/moveit!/ros/2013/11/05/moveit-survey-results.html\n",
454
+ "https://moveit.ai/moveit!/ros/2013/10/22/robobusiness-2013.html\n",
455
+ "https://moveit.ai/moveit!/ros/2013/10/08/moveit-survey.html\n",
456
+ "https://moveit.ai/moveit!/ros/2013/08/20/moveit-pick-place-pr2.html\n",
457
+ "https://moveit.ai/moveit!/ros/2013/05/07/icra-motion-planning-tutorial.html\n",
458
+ "https://moveit.ai/moveit!/ros/2013/05/06/icra-roscon-trip-report.html\n",
459
+ "https://moveit.ai/install/source/dependencies/\n",
460
+ "https://moveit.ai/events/2022-google-summer-of-code/\n",
461
+ "https://moveit.ai/events/rosworld-2021-workshop/\n",
462
+ "https://moveit.ai/events/2022-moveit-community-meeting/\n",
463
+ "https://moveit.ai/assets/pdfs/2019/moveit_2019_survey.pdf\n",
464
+ "https://moveit.ai/events/world-moveit-day-2018/\n",
465
+ "https://moveit.ai/events/world-moveit-day-2017/\n",
466
+ "https://moveit.ai/events/world-moveit-day/\n",
467
+ "https://moveit.ai/events/rosworld-2021-workshop/Stretch%20Payload%20&%20Pulling%20Force.pdf\n",
468
  "https://github.com/ros2/ros2/tree/rolling/README.md\n",
469
  "https://github.com/ros2/ros2/tree/rolling/.gitignore\n",
470
  "https://github.com/ros2/ros2/tree/rolling/CODEOWNERS\n",
 
7265
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.hh\n",
7266
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/gazebo_generator.cc\n",
7267
  "https://github.com/gazebosim/gazebo-classic/blob/gazebo11/./gazebo/msgs/generator/GazeboGenerator.cc\n",
7268
+ "Sample regular document: {'_id': ObjectId('6755e9baa1014826dc57c074'), 'link': 'https://www.ros.org/', 'type': 'Document', 'content': ' ROS: Home Why ROS? Getting Started Community Ecosystem ROS - Robot Operating System The Robot Operating System (ROS) is a set of software libraries and tools that help you build robot applications. From drivers to state-of-the-art algorithms, and with powerful developer tools, ROS has what you need for your next robotics project. And it\\'s all open source. What is ROS? ROS Videos \" Install Jazzy Jalisco Jazzy Jalisco is our latest ROS 2 LTS release targeted at the Ubuntu 24.04 (Noble) and Windows 10, though other systems are supported to varying degrees. Learn More Humble Hawksbill ROS 2 Humble Hawksbill is a slighly older LTS release of ROS 2 targeted at Ubuntu 22.04 (Jammy) and Windows 10. Other systems are supported including tier 3 support for 20.04 for those transitioning from ROS 1. Learn More Support There are several mechanisms in place to support the ROS community, each with its own purpose. Documentation Documentation and tutorials for ROS 2 Stack Exchange Ask questions. Get answers. Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Documentation Documentation and tutorials for ROS 2 Robotics Stack Exchange Ask questions.Get answers.All ROS versions Forums Hear the latest discussions ROS 1 Wiki Legacy documentation and tutorials for ROS 1 Recent Updates and Highlights ROSCon 2024 Videos are Now Available See the ROSCon 2024 website for details 11/18/2024 - Katherine Scott The videos from ROSCon 2024 in Odense are now available on the ROSCon Website (see the program), this Vimeo showcase, and in the ROS documentation. The ROSCon website also includes the slides from all the talks at ROSCon. I have also included a list of all the videos below. I want to thank AMD for being our 2024 ROSCon video sponsor, their generous support makes the ROSCon live stream and videos possible. 
READ MORE Recent ROS Discourse Posts ROS News of the Week 11/22/2024 - ROS Discourse Gazebo Classic and Citadel End of Life 12/2/2024 - ROS Discourse ROS 2 driver for Mitsubishi Melfa RV-FR 10/24/2024 ROS Discourse Home Why ROS? Getting Started Community Ecosystem Q&A Forum Packages Wiki Documentation media Q&A Forum Packages ROSCon Wiki documentation discord Brought to you by Open Robotics | licensed under Creative Commons Attributions 3.0 | ©2021 Open Robotics '}\n",
7269
+ "Sample github document {'_id': ObjectId('6755e9bda1014826dc57c078'), 'link': 'https://github.com/ros2/ros2/tree/rolling/README.md', 'type': 'Github', 'content': \"#About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/). Onceyou'veinstalledROSstartbylearningsome[basicconcepts](https://docs.ros.org/en/rolling/Concepts/Basic.html)andtakealookatour[beginnertutorials](https://docs.ros.org/en/rolling/Tutorials/Beginner-CLI-Tools.html). #JointheROSCommunity ##CommunityResources *[ROSDiscussionForum](https://discourse.ros.org/) *[ROSDiscordServer](https://discord.com/servers/open-robotics-1077825543698927656) *[RoboticsStackExchange](https://robotics.stackexchange.com/)(preferredROSsupportforum). *[OfficialROSVideos](https://vimeo.com/osrfoundation) *[ROSCon](https://roscon.ros.org),ouryearlydeveloperconference. 
*CiteROS2inacademicworkusing[DOI:10.1126/scirobotics.abm6074](https://www.science.org/doi/10.1126/scirobotics.abm6074) ##DeveloperResources *[ROS2Documentation](https://docs.ros.org/) *[ROSPackageAPIreference](https://docs.ros.org/en/rolling/p/) *[ROSPackageIndex](https://index.ros.org/) *[ROSonDockerHub](https://hub.docker.com/_/ros/) *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg) ROSismadepossiblethroughthegeneroussupportofopensourcecontributorsandthenon-profit[OpenSourceRoboticsFoundation(OSRF)](https://www.openrobotics.org/). TaxdeductibledonationstotheOSRFcanbe[madehere.](https://donorbox.org/support-open-robotics?utm_medium=qrcode&utm_source=qrcode) \"}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7270
  ]
7271
  }
7272
  ],
 
7309
  },
7310
  {
7311
  "cell_type": "code",
7312
+ "execution_count": 9,
7313
  "metadata": {},
7314
  "outputs": [
7315
  {
 
7318
  "DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)"
7319
  ]
7320
  },
7321
+ "execution_count": 9,
7322
  "metadata": {},
7323
  "output_type": "execute_result"
7324
  }
project/TrainingPipeline.ipynb CHANGED
@@ -12,7 +12,7 @@
12
  ],
13
  "metadata": {
14
  "kernelspec": {
15
- "display_name": ".venv",
16
  "language": "python",
17
  "name": "python3"
18
  },
@@ -26,7 +26,7 @@
26
  "name": "python",
27
  "nbconvert_exporter": "python",
28
  "pygments_lexer": "ipython3",
29
- "version": "3.11.9"
30
  }
31
  },
32
  "nbformat": 4,
 
12
  ],
13
  "metadata": {
14
  "kernelspec": {
15
+ "display_name": "Python 3",
16
  "language": "python",
17
  "name": "python3"
18
  },
 
26
  "name": "python",
27
  "nbconvert_exporter": "python",
28
  "pygments_lexer": "ipython3",
29
+ "version": "3.12.7"
30
  }
31
  },
32
  "nbformat": 4,
project/app.py CHANGED
@@ -1,15 +1,8 @@
1
- # Make sure you have run "ollama serve"
2
  # This is the same code as ClearML
3
- import os
4
- import sys
5
  from operator import itemgetter
6
-
7
  import gradio as gr
8
- from dotenv import load_dotenv
9
  from langchain.prompts import PromptTemplate
10
- from langchain_community.embeddings import OllamaEmbeddings
11
- from langchain_community.llms import Ollama
12
- from qdrant_client import QdrantClient
13
  from shared import getModel, getEmbeddingsModel, getQdrantClient
14
 
15
  def answer(samplePrompt, useSample, Query):
@@ -84,8 +77,6 @@ def answer(samplePrompt, useSample, Query):
84
  links = [result.payload['link'] for result in results]
85
  topTexts = ''
86
  for index in topIndexes:
87
- print("Top texts: ", texts[index])
88
- print("Link: ", links[index])
89
  topTexts += texts[index]
90
 
91
  # Building prompt
@@ -99,7 +90,10 @@ def answer(samplePrompt, useSample, Query):
99
  prompt = PromptTemplate.from_template(template)
100
  else:
101
  template = """
102
- Answer the question based on the document below. If you can't answer the question, reply "I don't know"
 
 
 
103
 
104
  Document: {document}
105
  Question: {question}
@@ -115,7 +109,10 @@ demo = gr.Interface(
115
  fn=answer,
116
  inputs=[
117
  gr.Dropdown(
118
- ["What is ROS?", "Write me code to move a robot"], label="Sample Prompt"
 
 
 
119
  ),
120
  "checkbox",
121
  "text",
 
1
+ # Make sure ollama serve is running(docker or terminal)
2
  # This is the same code as ClearML
 
 
3
  from operator import itemgetter
 
4
  import gradio as gr
 
5
  from langchain.prompts import PromptTemplate
 
 
 
6
  from shared import getModel, getEmbeddingsModel, getQdrantClient
7
 
8
  def answer(samplePrompt, useSample, Query):
 
77
  links = [result.payload['link'] for result in results]
78
  topTexts = ''
79
  for index in topIndexes:
 
 
80
  topTexts += texts[index]
81
 
82
  # Building prompt
 
90
  prompt = PromptTemplate.from_template(template)
91
  else:
92
  template = """
93
+ You are an AI agent that has retreived a document from the web.
94
+ If the document is useful for answering the question use it.
95
+ If the document is not useful, answer normally.
96
+ Do not mention the document.
97
 
98
  Document: {document}
99
  Question: {question}
 
109
  fn=answer,
110
  inputs=[
111
  gr.Dropdown(
112
+ ["How can I develop the navigation stack of an agent with egomotion?",
113
+ "What is ROS?", "How many companies is Nav2 trusted by worldwide?",
114
+ "How would I build a ROS 2 Navigation Framework and System?",
115
+ "Write me code to move a robot using Moveit"], label="Sample Prompt"
116
  ),
117
  "checkbox",
118
  "text",