{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/workspaces/RAG_LLM/project/shared.py:57: LangChainDeprecationWarning: The class `OllamaEmbeddings` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaEmbeddings``.\n",
      "  return OllamaEmbeddings(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n",
      "/workspaces/RAG_LLM/project/shared.py:70: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  return Ollama(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n"
     ]
    }
   ],
   "source": [
    "# See README for more info on how the DataCollectionPipeline works\n",
    "# The retrieval pipeline is part of the DataCollectionPipeline\n",
    "from shared import getQdrantClient, getEmbeddingsModel, getModel\n",
    "from langchain_community.llms import Ollama\n",
    "from langchain.prompts import PromptTemplate\n",
    "from operator import itemgetter\n",
    "# Create a qdrant connection\n",
    "qClient = getQdrantClient()\n",
    "\n",
    "# Setup the text embedder\n",
    "embeddingsModel = getEmbeddingsModel()\n",
    "\n",
    "# Setup the model\n",
    "model = getModel()\n",
    "\n",
    "# Retrieval Pipeline\n",
    "# Retrieve the chunks with the most similar embeddings from Qdrant\n",
    "def retriever(text, collection):\n",
    "    results = qClient.search(\n",
    "        collection_name=collection,\n",
    "        query_vector = embeddingsModel.embed_query(text),\n",
    "        limit=10\n",
    "    )\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Query expansion:  Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.\n",
      "Coding Question?:  1\n",
      "Related Collection:  Github\n",
      "Top texts:  #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/README.md\n",
      "Top texts:  type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique_identifier_msgs.git version:rolling ros2/urdf: type:git url:https://github.com/ros2/urdf.git version:rolling ros2/yaml_cpp_vendor: type:git url:https://github.com/ros2/yaml_cpp_vendor.git version:rolling\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
      "Top texts:  *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg)\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/README.md\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"Here's an example of what the README file for ROS could look like:\\n\\n**Welcome to the Robot Operating System (ROS)**\\n\\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\\n\\n### Getting Started\\n\\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\\n\\n### What's Included\\n\\nROS includes a range of open-source projects, including:\\n\\n*   **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\\n*   **tlsf**: A library for secure communication over TLS (Transport Layer Security).\\n*   **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\\n*   **urdf**: A package for working with URDF (Unified Robot Description Format) files.\\n*   **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YAML files.\\n\\n### ROS Releases and Target Platforms\\n\\nFor more information on ROS releases, target platforms, and release notes, check out [REP-2000](https://ros.org/reps/rep-2000.html).\\n\\n### Project Resources\\n\\n*   **ROSSwag**: Purchase ROS-related merchandise from our online store.\\n*   **ROS Trademark Information**: Learn about the ROS trademark.\\n\\n### Get Involved\\n\\nStay up-to-date with the latest news and developments in ROS:\\n\\n*   Follow us on [LinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation)\\n*   Join our Twitter community: [OpenRoboticsOrg](https://twitter.com/OpenRoboticsOrg), [ROSOrg](https://twitter.com/ROSOrg)\\n\\n### License and Contributions\\n\\nROS is an open-source project, licensed under the Apache 2.0 license.\\n\\nWe welcome contributions from the ROS community! If you have any ideas or bug fixes to contribute, check out our [contribution guidelines](https://ros.org/blog/contribute/).\\n\\n**Thank You**\\n\\nThanks for choosing ROS as your platform for robotics development!\\n\\nYou can modify this README file according to your needs and preferences.\""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# User query\n",
    "query = \"Can you create a README file for ROS\"\n",
    "\n",
    "# Query expansion(I only generate one additional prompt for simplicity)\n",
    "template = \"\"\"\n",
    "Rewrite the prompt. The new prompt must offer a different perspective.\n",
    "Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
    "    Prompt: {prompt}\n",
    "\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
    "queryExpansion = chain.invoke({\"prompt\": query})\n",
    "print(\"Query expansion: \", queryExpansion)\n",
    "\n",
    "# Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
    "template = \"\"\"\n",
    "You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
    "Output a 1 if it is or a 0 if it is not and nothing else.\n",
    "    Prompt: {prompt}\n",
    "\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
    "codingQuestion = chain.invoke({\"prompt\": query})\n",
    "print(\"Coding Question?: \", codingQuestion)\n",
    "\n",
    "# Filtered vector search for each of the N queries after expansion\n",
    "relatedCollection = 'Document'\n",
    "if (codingQuestion == '1'):\n",
    "    relatedCollection = 'Github'\n",
    "print(\"Related Collection: \", relatedCollection)\n",
    "results1 = retriever(query, relatedCollection)\n",
    "results2 = retriever(queryExpansion, relatedCollection)\n",
    "\n",
    "# Collecting results\n",
    "results = results1+results2\n",
    "\n",
    "# Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
    "ids = [result.id for result in results]\n",
    "scores = [result.score for result in results]\n",
    "topIds = []\n",
    "topIndexes = []\n",
    "for x in range(3):\n",
    "    maxScore = 0\n",
    "    maxIndex = 0\n",
    "    for i in range(len(ids)):\n",
    "        if ids[i] not in topIds and scores[i] > maxScore:\n",
    "            maxScore = scores[i]\n",
    "            maxIndex = i\n",
    "    topIds.append(ids[maxIndex])\n",
    "    topIndexes.append(maxIndex)\n",
    "texts = [result.payload['text'] for result in results]\n",
    "links = [result.payload['link'] for result in results]\n",
    "topTexts = ''\n",
    "for index in topIndexes:\n",
    "    print(\"Top texts: \", texts[index])\n",
    "    print(\"Link: \", links[index])\n",
    "    topTexts += texts[index]\n",
    "\n",
    "# Building prompt\n",
    "if(codingQuestion == '1'):\n",
    "    template = \"\"\"\n",
    "    Write code for the following question given the related coding document below.\n",
    "\n",
    "    Document: {document}\n",
    "    Question: {question}\n",
    "    \"\"\"\n",
    "    prompt = PromptTemplate.from_template(template)\n",
    "else:\n",
    "    template = \"\"\"\n",
    "    Answer the question based on the document below. If you can't answer the question, reply \"I don't know\"\n",
    "\n",
    "    Document: {document}\n",
    "    Question: {question}\n",
    "    \"\"\"\n",
    "    prompt = PromptTemplate.from_template(template)\n",
    "\n",
    "# Obtaining answer\n",
    "chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
    "chain.invoke({\"document\": topTexts, \"question\": query})"
   ]
  },
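  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional reranking sketch: a CrossEncoder instead of the manual score comparison above.\n",
    "# This cell is illustrative and was not part of the original run; it assumes the\n",
    "# sentence-transformers package is installed and downloads the named model on first use.\n",
    "from sentence_transformers import CrossEncoder\n",
    "\n",
    "reranker = CrossEncoder(\"cross-encoder/ms-marco-MiniLM-L-6-v2\")\n",
    "# Score every retrieved chunk against the original user query\n",
    "pairs = [(query, result.payload['text']) for result in results]\n",
    "rerankScores = reranker.predict(pairs)\n",
    "# Keep the indexes of the three highest cross-encoder scores\n",
    "# (duplicate chunks retrieved by both queries are not deduplicated in this sketch)\n",
    "topIndexes = sorted(range(len(results)), key=lambda i: rerankScores[i], reverse=True)[:3]\n",
    "topTexts = ''.join(results[i].payload['text'] for i in topIndexes)"
   ]
  }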
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}