File size: 10,643 Bytes
2af0eb7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/RAG_LLM/project/shared.py:57: LangChainDeprecationWarning: The class `OllamaEmbeddings` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaEmbeddings``.\n",
" return OllamaEmbeddings(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n",
"/workspaces/RAG_LLM/project/shared.py:70: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
" return Ollama(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n"
]
}
],
"source": [
"# See README for more info on how the DataCollectionPipeline works\n",
"# The retrieval pipeline is part of the DataCollectionPipeline\n",
"from shared import getQdrantClient, getEmbeddingsModel, getModel\n",
"from langchain_community.llms import Ollama\n",
"from langchain.prompts import PromptTemplate\n",
"from operator import itemgetter\n",
"# Create a qdrant connection\n",
"qClient = getQdrantClient()\n",
"\n",
"# Setup the text embedder\n",
"embeddingsModel = getEmbeddingsModel()\n",
"\n",
"# Setup the model\n",
"model = getModel()\n",
"\n",
"# Retrieval Pipeline\n",
"# Retrieve the chunks with the most similar embeddings from Qdrant\n",
"def retriever(text, collection):\n",
" results = qClient.search(\n",
" collection_name=collection,\n",
" query_vector = embeddingsModel.embed_query(text),\n",
" limit=10\n",
" )\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Query expansion: Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.\n",
"Coding Question?: 1\n",
"Related Collection: Github\n",
"Top texts: #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\n",
"Link: https://github.com/ros2/ros2/tree/rolling/README.md\n",
"Top texts: type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique_identifier_msgs.git version:rolling ros2/urdf: type:git url:https://github.com/ros2/urdf.git version:rolling ros2/yaml_cpp_vendor: type:git url:https://github.com/ros2/yaml_cpp_vendor.git version:rolling\n",
"Link: https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
"Top texts: *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg)\n",
"Link: https://github.com/ros2/ros2/tree/rolling/README.md\n"
]
},
{
"data": {
"text/plain": [
"\"Here's an example of what the README file for ROS could look like:\\n\\n**Welcome to the Robot Operating System (ROS)**\\n\\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\\n\\n### Getting Started\\n\\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\\n\\n### What's Included\\n\\nROS includes a range of open-source projects, including:\\n\\n* **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\\n* **tlsf**: A library for secure communication over TLS (Transport Layer Security).\\n* **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\\n* **urdf**: A package for working with URDF (Unified Robot Description Format) files.\\n* **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YAML files.\\n\\n### ROS Releases and Target Platforms\\n\\nFor more information on ROS releases, target platforms, and release notes, check out [REP-2000](https://ros.org/reps/rep-2000.html).\\n\\n### Project Resources\\n\\n* **ROSSwag**: Purchase ROS-related merchandise from our online store.\\n* **ROS Trademark Information**: Learn about the ROS trademark.\\n\\n### Get Involved\\n\\nStay up-to-date with the latest news and developments in ROS:\\n\\n* Follow us on [LinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation)\\n* Join our Twitter community: [OpenRoboticsOrg](https://twitter.com/OpenRoboticsOrg), [ROSOrg](https://twitter.com/ROSOrg)\\n\\n### License and Contributions\\n\\nROS is an open-source project, licensed under the Apache 2.0 license.\\n\\nWe welcome contributions from the ROS community! If you have any ideas or bug fixes to contribute, check out our [contribution guidelines](https://ros.org/blog/contribute/).\\n\\n**Thank You**\\n\\nThanks for choosing ROS as your platform for robotics development!\\n\\nYou can modify this README file according to your needs and preferences.\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# User query\n",
"query = \"Can you create a README file for ROS\"\n",
"\n",
"# Query expansion(I only generate one additional prompt for simplicity)\n",
"template = \"\"\"\n",
"Rewrite the prompt. The new prompt must offer a different perspective.\n",
"Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
" Prompt: {prompt}\n",
"\"\"\"\n",
"prompt = PromptTemplate.from_template(template)\n",
"chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
"queryExpansion = chain.invoke({\"prompt\": query})\n",
"print(\"Query expansion: \", queryExpansion)\n",
"\n",
"# Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
"template = \"\"\"\n",
"You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
"Output a 1 if it is or a 0 if it is not and nothing else.\n",
" Prompt: {prompt}\n",
"\"\"\"\n",
"prompt = PromptTemplate.from_template(template)\n",
"chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
"codingQuestion = chain.invoke({\"prompt\": query})\n",
"print(\"Coding Question?: \", codingQuestion)\n",
"\n",
"# Filtered vector search for each of the N queries after expansion\n",
"relatedCollection = 'Document'\n",
"if (codingQuestion == '1'):\n",
" relatedCollection = 'Github'\n",
"print(\"Related Collection: \", relatedCollection)\n",
"results1 = retriever(query, relatedCollection)\n",
"results2 = retriever(queryExpansion, relatedCollection)\n",
"\n",
"# Collecting results\n",
"results = results1+results2\n",
"\n",
"# Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
"ids = [result.id for result in results]\n",
"scores = [result.score for result in results]\n",
"topIds = []\n",
"topIndexes = []\n",
"for x in range(3):\n",
" maxScore = 0\n",
" maxIndex = 0\n",
" for i in range(len(ids)):\n",
" if ids[i] not in topIds and scores[i] > maxScore:\n",
" maxScore = scores[i]\n",
" maxIndex = i\n",
" topIds.append(ids[maxIndex])\n",
" topIndexes.append(maxIndex)\n",
"texts = [result.payload['text'] for result in results]\n",
"links = [result.payload['link'] for result in results]\n",
"topTexts = ''\n",
"for index in topIndexes:\n",
" print(\"Top texts: \", texts[index])\n",
" print(\"Link: \", links[index])\n",
" topTexts += texts[index]\n",
"\n",
"# Building prompt\n",
"if(codingQuestion == '1'):\n",
" template = \"\"\"\n",
" Write code for the following question given the related coding document below.\n",
"\n",
" Document: {document}\n",
" Question: {question}\n",
" \"\"\"\n",
" prompt = PromptTemplate.from_template(template)\n",
"else:\n",
" template = \"\"\"\n",
" Answer the question based on the document below. If you can't answer the question, reply \"I don't know\"\n",
"\n",
" Document: {document}\n",
" Question: {question}\n",
" \"\"\"\n",
" prompt = PromptTemplate.from_template(template)\n",
"\n",
"# Obtaining answer\n",
"chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
"chain.invoke({\"document\": topTexts, \"question\": query})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|