{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/workspaces/RAG_LLM/project/shared.py:57: LangChainDeprecationWarning: The class `OllamaEmbeddings` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaEmbeddings``.\n",
      "  return OllamaEmbeddings(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n",
      "/workspaces/RAG_LLM/project/shared.py:70: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  return Ollama(model=MODEL, base_url=\"http://host.docker.internal:11434\")\n"
     ]
    }
   ],
   "source": [
    "# See README for more info on how the DataCollectionPipeline works\n",
    "# The retrieval pipeline is part of the DataCollectionPipeline\n",
    "from shared import getQdrantClient, getEmbeddingsModel, getModel\n",
    "from langchain_community.llms import Ollama\n",
    "from langchain.prompts import PromptTemplate\n",
    "from operator import itemgetter\n",
    "# Create a qdrant connection\n",
    "qClient = getQdrantClient()\n",
    "\n",
    "# Setup the text embedder\n",
    "embeddingsModel = getEmbeddingsModel()\n",
    "\n",
    "# Setup the model\n",
    "model = getModel()\n",
    "\n",
    "# Retrieval Pipeline\n",
    "# Retrieve the chunks with the most similar embeddings from Qdrant\n",
    "def retriever(text, collection):\n",
    "    results = qClient.search(\n",
    "        collection_name=collection,\n",
    "        query_vector = embeddingsModel.embed_query(text),\n",
    "        limit=10\n",
    "    )\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Query expansion:  Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.\n",
      "Coding Question?:  1\n",
      "Related Collection:  Github\n",
      "Top texts:  #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/README.md\n",
      "Top texts:  type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique_identifier_msgs.git version:rolling ros2/urdf: type:git url:https://github.com/ros2/urdf.git version:rolling ros2/yaml_cpp_vendor: type:git url:https://github.com/ros2/yaml_cpp_vendor.git version:rolling\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/ros2.repos\n",
      "Top texts:  *[ROSResourceStatusPage](https://status.openrobotics.org/) *[REP-2000](https://ros.org/reps/rep-2000.html):ROS2ReleasesandTargetPlatforms ##ProjectResources *[PurchaseROSSwag](https://spring.ros.org/) *[InformationabouttheROSTrademark](https://www.ros.org/blog/media/) *OnSocialMedia *[OpenRoboticsonLinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation) *[OpenRoboticsonTwitter](https://twitter.com/OpenRoboticsOrg) *[ROS.orgonTwitter](https://twitter.com/ROSOrg)\n",
      "Link:  https://github.com/ros2/ros2/tree/rolling/README.md\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"Here's an example of what the README file for ROS could look like:\\n\\n**Welcome to the Robot Operating System (ROS)**\\n\\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\\n\\n### Getting Started\\n\\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\\n\\n### What's Included\\n\\nROS includes a range of open-source projects, including:\\n\\n*   **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\\n*   **tlsf**: A library for secure communication over TLS (Transport Layer Security).\\n*   **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\\n*   **urdf**: A package for working with URDF (Unified Robot Description Format) files.\\n*   **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YAML files.\\n\\n### ROS Releases and Target Platforms\\n\\nFor more information on ROS releases, target platforms, and release notes, check out [REP-2000](https://ros.org/reps/rep-2000.html).\\n\\n### Project Resources\\n\\n*   **ROSSwag**: Purchase ROS-related merchandise from our online store.\\n*   **ROS Trademark Information**: Learn about the ROS trademark.\\n\\n### Get Involved\\n\\nStay up-to-date with the latest news and developments in ROS:\\n\\n*   Follow us on [LinkedIn](https://www.linkedin.com/company/open-source-robotics-foundation)\\n*   Join our Twitter community: [OpenRoboticsOrg](https://twitter.com/OpenRoboticsOrg), [ROSOrg](https://twitter.com/ROSOrg)\\n\\n### License and Contributions\\n\\nROS is an open-source project, licensed under the Apache 2.0 license.\\n\\nWe welcome contributions from the ROS community! If you have any ideas or bug fixes to contribute, check out our [contribution guidelines](https://ros.org/blog/contribute/).\\n\\n**Thank You**\\n\\nThanks for choosing ROS as your platform for robotics development!\\n\\nYou can modify this README file according to your needs and preferences.\""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# User query\n",
    "query = \"Can you create a README file for ROS\"\n",
    "\n",
    "# Query expansion(I only generate one additional prompt for simplicity)\n",
    "template = \"\"\"\n",
    "Rewrite the prompt. The new prompt must offer a different perspective.\n",
    "Do not change the meaning. Output only the rewritten prompt with no introduction.\n",
    "    Prompt: {prompt}\n",
    "\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
    "queryExpansion = chain.invoke({\"prompt\": query})\n",
    "print(\"Query expansion: \", queryExpansion)\n",
    "\n",
    "# Self-querying(The metadata I will be generating determines whether to look through the Qdrant collection containing github code)\n",
    "template = \"\"\"\n",
    "You are an AI assistant. You must determine if the prompt requires code as the answer.\n",
    "Output a 1 if it is or a 0 if it is not and nothing else.\n",
    "    Prompt: {prompt}\n",
    "\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "chain = {\"prompt\": itemgetter(\"prompt\")} | prompt | model\n",
    "codingQuestion = chain.invoke({\"prompt\": query})\n",
    "print(\"Coding Question?: \", codingQuestion)\n",
    "\n",
    "# Filtered vector search for each of the N queries after expansion\n",
    "relatedCollection = 'Document'\n",
    "if (codingQuestion == '1'):\n",
    "    relatedCollection = 'Github'\n",
    "print(\"Related Collection: \", relatedCollection)\n",
    "results1 = retriever(query, relatedCollection)\n",
    "results2 = retriever(queryExpansion, relatedCollection)\n",
    "\n",
    "# Collecting results\n",
    "results = results1+results2\n",
    "\n",
    "# Reranking(Instead of using a CrossEncoder, I will manually compare embeddings)\n",
    "ids = [result.id for result in results]\n",
    "scores = [result.score for result in results]\n",
    "topIds = []\n",
    "topIndexes = []\n",
    "for x in range(3):\n",
    "    maxScore = 0\n",
    "    maxIndex = 0\n",
    "    for i in range(len(ids)):\n",
    "        if ids[i] not in topIds and scores[i] > maxScore:\n",
    "            maxScore = scores[i]\n",
    "            maxIndex = i\n",
    "    topIds.append(ids[maxIndex])\n",
    "    topIndexes.append(maxIndex)\n",
    "texts = [result.payload['text'] for result in results]\n",
    "links = [result.payload['link'] for result in results]\n",
    "topTexts = ''\n",
    "for index in topIndexes:\n",
    "    print(\"Top texts: \", texts[index])\n",
    "    print(\"Link: \", links[index])\n",
    "    topTexts += texts[index]\n",
    "\n",
    "# Building prompt\n",
    "if(codingQuestion == '1'):\n",
    "    template = \"\"\"\n",
    "    Write code for the following question given the related coding document below.\n",
    "\n",
    "    Document: {document}\n",
    "    Question: {question}\n",
    "    \"\"\"\n",
    "    prompt = PromptTemplate.from_template(template)\n",
    "else:\n",
    "    template = \"\"\"\n",
    "    Answer the question based on the document below. If you can't answer the question, reply \"I don't know\"\n",
    "\n",
    "    Document: {document}\n",
    "    Question: {question}\n",
    "    \"\"\"\n",
    "    prompt = PromptTemplate.from_template(template)\n",
    "\n",
    "# Obtaining answer\n",
    "chain = {\"document\": itemgetter(\"document\"), \"question\": itemgetter(\"question\")} | prompt | model\n",
    "chain.invoke({\"document\": topTexts, \"question\": query})"
   ]
  },
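  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional reranking sketch: a CrossEncoder instead of the manual score comparison above.\n",
    "# This cell is illustrative and was not part of the original run; it assumes the\n",
    "# sentence-transformers package is installed and downloads the named model on first use.\n",
    "from sentence_transformers import CrossEncoder\n",
    "\n",
    "reranker = CrossEncoder(\"cross-encoder/ms-marco-MiniLM-L-6-v2\")\n",
    "# Score every retrieved chunk against the original user query\n",
    "pairs = [(query, result.payload['text']) for result in results]\n",
    "rerankScores = reranker.predict(pairs)\n",
    "# Keep the indexes of the three highest cross-encoder scores\n",
    "# (duplicate chunks retrieved by both queries are not deduplicated in this sketch)\n",
    "topIndexes = sorted(range(len(results)), key=lambda i: rerankScores[i], reverse=True)[:3]\n",
    "topTexts = ''.join(results[i].payload['text'] for i in topIndexes)"
   ]
  }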
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}