In [None]:
# See README for more info on how the DataCollectionPipeline works
# The retrieval pipeline is part of the DataCollectionPipeline
from shared import getQdrantClient, getEmbeddingsModel, getModel
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from operator import itemgetter
# Qdrant client handle; retriever() below calls qClient.search on it.
# The helper comes from the local `shared` module (connection details are not visible here).
qClient = getQdrantClient()

# Text embedder; retriever() calls embeddingsModel.embed_query to turn a query string into a vector.
embeddingsModel = getEmbeddingsModel()

# Model that terminates every `prompt | model` chain built later in this notebook.
model = getModel()

# Retrieval Pipeline
# Retrieve the chunks with the most similar embeddings from Qdrant
def retriever(text, collection, limit=10):
    """Return the Qdrant hits whose stored embeddings are most similar to `text`.

    Args:
        text: Query string; it is embedded with the module-level `embeddingsModel`.
        collection: Name of the Qdrant collection to search.
        limit: Maximum number of hits to request (defaults to 10, the value
            that used to be hard-coded).

    Returns:
        Whatever `qClient.search` returns for the query. The rest of this
        notebook reads `.id`, `.score` and `.payload` from each hit.
    """
    # Embed the query once, then hand the vector to Qdrant.
    queryVector = embeddingsModel.embed_query(text)
    return qClient.search(
        collection_name=collection,
        query_vector=queryVector,
        limit=limit,
    )


In [5]:
# The question the user wants answered
query = "Can you create a README file for ROS"

# Query expansion: ask the model for a single paraphrase of the query
# (only one extra prompt is generated, to keep things simple)
template = (
    "\n"
    "Rewrite the prompt. The new prompt must offer a different perspective.\n"
    "Do not change the meaning. Output only the rewritten prompt with no introduction.\n"
    " Prompt: {prompt}\n"
)
prompt = PromptTemplate.from_template(template)
chain = (
    {"prompt": itemgetter("prompt")}
    | prompt
    | model
)
queryExpansion = chain.invoke({"prompt": query})
print("Query expansion: ", queryExpansion)

# Self-querying: ask the model whether the query needs code as its answer.
# The resulting flag decides whether the Qdrant collection holding github code is searched.
template = """
You are an AI assistant. You must determine if the prompt requires code as the answer.
Output a 1 if it is or a 0 if it is not and nothing else.
 Prompt: {prompt}
"""
prompt = PromptTemplate.from_template(template)
chain = {"prompt": itemgetter("prompt")} | prompt | model
codingQuestion = chain.invoke({"prompt": query})

# Later cells compare this flag with == '1', so normalise the raw reply first:
# models often append a newline, a space or quotes, which would otherwise make
# the comparison fail silently. Message-like replies exposing `.content` are
# unwrapped; plain strings pass through unchanged.
codingQuestion = str(getattr(codingQuestion, "content", codingQuestion)).strip().strip("\"'")
# Collapse to exactly '1' or '0'; anything that does not start with 1 counts as "not code".
codingQuestion = '1' if codingQuestion.startswith('1') else '0'
print("Coding Question?: ", codingQuestion)

# Filtered vector search: the self-query flag picks the collection,
# then the original query and its expansion are each searched against it
relatedCollection = 'Github' if codingQuestion == '1' else 'Document'
print("Related Collection: ", relatedCollection)
results1 = retriever(text=query, collection=relatedCollection)
results2 = retriever(text=queryExpansion, collection=relatedCollection)

# Pool the hits from both searches into one candidate list
results = results1 + results2

# Reranking (instead of using a CrossEncoder, the similarity scores returned
# by the search are compared directly): keep the 3 best-scoring distinct chunks.
ids = [result.id for result in results]
scores = [result.score for result in results]
texts = [result.payload['text'] for result in results]
links = [result.payload['link'] for result in results]

# The same chunk can be returned by both searches; remember only the index of
# its best-scoring occurrence (strict > keeps the earliest one on ties).
bestIndexById = {}
for i, pointId in enumerate(ids):
    if pointId not in bestIndexById or scores[i] > scores[bestIndexById[pointId]]:
        bestIndexById[pointId] = i

# Highest score first, earliest index first on ties. Slicing copes with fewer
# than 3 candidates (including none), where the previous max-scan re-appended
# index 0 or raised IndexError, and with non-positive scores.
topIndexes = sorted(bestIndexById.values(), key=lambda i: (-scores[i], i))[:3]
topIds = [ids[i] for i in topIndexes]

for index in topIndexes:
    print("Top texts: ", texts[index])
    print("Link: ", links[index])

# A blank line between chunks keeps them from running into each other in the prompt.
topTexts = "\n\n".join(texts[index] for index in topIndexes)

# Building prompt: pick the instruction text that matches the kind of question.
# The template text itself is kept exactly as it was.
if codingQuestion == '1':
    template = """
 Write code for the following question given the related coding document below.

 Document: {document}
 Question: {question}
 """
else:
    template = """
 Answer the question based on the document below. If you can't answer the question, reply "I don't know"

 Document: {document}
 Question: {question}
 """
prompt = PromptTemplate.from_template(template)

# Obtaining answer: feed the selected chunks and the original query through the chain.
# The bare invoke stays last so the notebook displays the answer as the cell output.
chain = (
    {"document": itemgetter("document"), "question": itemgetter("question")}
    | prompt
    | model
)
chain.invoke({"document": topTexts, "question": query})

Query expansion: Create a user-friendly, community-driven guide that provides an alternative to the traditional ROS documentation, focusing on real-world scenarios and practical applications rather than technical specifications and developer guides.
Coding Question?: 1
Related Collection: Github
Top texts: #About TheRobotOperatingSystem(ROS)isasetofsoftwarelibrariesandtoolsthathelpyoubuildrobotapplications. Fromdriverstostate-of-the-artalgorithms,andwithpowerfuldevelopertools,ROShaswhatyouneedforyournextroboticsproject. Andit'sallopensource. Fullprojectdetailson[ROS.org](https://ros.org/) #GettingStarted LookingtogetstartedwithROS? Our[installationguideishere](https://www.ros.org/blog/getting-started/).
Link: https://github.com/ros2/ros2/tree/rolling/README.md
Top texts: type:git url:https://github.com/ros2/tinyxml2_vendor.git version:rolling ros2/tlsf: type:git url:https://github.com/ros2/tlsf.git version:rolling ros2/unique_identifier_msgs: type:git url:https://github.com/ros2/unique

"Here's an example of what the README file for ROS could look like:\n\n**Welcome to the Robot Operating System (ROS)**\n\nROS is a set of software libraries and tools that help you build robot applications. From driver development to state-of-the-art algorithms, and with powerful development tools, ROS has everything you need for your next robotics project.\n\n### Getting Started\n\nTo get started with ROS, check out our [installation guide](https://www.ros.org/blog/getting-started/).\n\n### What's Included\n\nROS includes a range of open-source projects, including:\n\n* **tinyxml2_vendor**: A fork of the tinyxml2 library for parsing XML files.\n* **tlsf**: A library for secure communication over TLS (Transport Layer Security).\n* **unique_identifier_msgs**: A package for generating unique identifiers for robots and other entities.\n* **urdf**: A package for working with URDF (Unified Robot Description Format) files.\n* **yaml_cpp_vendor**: A fork of the yaml-cpp library for parsing YA