{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Qdrant collection inspection\n",
    "\n",
    "Utility notebook for the `Document` and `Github` Qdrant collections:\n",
    "\n",
    "1. count the chunks stored in each collection and show one sample chunk,\n",
    "2. run a sample similarity search against `Document`,\n",
    "3. illustrate cosine similarity with three example sentences,\n",
    "4. optionally delete and recreate both collections (**destructive**, disabled by default)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from qdrant_client.http.models import Distance, VectorParams\n",
    "\n",
    "from shared import getQdrantClient, getEmbeddingsModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings a reader might want to tune\n",
    "SCROLL_PAGE_SIZE = 100  # points fetched per scroll request\n",
    "SEARCH_LIMIT = 10       # maximum number of hits returned by the sample search\n",
    "VECTOR_SIZE = 3072      # vector size used when the collections are recreated\n",
    "\n",
    "qClient = getQdrantClient()\n",
    "embeddingsModel = getEmbeddingsModel()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Count the chunks in each collection\n",
    "\n",
    "`scroll` returns one page of points together with the offset of the next page; the offset is `None` once the last page has been returned. Every page is counted *before* that offset is checked, so the last page is included in the total."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def countChunks(client, collectionName, pageSize=SCROLL_PAGE_SIZE):\n",
    "    \"\"\"Count the points in a Qdrant collection by scrolling through every page.\n",
    "\n",
    "    Args:\n",
    "        client: Qdrant client exposing `scroll`.\n",
    "        collectionName: name of the collection to walk.\n",
    "        pageSize: number of points requested per scroll call.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(numChunks, sampleChunk)`. `sampleChunk` is the first point\n",
    "        returned by the scan, or None when the collection is empty.\n",
    "    \"\"\"\n",
    "    numChunks = 0\n",
    "    sampleChunk = None\n",
    "    offset = None\n",
    "    while True:\n",
    "        # Note with_vectors defaults to false, so the vectors are not returned.\n",
    "        # The payload is only requested until a sample chunk has been captured.\n",
    "        points, offset = client.scroll(\n",
    "            collection_name=collectionName,\n",
    "            limit=pageSize,\n",
    "            offset=offset,\n",
    "            with_payload=sampleChunk is None,\n",
    "        )\n",
    "        if sampleChunk is None and points:\n",
    "            sampleChunk = points[0]\n",
    "        numChunks += len(points)\n",
    "        # Stop only after the current page has been counted.\n",
    "        if offset is None:\n",
    "            return numChunks, sampleChunk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show everything in the Document collection\n",
    "numDocumentChunks, sampleDocumentChunk = countChunks(qClient, \"Document\")\n",
    "print(\"Number of document chunks: \", numDocumentChunks)\n",
    "if numDocumentChunks > 0:\n",
    "    print(\"\\nSample document chunk(metadata not the vector): \")\n",
    "    print(sampleDocumentChunk, '\\n')\n",
    "\n",
    "# Show everything in the Github collection\n",
    "numGithubChunks, sampleGithubChunk = countChunks(qClient, \"Github\")\n",
    "print(\"Number of github chunks: \", numGithubChunks)\n",
    "if numGithubChunks > 0:\n",
    "    print(\"\\nSample github chunk(metadata not the vector): \")\n",
    "    print(sampleGithubChunk, '\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sample search\n",
    "\n",
    "Embed a question and print the closest points from the `Document` collection (at most `SEARCH_LIMIT` hits)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = qClient.search(\n",
    "    collection_name=\"Document\",\n",
    "    query_vector=embeddingsModel.embed_query(\"What operating system is ROS made for?\"),\n",
    "    limit=SEARCH_LIMIT,\n",
    ")\n",
    "print(f\"\\nSample search results (limit={SEARCH_LIMIT}): \")\n",
    "for result in results:\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How cosine similarity works\n",
    "\n",
    "Three sentences are embedded and the first is compared with the other two. In a previously saved run the related pair scored about 0.70 and the unrelated pair about 0.36."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cosine_similarity(vec1, vec2):\n",
    "    \"\"\"Return the cosine similarity of two vectors.\n",
    "\n",
    "    Computed as the dot product divided by the product of the two\n",
    "    Euclidean norms. There is no guard for zero-length vectors: a zero\n",
    "    norm makes the denominator zero.\n",
    "    \"\"\"\n",
    "    dot_product = np.dot(vec1, vec2)\n",
    "    norm_vec1 = np.linalg.norm(vec1)\n",
    "    norm_vec2 = np.linalg.norm(vec2)\n",
    "    return dot_product / (norm_vec1 * norm_vec2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedding1 = embeddingsModel.embed_query(\"What is the weather like?\")\n",
    "embedding2 = embeddingsModel.embed_query(\"It is raining today.\")\n",
    "embedding3 = embeddingsModel.embed_query(\"ROS is an open source platform\")\n",
    "\n",
    "similarity1 = cosine_similarity(embedding1, embedding2)\n",
    "similarity2 = cosine_similarity(embedding1, embedding3)\n",
    "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
    "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reset the collections (destructive)\n",
    "\n",
    "The cell below deletes the `Document` and `Github` collections, including every vector inside them, and recreates them empty. It is guarded by `RESET_COLLECTIONS` so that a Restart Kernel and Run All does not wipe the data. Set the flag to `True` and run the cell to perform the reset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RESET_COLLECTIONS = False  # set to True to really delete and recreate the collections\n",
    "\n",
    "if RESET_COLLECTIONS:\n",
    "    for collectionName in (\"Document\", \"Github\"):\n",
    "        # Delete the collection and all vectors inside it\n",
    "        qClient.delete_collection(collection_name=collectionName)\n",
    "        # Recreate the empty collection\n",
    "        qClient.create_collection(\n",
    "            collection_name=collectionName,\n",
    "            vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),\n",
    "        )\n",
    "else:\n",
    "    print(\"RESET_COLLECTIONS is False: collections left untouched.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}