KenTheNoob
/

RAG_LLM

Model card Files Files and versions Community

File size: 5,039 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of document chunks:  0\n",
      "Number of githb chunks:  0\n",
      "\n",
      "Sample search result(n=2): \n"
     ]
    }
   ],
   "source": [
    "from shared import getQdrantClient, getEmbeddingsModel\n",
    "\n",
    "# Page size used when scrolling and result cap used for the sample search\n",
    "SCROLL_PAGE_SIZE = 100\n",
    "SEARCH_LIMIT = 10\n",
    "\n",
    "\n",
    "def scanCollection(client, collectionName, pageSize=SCROLL_PAGE_SIZE):\n",
    "    \"\"\"Count every point in a collection and keep the first one as a sample.\n",
    "\n",
    "    Args:\n",
    "        client: Client object exposing scroll().\n",
    "        collectionName: Name of the collection to walk.\n",
    "        pageSize: Number of points requested per scroll() call.\n",
    "\n",
    "    Returns:\n",
    "        (numChunks, sampleChunk); sampleChunk is None when the collection is empty.\n",
    "    \"\"\"\n",
    "    numChunks = 0\n",
    "    sampleChunk = None\n",
    "    offset = None\n",
    "    while True:\n",
    "        # Note with_vectors defaults to false, so the vectors are not returned.\n",
    "        # Payloads are only requested until a sample chunk has been captured.\n",
    "        points, offset = client.scroll(\n",
    "            collection_name=collectionName,\n",
    "            limit=pageSize,\n",
    "            with_payload=sampleChunk is None,\n",
    "            offset=offset,\n",
    "        )\n",
    "        if sampleChunk is None and points:\n",
    "            sampleChunk = points[0]\n",
    "        # Count the page BEFORE testing the offset so the last page is included\n",
    "        numChunks += len(points)\n",
    "        # A next-page offset of None marks the final page\n",
    "        if offset is None:\n",
    "            return numChunks, sampleChunk\n",
    "\n",
    "\n",
    "qClient = getQdrantClient()\n",
    "\n",
    "# Show everything in the Document collection\n",
    "numDocumentChunks, sampleDocumentChunk = scanCollection(qClient, 'Document')\n",
    "print(\"Number of document chunks: \", numDocumentChunks)\n",
    "if numDocumentChunks > 0:\n",
    "    print(\"\\nSample document chunk(metadata not the vector): \")\n",
    "    print(sampleDocumentChunk, '\\n')\n",
    "\n",
    "# Show everything in the Github collection\n",
    "numGithubChunks, sampleGithubChunk = scanCollection(qClient, 'Github')\n",
    "print(\"Number of github chunks: \", numGithubChunks)\n",
    "if numGithubChunks > 0:\n",
    "    print(\"\\nSample github chunk(metadata not the vector): \")\n",
    "    print(sampleGithubChunk, '\\n')\n",
    "\n",
    "# Show a sample search\n",
    "embeddingsModel = getEmbeddingsModel()\n",
    "results = qClient.search(\n",
    "    collection_name=\"Document\",\n",
    "    query_vector = embeddingsModel.embed_query(\"What operating system is ROS made for?\"),\n",
    "    limit=SEARCH_LIMIT\n",
    ")\n",
    "# The label reports the limit actually passed to the search\n",
    "print(f\"\\nSample search result(n={SEARCH_LIMIT}): \")\n",
    "for result in results:\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine Similarity for related sentences: 0.7035977848391597\n",
      "Cosine Similarity for unrelated sentences: 0.3566534327076298\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "# Embed three sentences to show how cosine similarity compares embeddings\n",
    "\n",
    "embedding1, embedding2, embedding3 = (\n",
    "    embeddingsModel.embed_query(sentence)\n",
    "    for sentence in (\n",
    "        \"What is the weather like?\",\n",
    "        \"It is raining today.\",\n",
    "        \"ROS is an open source platform\",\n",
    "    )\n",
    ")\n",
    "def cosine_similarity(vec1, vec2):\n",
    "    \"\"\"Return the cosine of the angle between two equal-length vectors.\n",
    "\n",
    "    Args:\n",
    "        vec1: First vector (any sequence np.dot accepts).\n",
    "        vec2: Second vector, same length as vec1.\n",
    "\n",
    "    Returns:\n",
    "        dot(vec1, vec2) / (norm(vec1) * norm(vec2)), or 0.0 when either\n",
    "        vector has zero magnitude and the quotient would be 0/0.\n",
    "    \"\"\"\n",
    "    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)\n",
    "    # A zero-magnitude vector has no direction; skip the 0/0 division\n",
    "    if norm_product == 0:\n",
    "        return 0.0\n",
    "    return np.dot(vec1, vec2) / norm_product\n",
    "# Compare the weather question against a related and an unrelated sentence\n",
    "similarity1, similarity2 = (\n",
    "    cosine_similarity(embedding1, other) for other in (embedding2, embedding3)\n",
    ")\n",
    "print(\"Cosine Similarity for related sentences:\", similarity1)\n",
    "print(\"Cosine Similarity for unrelated sentences:\", similarity2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from qdrant_client.http.models import Distance, VectorParams\n",
    "# Delete all collections and vectors inside them\n",
    "collectionNames = (\"Document\", \"Github\")\n",
    "for collectionName in collectionNames:\n",
    "    qClient.delete_collection(collection_name=collectionName)\n",
    "# Recreate the empty collections, each storing 3072-dimensional cosine vectors\n",
    "creationResults = [\n",
    "    qClient.create_collection(\n",
    "        collection_name=collectionName,\n",
    "        vectors_config=VectorParams(size=3072, distance=Distance.COSINE),\n",
    "    )\n",
    "    for collectionName in collectionNames\n",
    "]\n",
    "# Keep the last create_collection() result as the cell's displayed value\n",
    "creationResults[-1]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}