From 2adbb80591476d310a00390edae0ef2d3a52d21d Mon Sep 17 00:00:00 2001 From: to Date: Thu, 4 Apr 2024 23:41:59 -0400 Subject: [PATCH] Fixed Typos and Clarifying Concepts on Semantic Cache Notebook --- .../semantic_cache_chroma_vector_database.ipynb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/notebooks/en/semantic_cache_chroma_vector_database.ipynb b/notebooks/en/semantic_cache_chroma_vector_database.ipynb index 885e4a00..0348ef5d 100644 --- a/notebooks/en/semantic_cache_chroma_vector_database.ipynb +++ b/notebooks/en/semantic_cache_chroma_vector_database.ipynb @@ -940,14 +940,14 @@ "cell_type": "code", "source": [ "class semantic_cache:\n", - " def __init__(self, json_file=\"cache_file.json\", thresold=0.35):\n", + " def __init__(self, json_file=\"cache_file.json\", threshold=0.35):\n", " # Initialize Faiss index with Euclidean distance\n", " self.index, self.encoder = init_cache()\n", "\n", " # Set Euclidean distance threshold\n", " # a distance of 0 means identicals sentences\n", - " # We only return from cache sentences under this thresold\n", - " self.euclidean_threshold = thresold\n", + " # We only return from cache sentences under this threshold\n", + " self.euclidean_threshold = threshold\n", "\n", " self.json_file = json_file\n", " self.cache = retrieve_cache(self.json_file)\n", @@ -960,10 +960,17 @@ " embedding = self.encoder.encode([question])\n", "\n", " # Search for the nearest neighbor in the index\n", - " self.index.nprobe = 8\n", + " self.index.nprobe = 8 # Number of nearby cells to search\n", + "\n", + " # D - Array of Distances between the query vector and nearest neighbors\n", + " # I - Array of Indices of the nearest neighbors found in the index\n", + " # Both have shape (n, k) where n = number of questions in the embedding array and k = number of returned vectors\n", " D, I = self.index.search(embedding, 1)\n", "\n", + " # In this case, we are only expecting one entry in each of the distance and index arrays 
(k=1)\n", + " # Check if the distance array is valid\n", " if D[0] >= 0:\n", + " # Check if the index is valid and check if the distance is below the threshold value\n", " if I[0][0] >= 0 and D[0][0] <= self.euclidean_threshold:\n", " row_id = int(I[0][0])\n", "\n",