def editConfig(src, dest, key, value):
    """Copy the headerless key/value CSV *src* to *dest* with *key* set to *value*.

    Config files are CSVs whose column 0 holds keys and column 1 holds values
    (extra columns, e.g. a type tag, are preserved).  If *key* is absent,
    row 0 is overwritten — the legacy fallback of the original linear scan,
    kept so existing callers behave the same.
    """
    df = pd.read_csv(src, header=None)
    matches = df.index[df[0] == key]
    # Fall back to row 0 on a miss, mirroring the old scan whose rowIdx stayed 0.
    row_idx = matches[0] if len(matches) > 0 else 0
    # .loc instead of chained df[1][rowIdx] assignment: chained indexing is
    # deprecated and silently ineffective under pandas copy-on-write.
    df.loc[row_idx, 1] = str(value)
    df.to_csv(dest, index=False, header=False)


def readConfig(src, key):
    """Return the value (column 1) stored for *key* in the headerless CSV *src*.

    Falls back to row 0 when the key is absent, matching editConfig.
    """
    df = pd.read_csv(src, header=None)
    matches = df.index[df[0] == key]
    row_idx = matches[0] if len(matches) > 0 else 0
    return df.loc[row_idx, 1]
def draw2yLine(NAME, Com, R1, R2, l1, l2, m1, m2, fname):
    """Plot R1 and R2 against Com on twin y-axes and save the figure to fname.pdf.

    NAME    x-axis label
    Com     shared x values
    R1, R2  series drawn on the left / right y-axis
    l1, l2  legend labels for the two series
    m1, m2  y-axis labels for the left / right axis
    fname   output path without the ".pdf" suffix
    """
    fig, ax1 = plt.subplots(figsize=(10, 6.4))
    lines = [None] * 2
    print(Com)
    print(R1)
    lines[0], = ax1.plot(Com, R1, color=LINE_COLORS[0],
                         linewidth=LINE_WIDTH, marker=MARKERS[0],
                         markersize=MARKER_SIZE)
    ax1.set_ylabel(m1, fontproperties=LABEL_FP)
    ax1.set_xlabel(NAME, fontproperties=LABEL_FP)
    plt.xticks(rotation=0, size=TICK_FONT_SIZE)
    plt.yticks(rotation=0, size=TICK_FONT_SIZE)
    plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
    ax2 = ax1.twinx()
    lines[1], = ax2.plot(Com, R2, color=LINE_COLORS[1],
                         linewidth=LINE_WIDTH, marker=MARKERS[1],
                         markersize=MARKER_SIZE)
    ax2.set_ylabel(m2, fontproperties=LABEL_FP)
    ax1.yaxis.set_major_locator(LinearLocator(5))
    ax2.yaxis.set_major_locator(LinearLocator(5))
    # The original set these formatters twice in a row; once is enough.
    ax1.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))
    ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))
    plt.legend(lines,
               [l1, l2],
               prop=LEGEND_FP,
               loc='upper center',
               ncol=1,
               bbox_to_anchor=(0.55, 1.3), shadow=False,
               columnspacing=0.1,
               frameon=True, borderaxespad=-1.5, handlelength=1.2,
               handletextpad=0.1,
               labelspacing=0.1)
    plt.yticks(rotation=0, size=TICK_FONT_SIZE)
    plt.tight_layout()
    plt.savefig(fname + ".pdf")
plt.tight_layout() + + plt.savefig(fname + ".pdf") diff --git a/benchmark/scripts/breakdownHNSW/accuBar.py b/benchmark/scripts/breakdownHNSW/accuBar.py new file mode 100755 index 000000000..a18cb34c3 --- /dev/null +++ b/benchmark/scripts/breakdownHNSW/accuBar.py @@ -0,0 +1,335 @@ +import getopt +import os +import sys + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pylab +from matplotlib.font_manager import FontProperties +from matplotlib.ticker import LinearLocator, LogLocator, MaxNLocator, ScalarFormatter +from numpy import double + +OPT_FONT_NAME = 'Helvetica' +TICK_FONT_SIZE = 24 +LABEL_FONT_SIZE = 24 +LEGEND_FONT_SIZE = 24 +TITLE_FRONT_SIZE = 24 +LABEL_FP = FontProperties(style='normal', size=LABEL_FONT_SIZE) +LEGEND_FP = FontProperties(style='normal', size=LEGEND_FONT_SIZE) +TICK_FP = FontProperties(style='normal', size=TICK_FONT_SIZE) +TITLE_FP = FontProperties(style='normal', size=TITLE_FRONT_SIZE) +MARKERS = (['o', 's', 'v', "^", "h", "v", ">", "x", "d", "<", "|", "", "|", "_"]) +# you may want to change the color map for different figures +COLOR_MAP = ('#B03A2E', '#2874A6', '#239B56', '#7D3C98', '#F1C40F', '#F5CBA7', '#82E0AA', '#AEB6BF', '#AA4499') +# you may want to change the patterns for different figures +PATTERNS = (["\\", "///", "o", "||", "\\\\", "\\\\", "//////", "//////", ".", "\\\\\\", "\\\\\\"]) +LABEL_WEIGHT = 'bold' +LINE_COLORS = COLOR_MAP +LINE_WIDTH = 3.0 +MARKER_SIZE = 0.0 +MARKER_FREQUENCY = 1000 + +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['xtick.labelsize'] = TICK_FONT_SIZE +matplotlib.rcParams['ytick.labelsize'] = TICK_FONT_SIZE +matplotlib.rcParams['font.family'] = OPT_FONT_NAME +matplotlib.rcParams['pdf.fonttype'] = 42 + +exp_dir = "/data1/xtra" + +FIGURE_FOLDER = exp_dir + '/results/figure' + + +# there are some embedding problems if directly exporting the pdf figure using matplotlib. 
# there are some embedding problems if directly exporting the pdf figure using matplotlib.
# so we generate the eps format first and convert it to pdf.
def ConvertEpsToPdf(dir_filename):
    """Convert ``dir_filename``.eps to .pdf with epstopdf, then delete the .eps.

    Uses subprocess with an argument list so paths containing spaces or
    shell metacharacters are passed safely (the old os.system string was
    split by the shell).
    """
    import subprocess  # local import: keeps the module's import block untouched
    subprocess.run(["epstopdf", "--outfile", dir_filename + ".pdf",
                    dir_filename + ".eps"], check=False)
    # Remove the intermediate .eps; ignore it if already gone.
    try:
        os.remove(dir_filename + ".eps")
    except OSError:
        pass


class ScalarFormatterForceFormat(ScalarFormatter):
    """ScalarFormatter pinned to one decimal place on tick labels."""

    def _set_format(self):  # Override function that finds format to use.
        self.format = "%1.1f"  # Give format here
def DrawFigure(x_values, y_values, legend_labels, x_label, y_label, filename, allow_legend, title):
    """Draw a stacked bar chart and save it to ``filename``.pdf.

    x_values      category labels on the x axis
    y_values      list of series; series i is stacked on top of series 0..i-1
    legend_labels one label per series
    allow_legend  when True, draw a reversed legend above the axes
    title         figure title
    """
    # you may change the figure size on your own.
    fig = plt.figure(figsize=(20, 6))
    figure = fig.add_subplot(111)

    FIGURE_LABEL = legend_labels
    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#DC143C', '#00FF7F'
    ]
    HATCH_PATTERNS = ['/', '-', 'o', '///', '\\', '|', 'x', '\\\\', '+', '.', '*', 'oo', '++++', '....', 'xxx']

    # values on the x axis
    index = np.arange(len(x_values))
    # the bar width; you may need to tune it to get the best figure.
    width = 0.5
    # draw the stacked bars
    bottom_base = np.zeros(len(y_values[0]))
    bars = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        bars[i] = plt.bar(index * 1.5 + width / 2, y_values[i], width,
                          hatch=HATCH_PATTERNS[i], color=LINE_COLORS[i],
                          label=FIGURE_LABEL[i], bottom=bottom_base,
                          edgecolor='black', linewidth=3)
        bottom_base = np.array(y_values[i]) + bottom_base

    if allow_legend:
        # Single legend call: the original built a throwaway legend first and
        # then immediately replaced it with this reversed-handles one.
        handles, labels = figure.get_legend_handles_labels()
        leg = plt.legend(handles[::-1], labels[::-1],
                         loc='center',
                         prop=LEGEND_FP,
                         ncol=6,
                         bbox_to_anchor=(0.5, 1.15),
                         shadow=True, frameon=True, edgecolor='black',
                         handletextpad=0.1,
                         labelspacing=-1.0,
                         columnspacing=0.5,
                         )
        leg.get_frame().set_linewidth(2)
        leg.get_frame().set_edgecolor("black")

    # you may need to tune the xticks position to get the best figure.
    plt.xticks(index * 1.5 + 0.6 * width, x_values)
    plt.xticks(rotation=30, fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)
    plt.ylim(0, 100)
    plt.grid(axis='y', color='gray')
    figure.yaxis.set_major_locator(LinearLocator(10))
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)
    plt.grid(axis='y', color='gray', alpha=0.5, linewidth=0.5)
    plt.xlabel(x_label, fontproperties=LABEL_FP)
    plt.ylabel(y_label, fontproperties=LABEL_FP)
    plt.title(title, fontproperties=TITLE_FP)
    plt.savefig(filename + ".pdf", bbox_inches='tight', format='pdf')
def normalize(y_values):
    """Scale each series so that the stacked columns sum to 100 (percentages)."""
    totals = np.zeros(len(y_values[0]))
    for series in y_values:
        totals = totals + np.array(series)
    # Each entry becomes its share of the column total, in percent.
    return [np.array(series) / totals * 100 for series in y_values]
if __name__ == "__main__":
    id = 119  # default experiment id
    try:
        opts, args = getopt.getopt(sys.argv[1:], '-i:h', ['test id', 'help'])
    except getopt.GetoptError:
        print('breakdown.py -id testid')
        sys.exit(2)
    for opt, opt_value in opts:
        if opt in ('-h', '--help'):
            print("[*] Help info")
            exit()
        elif opt == '-i':
            print('Test ID:', opt_value)
            id = int(opt_value)

    x_values = ['10%', '20%', '30%', '40%', '50%']  # sorting step size

    y_values, max_value = ReadFile(id)  # 55

    # break into 4 parts
    legend_labels = ['sort', 'merge', 'join']  # , 'others'

    # BUG FIX: DrawFigure takes a mandatory ``title`` argument; the original
    # call omitted it and raised TypeError at runtime. Pass an empty title.
    DrawFigure(x_values, y_values, legend_labels,
               'sorting step size', 'cycles per input tuple',
               'breakdown_sort_figure', True, '')

    # DrawLegend(legend_labels, 'breakdown_radix_legend')
def paraseValidStageNames(a):
    """Return the 'name' of every row in CSV *a* whose 'cpu' field is not 'NA'.

    The first row of the file is the header; the 'cpu' and 'name' columns
    are located by header text, defaulting to column 0 when absent (the
    original scan's behaviour on a miss).
    """
    with open(a, 'r') as f:
        rows = list(csv.reader(f))
    header = rows[0]
    idxCpu = header.index('cpu') if 'cpu' in header else 0
    idxName = header.index('name') if 'name' in header else 0
    # Keep only stages that were actually scheduled on a cpu.
    return [row[idxName] for row in rows[1:] if row[idxCpu] != 'NA']
def maxInList(a):
    """Column-wise maximum over a list of equal-length rows.

    Returns (maxima, index): for each column i, the largest a[k][i] and the
    row index k it came from.

    BUG FIX: the original seeded the running maximum with 0, which returned
    0 (and row 0) for any all-negative column; seed with row 0 instead.
    """
    maxima = []
    index = []
    for col in range(len(a[0])):
        best_val = a[0][col]
        best_row = 0
        for row in range(1, len(a)):
            if a[row][col] > best_val:
                best_val = a[row][col]
                best_row = row
        maxima.append(best_val)
        index.append(best_row)
    return maxima, index
+candidateTimes,1,I64 +disableADC,0,I64 +isOnlinePQ,0,I64 +fineGrainedBuiltPath,OnlinePQIndex_fine.rbt,String +dataPath,datasets/DPR/DPR100KC4.fvecs,String +queryPath,datasets/DPR/DPR10KC4Q.fvecs,String +waitPendingWrite,1,I64 +is_NSW,0,I64 +isOnlinePQ,0,I64 +flannIndexTag,1,I64 diff --git a/benchmark/scripts/breakdownHNSW/deps.txt b/benchmark/scripts/breakdownHNSW/deps.txt new file mode 100644 index 000000000..6fe0c21b9 --- /dev/null +++ b/benchmark/scripts/breakdownHNSW/deps.txt @@ -0,0 +1,6 @@ +transformers>=4.30.0 +setuptools==65.5.1 +torch +datasets==2.14.3 +numpy>=1.25.0 +nltk==3.8.1 diff --git a/benchmark/scripts/breakdownHNSW/drawSVI.py b/benchmark/scripts/breakdownHNSW/drawSVI.py new file mode 100755 index 000000000..6e2d8c00a --- /dev/null +++ b/benchmark/scripts/breakdownHNSW/drawSVI.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +import csv +import numpy as np +import matplotlib.pyplot as plt +import accuBar as accuBar +import groupBar as groupBar +import groupBar2 as groupBar2 +import groupLine as groupLine +from autoParase import * +import itertools as it +import os + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pylab +from matplotlib.font_manager import FontProperties +from matplotlib.ticker import LogLocator, LinearLocator +import os +import pandas as pd +import sys +from OoOCommon import * + +OPT_FONT_NAME = 'Helvetica' +TICK_FONT_SIZE = 22 +LABEL_FONT_SIZE = 28 +LEGEND_FONT_SIZE = 30 +LABEL_FP = FontProperties(style='normal', size=LABEL_FONT_SIZE) +LEGEND_FP = FontProperties(style='normal', size=LEGEND_FONT_SIZE) +TICK_FP = FontProperties(style='normal', size=TICK_FONT_SIZE) + +MARKERS = (['*', '|', 'v', "^", "", "h", "<", ">", "+", "d", "<", "|", "", "+", "_"]) +# you may want to change the color map for different figures +COLOR_MAP = ( + '#B03A2E', '#2874A6', '#239B56', '#7D3C98', '#FFFFFF', '#F1C40F', '#F5CBA7', '#82E0AA', '#AEB6BF', '#AA4499') +# you may want to change the patterns for different figures 
def readResultPeriod(period, resultPath):
    """Read the summary metrics of one watermark-period run.

    Returns (avgLat, lat95, thr, err) as read from the run's
    default_general.csv.
    """
    resultFname = resultPath + "/" + str(period) + "/default_general.csv"
    metrics = tuple(readConfig(resultFname, tag)
                    for tag in ("AvgLatency", "95%Latency", "Throughput", "AQPError"))
    return metrics
def compareMethod(exeSpace, commonPathBase, resultPaths, csvTemplates, periodVec, reRun=1):
    """Optionally (re)run every method and collect its 95% latency / error curves.

    Returns (lat95All, errAll, periodAll): one vector per method, all
    sampled over *periodVec*.
    """
    lat95All = []
    errAll = []
    periodAll = []
    for i, template in enumerate(csvTemplates):
        resultPath = commonPathBase + resultPaths[i]
        if reRun == 1:
            # Wipe stale results and rerun the whole period sweep.
            os.system("rm -rf " + resultPath)
            os.system("mkdir " + resultPath)
            runPeriodVector(exeSpace, periodVec, resultPath, template)
        _, lat95Vec, _, errVec, _ = readResultVectorPeriod(periodVec, resultPath)
        lat95All.append(lat95Vec)
        errAll.append(errVec)
        periodAll.append(periodVec)
    return lat95All, errAll, periodAll
+ "svie2ESmall", True) + + +if __name__ == "__main__": + main() diff --git a/benchmark/scripts/breakdownHNSW/drawTogether.py b/benchmark/scripts/breakdownHNSW/drawTogether.py new file mode 100644 index 000000000..4abcd8ba7 --- /dev/null +++ b/benchmark/scripts/breakdownHNSW/drawTogether.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python3 +# Note: the concept drift is not learnt by indexing in this group +import csv +import numpy as np +import matplotlib.pyplot as plt +import accuBar as accuBar +import groupBar2 as groupBar2 +import groupLine as groupLine +from autoParase import * +import itertools as it +import os + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pylab +from matplotlib.font_manager import FontProperties +from matplotlib import ticker +from matplotlib.ticker import LogLocator, LinearLocator + +import os +import pandas as pd +import sys +from OoOCommon import * + +OPT_FONT_NAME = 'Helvetica' +TICK_FONT_SIZE = 22 +LABEL_FONT_SIZE = 22 +LEGEND_FONT_SIZE = 22 +LABEL_FP = FontProperties(style='normal', size=LABEL_FONT_SIZE) +LEGEND_FP = FontProperties(style='normal', size=LEGEND_FONT_SIZE) +TICK_FP = FontProperties(style='normal', size=TICK_FONT_SIZE) + +MARKERS = (['*', '|', 'v', "^", "", "h", "<", ">", "+", "d", "<", "|", "", "+", "_"]) +# you may want to change the color map for different figures +COLOR_MAP = ( + '#B03A2E', '#2874A6', '#239B56', '#7D3C98', '#FFFFFF', '#F1C40F', '#F5CBA7', '#82E0AA', '#AEB6BF', '#AA4499') +# you may want to change the patterns for different figures +PATTERNS = (["////", "o", "", "||", "-", "//", "\\", "o", "O", "////", ".", "|||", "o", "---", "+", "\\\\", "*"]) +LABEL_WEIGHT = 'bold' +LINE_COLORS = COLOR_MAP +LINE_WIDTH = 3.0 +MARKER_SIZE = 15.0 +MARKER_FREQUENCY = 1000 + +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['xtick.labelsize'] = TICK_FONT_SIZE +matplotlib.rcParams['ytick.labelsize'] = TICK_FONT_SIZE 
def runPeriod(exePath, algoTag, resultPath, configTemplate="config.csv", prefixTagRaw="null"):
    """Configure and run one onlineInsert experiment for *algoTag*.

    Builds exePath/temp1.csv from the static lazy template, applies the
    per-algorithm overrides, runs the benchmark (or copies a canned result
    for 'nnDescent2'), and archives all produced CSVs under
    resultPath/<prefixTag>.

    Note: *configTemplate* is accepted for signature compatibility but the
    static lazy template is always used, as in the original.
    """
    prefixTag = str(prefixTagRaw)
    configTemplate = "config_e2e_static_lazy.csv"
    # clear old files and stage the perf counter list
    os.system("cd " + exePath + "&& sudo rm *.csv")
    os.system("cp perfListEvaluation.csv " + exePath)
    dataPathCommon = exePath + "/results/scanIPConceptDriftHotSpot/"
    desiredDataFname = dataPathCommon + "driftData/" + "data_" + str(prefixTag) + '.fvecs'
    desiredQueryFname = dataPathCommon + "driftData/" + "query_" + str(prefixTag) + '.fvecs'
    # Base edits: point the template at this run's data/query and index tag.
    editConfig(configTemplate, "temp3.csv", "dataPath", desiredDataFname)
    editConfig("temp3.csv", "temp2.csv", "queryPath", desiredQueryFname)
    editConfig("temp2.csv", exePath + "temp1.csv", "faissIndexTag", algoTag)

    # Per-algorithm config overrides, applied in order on top of temp1.csv.
    # (Replaces the original's error-prone temp2/temp3/temp4 file juggling
    # with one scratch file; the final temp1.csv content is identical.)
    extraEdits = {
        'LSH': [("numberOfBuckets", 1), ("useCRS", 0),
                ("congestionDropWorker_algoTag", "onlineIVFLSH"), ("encodeLen", 3)],
        'LSH-H': [("congestionDropWorker_algoTag", "onlineIVFLSH"),
                  ("useCRS", 0), ("encodeLen", 3)],
        'flatAMMIP': [("congestionDropWorker_algoTag", "flatAMMIP"), ("sketchSize", 256)],
        'flatAMMIPSMPPCA': [("congestionDropWorker_algoTag", "flatAMMIP"),
                            ("sketchSize", 128), ("ammAlgo", 'smp-pca')],
        'flat': [("congestionDropWorker_algoTag", "flat"), ("sketchSize", 256)],
        'NSW': [("congestionDropWorker_algoTag", "NSW"), ("is_NSW", 1)],
        'nnDescent': [("congestionDropWorker_algoTag", "nnDescent"), ("frozenLevel", 1)],
        'onlinePQ': [("faissIndexTag", "PQ"), ("isOnlinePQ", 1), ("sketchSize", 256)],
        'Flann': [("congestionDropWorker_algoTag", "Flann"), ("sketchSize", 256)],
        'DPG': [("congestionDropWorker_algoTag", "DPG"), ("frozenLevel", 1)],
    }
    for key, value in extraEdits.get(algoTag, []):
        editConfig(exePath + "temp1.csv", exePath + "tempEdit.csv", key, value)
        os.replace(exePath + "tempEdit.csv", exePath + "temp1.csv")

    exeTag = "onlineInsert"
    # stage prebuilt index files
    os.system("rm -rf " + exePath + "*.rbt")
    os.system("cp *.rbt " + exePath)
    if algoTag == 'nnDescent2':
        # no runnable binary for this tag; use the canned result instead
        os.system("cp dummy.csv " + exePath + "onlineInsert_result.csv")
    else:
        os.system("cd " + exePath + "&& export OMP_NUM_THREADS=1 &&" + "sudo ./" + exeTag + " " + 'temp1.csv')
    # copy result
    os.system("sudo rm -rf " + resultPath + "/" + str(prefixTag))
    os.system("sudo mkdir " + resultPath + "/" + str(prefixTag))

    os.system("cd " + exePath + "&& sudo cp *.csv " + resultPath + "/" + str(prefixTag))
def readResultSingle(singleValue, resultPath):
    """Read one run's metrics from its onlineInsert_result.csv.

    Returns (elapsedTime, incrementalBuild, incrementalSearch, recall,
    pendingWaitTime, l2Stall, l3Stall, totalStall, froErr); the stall and
    Frobenius-error slots are fixed at 0 — not measured in this experiment.
    """
    resultFname = resultPath + "/" + str(singleValue) + "/onlineInsert_result.csv"
    # 'latencyOfQuery' feeds both elapsedTime and incrementalSearch; the
    # original read the same key twice — read it once.
    queryLatency = readConfig(resultFname, "latencyOfQuery")
    elapsedTime = queryLatency
    incrementalBuild = readConfig(resultFname, "95%latency(Insert)")
    incrementalSearch = queryLatency
    recall = readConfig(resultFname, "recall")
    pendingWaitTime = readConfig(resultFname, "pendingWrite")
    l2Stall = 0
    l3Stall = 0
    totalStall = 0
    froErr = 0
    return elapsedTime, incrementalBuild, incrementalSearch, recall, pendingWaitTime, l2Stall, l3Stall, totalStall, froErr
def checkResultSingle(singleValue, resultPath):
    """Return 1 when resultPath/<singleValue>/onlineInsert_result.csv exists, else 0."""
    resultFname = resultPath + "/" + str(singleValue) + "/onlineInsert_result.csv"
    if not os.path.exists(resultFname):
        print("File does not exist:" + resultFname)
        return 0
    return 1


def checkResultVector(singleValueVec, resultPath):
    """Return 1 only when every value in *singleValueVec* has a result file.

    An empty vector counts as complete (returns 1), as before.
    """
    for value in singleValueVec:
        if checkResultSingle(value, resultPath) == 0:
            return 0
    return 1
elapsedTimeAll.append(elapsedTime) + incrementalBuildAll.append(incrementalBuild) + incrementalSearchAll.append(incrementalSearch) + periodAll.append(dataSetName) + recallAll.append(recall) + pendingWaitTimeAll.append(pendingWaitTime) + l2StallAll.append(l2Stall) + l3StallAll.append(l3Stall) + totalStallAll.append(totalStall) + froAll.append(froVec) + algoCnt = algoCnt + 1 + print(algoCnt) + # periodAll.append(periodVec) + return np.array(elapsedTimeAll), np.array(incrementalBuildAll), np.array(periodAll), np.array(recallAll), np.array( + incrementalSearchAll), np.array(pendingWaitTimeAll), np.array(l2StallAll), np.array(l3StallAll), np.array( + totalStallAll), np.array(froAll) + + +def getCyclesPerMethod(cyclesAll, valueChose): + recallPerMethod = [] + for i in range(len(cyclesAll)): + recallPerMethod.append(cyclesAll[int(i)][int(valueChose)]) + return np.array(recallPerMethod) + + +def main(): + exeSpace = os.path.abspath(os.path.join(os.getcwd(), "../..")) + "/" + commonBasePath = os.path.abspath(os.path.join(os.getcwd(), "../..")) + "/results/breakdownHNSW/" + + figPath = os.path.abspath(os.path.join(os.getcwd(), "../..")) + "/figures/breakdownHNSW" + + # add the datasets here + # srcAVec=["datasets/AST/mcfe.mtx"] # 765*756 + # srcBVec=["datasets/AST/mcfe.mtx"] # 765*756 + # dataSetNames=['AST'] + # srcAVec=['datasets/UTM/utm1700a.mtx'] # 1700*1700 + # srcBVec=['datasets/UTM/utm1700b.mtx'] # 1700*1700 + # dataSetNames=['UTM'] + # srcAVec=['datasets/ECO/wm2.mtx',"datasets/DWAVE/dwa512.mtx","datasets/AST/mcfe.mtx",'datasets/UTM/utm1700a.mtx','datasets/RDB/rdb2048.mtx','datasets/ZENIOS/zenios.mtx','datasets/QCD/qcda_small.mtx',"datasets/BUS/gemat1.mtx",] + # srcBVec=['datasets/ECO/wm3.mtx',"datasets/DWAVE/dwb512.mtx","datasets/AST/mcfe.mtx",'datasets/UTM/utm1700b.mtx','datasets/RDB/rdb2048l.mtx','datasets/ZENIOS/zenios.mtx','datasets/QCD/qcdb_small.mtx',"datasets/BUS/gemat1.mtx",] + # dataSetNames=['ECO','DWAVE','AST','UTM','RDB','ZENIOS','QCD','BUS'] + # 
srcAVec=['datasets/ECO/wm2.mtx',"datasets/DWAVE/dwa512.mtx","datasets/AST/mcfe.mtx",'datasets/UTM/utm1700a.mtx','datasets/RDB/rdb2048.mtx','datasets/ZENIOS/zenios.mtx','datasets/QCD/qcda_small.mtx',"datasets/BUS/gemat1.mtx",] + # srcBVec=['datasets/ECO/wm3.mtx',"datasets/DWAVE/dwb512.mtx","datasets/AST/mcfe.mtx",'datasets/UTM/utm1700b.mtx','datasets/RDB/rdb2048l.mtx','datasets/ZENIOS/zenios.mtx','datasets/QCD/qcdb_small.mtx',"datasets/BUS/gemat1.mtx",] + # aRowVec= [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0] + aRowVec = ["DPR",0.05, 0.1, 0.2, 0.4, 0.6, 0.8] + # exit() + # aRowVec=[100, 200] + # add the algo tag here + # algosVec = ['flat', 'LSH-H','flatAMMIP','flatAMMIPSMPPCA','PQ','IVFPQ','HNSW'] + algosVec = ['HNSWbd'] + # algosVec = ['flat', 'LSH-H'] + # algosVec = ['flat', 'onlinePQ'] + # algosVec=['incrementalRaw'] + # algosVec=[ 'pq'] + # algoDisp = ['BrutalForce', 'PQ'] + algoDisp = ['HNSWbd'] + # algoDisp=['BrutalForce','LSH-H'] + # algoDisp=['PQ'] + # add the algo tag here + + # this template configs all algos as lazy mode, all datasets are static and normalized + csvTemplate = 'config_e2e_static_lazy.csv' + # do not change the following + resultPaths = algosVec + os.system("mkdir ../../results") + os.system("mkdir ../../figures") + os.system("mkdir " + figPath) + + # run + reRun = 0 + if (len(sys.argv) < 2): + + os.system("sudo rm -rf " + commonBasePath) + + reRun = 1 + else: + reRun = int(sys.argv[1]) + os.system("sudo mkdir " + commonBasePath) + #prepareEmbeddings(commonBasePath, aRowVec) + # exit(0) + print(reRun) + methodTags = algoDisp + elapsedTimeAll, incrementalBuildAll, periodAll, recall, incrementalSearchAll, pendingWaitTimeAll, l2StallAll, l3StallAll, totalStallAll, froAll = compareMethod( + exeSpace, commonBasePath, resultPaths, csvTemplate, algosVec, aRowVec, reRun) + # Add some pre-process logic for int8 here if it is used + + # groupBar2.DrawFigureYLog(aRowVec, recall/recall[-1], methodTags, "Datasets", "Ins (times of LTMM)", 5, 15, figPath + 
"/" + "recall", True) + # groupBar2.DrawFigureYLog(aRowVec, fpInsAll/fpInsAll[-1], methodTags, "Datasets", "FP Ins (times of LTMM)", 5, 15, figPath + "/" + "FP_recall", True) + # groupBar2.DrawFigureYLog(aRowVec, memInsAll/memInsAll[-1], methodTags, "Datasets", "Mem Ins (times of LTMM)", 5, 15, figPath + "/" + "mem_recall", True) + # groupBar2.DrawFigure(aRowVec, ratioFpIns, methodTags, "Datasets", "SIMD Utilization (%)", 5, 15, figPath + "/" + "SIMD utilization", True) + # groupBar2.DrawFigure(aRowVec, recall/(memLoadAll+memStoreAll), methodTags, "Datasets", "IPM", 5, 15, figPath + "/" + "IPM", True) + # groupBar2.DrawFigure(aRowVec, fpInsAll/(memLoadAll+memStoreAll), methodTags, "Datasets", "FP Ins per Unit Mem Access", 5, 15, figPath + "/" + "FPIPM", True) + # groupBar2.DrawFigure(aRowVec, (memLoadAll+memStoreAll)/(recall)*100.0, methodTags, "Datasets", "Ratio of Mem Ins (%)", 5, 15, figPath + "/" + "mem", True) + + # groupBar2.DrawFigure(aRowVec, branchAll/recall*100.0, methodTags, "Datasets", "Ratio of Branch Ins (%)", 5, 15, figPath + "/" + "branches", True) + # groupBar2.DrawFigure(aRowVec, otherIns/recall*100.0, methodTags, "Datasets", "Ratio of Other Ins (%)", 5, 15, figPath + "/" + "others", True) + # print(recall[-1],recall[2]) + + # groupBar2.DrawFigure(dataSetNames, np.log(thrAll), methodTags, "Datasets", "elements/ms", 5, 15, figPath + "sec4_1_e2e_static_lazy_throughput_log", True) + #groupLine.DrawFigureYLog(periodAll, incrementalBuildAll / 1000, + # methodTags, + # "Drifted Pos", r'95% Latency of insert (ms)', 0, 1, + # figPath + "/" + "scanIPConceptDriftHotSpot_lat_INSERT", + # True) + #groupLine.DrawFigureYLog(periodAll, pendingWaitTimeAll / 1000, + # methodTags, + # "Drifted Pos", r'Pending wait for insert (ms)', 0, 1, + # figPath + "/" + "scanIPConceptDriftHotSpot_lat_pending", + # True) + #groupLine.DrawFigureYLog(periodAll, incrementalSearchAll / 1000, + # methodTags, + # "Drifted Pos", r'Latency of search (ms)', 0, 1, + # figPath + "/" + 
"scanIPConceptDriftHotSpot_lat_search", + # True) + #groupLine.DrawFigureYLog(periodAll, (incrementalSearchAll + pendingWaitTimeAll) / 1000, + # methodTags, + # "Drifted Pos", r'Latency of query (ms)', 0, 1, + # figPath + "/" + "scanIPConceptDriftHotSpot_lat_instant", + # True) + #groupLine.DrawFigureYnormal(periodAll, recall, + # methodTags, + # "Prob. of contamination", r'recall@10', 0, 1, + # figPath + "/" + "scanIPConceptDriftHotSpot_recall", + # False) + breakdownVec = [] + greedy=[] + candidate=[] + link=[] + sumstep=[] + for i in range(len(algosVec)): + resultPath = commonBasePath + algosVec[i] + for j in range(len(aRowVec)): + bddirPath = resultPath + "/"+str(aRowVec[j]) + bdPath = bddirPath+"/"+"hnswbd.csv" + #df = pd.read_csv(bdPath, sep=',', header=None).iloc[9,[3,4,5]] + df = pd.read_csv(bdPath, sep=',', header=None).iloc[:,[3,4,5]] + sum = df.sum(axis=1) + result_df = pd.concat([df, sum], axis=1) + greedy.append(result_df.iloc[9,0]/result_df.iloc[9,3]*100.0) + candidate.append(result_df.iloc[9,1]/result_df.iloc[9,3]*100.0) + link.append(result_df.iloc[9,2]/result_df.iloc[9,3]*100.0) + sumstep.append(result_df.iloc[9,3]) + + + #accuBar.DrawFigure(methodTags, + # [memStallPerMethod, l1dStallPerMethod, l2StallPerMethod, l3StallPerMethod, + # otherPerMethod,nonStallPerMethod]/cpuCyclePerMethod*100.0, ['Mem Stall', 'L1D Stall', 'L2 Stall', 'L3 Stall', 'Other Stall', 'Not Stall'], '', + # 'Propotion (%)', figPath + "/" + "cyclesbreakDown" + # + "_cycles_accubar" + str(valueVec[valueChose]), allowLegend, + # '') + #groupBar2.DrawFigure(dataSetNames,l1dStallAll/cpuCycleAll*100.0,methodTags, "Datasets", "Ratio of l1dStalls (%)", 5, 15, figPath + "l1dstall_ratio", True) + accuBar.DrawFigure(aRowVec, [greedy, candidate, link], ["Greedy","Candidate", "Link"], "",'Propotion (%)',figPath + "/" + "cyclesbreakDown"+ "_cycles_accubar", True, '') + + + + + +if __name__ == "__main__": + main() + diff --git a/benchmark/scripts/breakdownHNSW/dummy.csv 
"""Grouped-bar plotting helpers (groupBar2.py)."""
import itertools as it
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pylab
from matplotlib.font_manager import FontProperties
from matplotlib.ticker import LogLocator, LinearLocator
import matplotlib.ticker as mtick

OPT_FONT_NAME = 'Helvetica'
TICK_FONT_SIZE = 32
LABEL_FONT_SIZE = 30
LEGEND_FONT_SIZE = 32
LABEL_FP = FontProperties(style='normal', size=LABEL_FONT_SIZE)
LEGEND_FP = FontProperties(style='normal', size=LEGEND_FONT_SIZE)
TICK_FP = FontProperties(style='normal', size=TICK_FONT_SIZE)

MARKERS = (["+", 'o', 's', 'v', "^", "", "h", "<", ">", "+", "d", "<", "|", "", "+", "_"])
# you may want to change the color map for different figures
COLOR_MAP = (
    '#AA4499', '#B03A2E', '#2874A6', '#239B56', '#7D3C98', '#00FFFF', '#F1C40F', '#F5CBA7', '#82E0AA', '#AEB6BF',
    '#AA4499')
# you may want to change the patterns for different figures
PATTERNS = (
    ["\\\\", "////", "\\\\", "//", "o", "", "||", "-", "//", "\\", "o", "O", "////", ".", "|||", "o", "---", "+",
     "\\\\",
     "*"])
LABEL_WEIGHT = 'bold'
LINE_COLORS = COLOR_MAP
LINE_WIDTH = 3.0
MARKER_SIZE = 15.0
MARKER_FREQUENCY = 1000

matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['xtick.labelsize'] = TICK_FONT_SIZE
matplotlib.rcParams['ytick.labelsize'] = TICK_FONT_SIZE
matplotlib.rcParams['font.family'] = OPT_FONT_NAME
matplotlib.rcParams['pdf.fonttype'] = 42

exp_dir = "/data1/xtra"

FIGURE_FOLDER = exp_dir + '/results/figure'


def DrawLegend(legend_labels, filename):
    """Render a stand-alone horizontal legend strip to FIGURE_FOLDER/<filename>.pdf."""
    fig = pylab.figure()
    ax1 = fig.add_subplot(111)
    FIGURE_LABEL = legend_labels
    LEGEND_FP = FontProperties(style='normal', size=26)
    figlegend = pylab.figure(figsize=(16, 0.5))
    bars = [None] * len(FIGURE_LABEL)
    data = [1]
    x_values = [1]

    width = 0.3
    # Draw one dummy bar per label so the legend picks up hatch/color.
    for i in range(len(FIGURE_LABEL)):
        bars[i] = ax1.bar(x_values, data, width,
                          hatch=PATTERNS[i],
                          color=LINE_COLORS[i],
                          label=FIGURE_LABEL[i],
                          edgecolor='black', linewidth=3)

    figlegend.legend(bars, FIGURE_LABEL, prop=LEGEND_FP,
                     loc=1, ncol=len(FIGURE_LABEL), mode="expand", shadow=True,
                     frameon=True, handlelength=2, handletextpad=0.3, columnspacing=0.5,
                     borderaxespad=-0.2, fancybox=True)
    figlegend.savefig(FIGURE_FOLDER + '/' + filename + '.pdf')


def DrawFigure(x_values, y_values, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Grouped bar chart with a linear y axis, saved to <filename>.pdf.

    y_min/y_max are accepted for interface compatibility but not applied.
    """
    fig = plt.figure(figsize=(20, 6))
    figure = fig.add_subplot(111)

    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#DC143C', '#00FF7F'
    ]
    HATCH_PATTERNS = ['/', '-', 'o', '///', '\\', '|', 'x', '\\\\', '+', '.', '*', 'oo', '++++', '....', 'xxx']

    FIGURE_LABEL = legend_labels
    index = np.arange(len(x_values))
    width = 0.5 / len(x_values)
    bars = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        bars[i] = plt.bar(index + i * width + width / 2,
                          y_values[i], width,
                          hatch=HATCH_PATTERNS[i % len(HATCH_PATTERNS)],
                          color=LINE_COLORS[i % len(LINE_COLORS)],
                          label=FIGURE_LABEL[i], edgecolor='black', linewidth=3)

    if allow_legend:
        plt.legend(bars, FIGURE_LABEL,
                   prop={'size': 28},
                   # BUG FIX: ncol must be an int; the original passed
                   # len(bars) / 2 (a float), which matplotlib rejects.
                   ncol=max(1, len(bars) // 2),
                   loc='upper center',
                   bbox_to_anchor=(0.5, 1.35),
                   shadow=True, frameon=True, edgecolor='black', borderaxespad=0, columnspacing=0.2,
                   handletextpad=0.2)

    plt.xticks(index + len(x_values) / 2 * width, x_values, rotation=0)
    figure.yaxis.set_major_locator(LinearLocator(5))
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)
    figure.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))
    plt.xlabel(x_label, fontsize=TICK_FONT_SIZE)
    plt.ylabel(y_label, fontsize=TICK_FONT_SIZE)
    plt.xticks(fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)

    fig.savefig(filename + ".pdf", bbox_inches='tight')


def DrawFigureYLog(x_values, y_values, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Grouped bar chart with a logarithmic y axis, saved to <filename>.pdf."""
    fig = plt.figure(figsize=(20, 6))
    figure = fig.add_subplot(111)

    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#DC143C', '#00FF7F'
    ]
    HATCH_PATTERNS = ['/', '-', 'o', '///', '\\', '|', 'x', '\\\\', '+', '.', '*', 'oo', '++++', '....', 'xxx']

    FIGURE_LABEL = legend_labels
    index = np.arange(len(x_values))
    width = 0.5 / len(x_values)
    bars = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        bars[i] = plt.bar(index + i * width + width / 2,
                          y_values[i], width,
                          hatch=HATCH_PATTERNS[i % len(HATCH_PATTERNS)],
                          color=LINE_COLORS[i % len(LINE_COLORS)],
                          label=FIGURE_LABEL[i], edgecolor='black', linewidth=3)

    if allow_legend:
        plt.legend(bars, FIGURE_LABEL,
                   prop={'size': 28},
                   ncol=2,
                   loc='upper center',
                   bbox_to_anchor=(-0.4, 0.75),
                   shadow=True, frameon=True, edgecolor='black', borderaxespad=0, columnspacing=0.2,
                   handletextpad=0.2)

    plt.xticks(index + len(x_values) / 2 * width, x_values, rotation=0)

    plt.yscale('log')
    figure.yaxis.set_major_locator(LogLocator(10))
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)

    plt.xticks(fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)
    plt.grid(axis='y', color='gray', alpha=0.5, linewidth=0.5)
    plt.xlabel(x_label, fontsize=LABEL_FONT_SIZE)
    plt.ylabel(y_label, fontsize=LABEL_FONT_SIZE)

    fig.savefig(filename + ".pdf", bbox_inches='tight')


def DrawFigureYLog2(x_values, y_values, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Log-y grouped bar chart with a reference line at y=1.0, saved to <filename>.pdf."""
    fig = plt.figure(figsize=(20, 6))
    figure = fig.add_subplot(111)

    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#DC143C', '#00FF7F'
    ]
    HATCH_PATTERNS = ['/', '-', 'o', '///', '\\', '|', 'x', '\\\\', '+', '.', '*', 'oo', '++++', '....', 'xxx']

    FIGURE_LABEL = legend_labels
    index = np.arange(len(x_values))
    width = 0.5 / 3
    bars = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        bars[i] = plt.bar(index + i * width + width / 2,
                          y_values[i], width,
                          hatch=HATCH_PATTERNS[i % len(HATCH_PATTERNS)],
                          color=LINE_COLORS[i % len(LINE_COLORS)],
                          label=FIGURE_LABEL[i], edgecolor='black', linewidth=3)

    if allow_legend:
        plt.legend(bars, FIGURE_LABEL,
                   prop={'size': 28},
                   # BUG FIX: ncol must be an int (was len(bars) / 2, a float).
                   ncol=max(1, len(bars) // 2),
                   loc='upper center',
                   bbox_to_anchor=(0.5, 1.3),
                   shadow=True, frameon=True, edgecolor='black', borderaxespad=0, columnspacing=0.5,
                   handletextpad=0.1,
                   labelspacing=0.)

    plt.xticks(index + 0.75 * width, x_values, rotation=30)
    plt.xticks(fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)
    plt.xlabel(x_label, fontsize=24)
    plt.ylabel(y_label, fontsize=24)
    plt.axhline(y=1.0, color='red', linestyle='--')
    figure.text(1.8, 5.0, "Instructions=1.0", fontsize=TICK_FONT_SIZE, ha='center')
    plt.yscale('log')
    figure.yaxis.set_major_locator(LogLocator(10))
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)

    plt.grid(axis='y', color='gray', alpha=0.5, linewidth=0.5)

    fig.savefig(filename + ".pdf", bbox_inches='tight')


def _latency_column(tag):
    """95th-percentile latency for run ids 38..41 of one algorithm *tag*.

    Closes each file deterministically (the original leaked the handles).
    """
    col = []
    for run_id in it.chain(range(38, 42)):
        path = exp_dir + '/results/latency/{}_{}.txt'.format(tag, run_id)
        with open(path, "r") as f:
            read = f.readlines()
        col.append(float(read.pop(int(len(read) * 0.95)).strip("\n")))
    return col


# example for reading csv file
def ReadFile():
    """Load 95th-percentile latency columns for the eight join algorithms.

    A zero column is inserted before each group as a fake empty bar that
    separates the lazy and eager algorithm families in the plot.
    """
    separator = [0] * 4  # one zero per run id (38..41)
    y = [separator]
    for tag in ('NPJ', 'PRJ', 'MWAY', 'MPASS'):
        y.append(_latency_column(tag))
    y.append(separator)
    for tag in ('SHJ_JM_NP', 'SHJ_JBCR_NP', 'PMJ_JM_NP', 'PMJ_JBCR_NP'):
        y.append(_latency_column(tag))
    return y


if __name__ == "__main__":
    x_values = ["Stock", "Rovio", "YSB", "DEBS"]

    y_values = ReadFile()

    legend_labels = ['Lazy:', 'NPJ', 'PRJ', 'MWAY', 'MPASS',
                     'Eager:', 'SHJ$^{JM}$', 'SHJ$^{JB}$', 'PMJ$^{JM}$', 'PMJ$^{JB}$']
    print(y_values)
    DrawFigure(x_values, y_values, legend_labels,
               '', 'Latency (ms)', 0,
               400, 'latency_figure_app', False)

    # DrawLegend(legend_labels, 'latency_legend')
"""Line-chart plotting helpers (groupLine.py)."""
import itertools as it
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pylab
import matplotlib.patches as patches
from matplotlib.font_manager import FontProperties
from matplotlib.ticker import LinearLocator, LogLocator, MaxNLocator, ScalarFormatter
from numpy import double

OPT_FONT_NAME = 'Helvetica'
TICK_FONT_SIZE = 32
LABEL_FONT_SIZE = 30
LEGEND_FONT_SIZE = 32
LABEL_FP = FontProperties(style='normal', size=LABEL_FONT_SIZE)
LEGEND_FP = FontProperties(style='normal', size=LEGEND_FONT_SIZE)
TICK_FP = FontProperties(style='normal', size=TICK_FONT_SIZE)
MARKERS = ['s', 'o', '^', 'v', '+', '*', 'h', 'x', 'p', '1', '2', 'o', '+', '|']
COLOR_MAP = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22',
             '#17becf', '#1f77b4']
# you may want to change the patterns for different figures
PATTERNS = (["|", "\\", "/", "+", "-", ".", "*", "x", "o", "O", "////", ".", "|||", "o", "---", "+", "\\\\", "*"])
LABEL_WEIGHT = 'bold'
LINE_COLORS = COLOR_MAP
LINE_WIDTH = 3.0
MARKER_SIZE = 13.0
MARKER_FREQUENCY = 1000

matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['xtick.labelsize'] = TICK_FONT_SIZE
matplotlib.rcParams['ytick.labelsize'] = TICK_FONT_SIZE
matplotlib.rcParams['font.family'] = OPT_FONT_NAME

FIGURE_FOLDER = '/data1/xtra/results/figure'


# there are some embedding problems if directly exporting the pdf figure using matplotlib.
# so we generate the eps format first and convert it to pdf.
def ConvertEpsToPdf(dir_filename):
    """Convert <dir_filename>.eps to .pdf via epstopdf, then delete the .eps."""
    os.system("epstopdf --outfile " + dir_filename + ".pdf " + dir_filename + ".eps")
    os.system("rm -rf " + dir_filename + ".eps")


def DrawLegend(legend_labels, filename):
    """Render a stand-alone horizontal line-style legend strip to <filename>.pdf."""
    fig = pylab.figure()
    ax1 = fig.add_subplot(111)
    FIGURE_LABEL = legend_labels
    LINE_WIDTH = 8.0
    MARKER_SIZE = 12.0
    LEGEND_FP = FontProperties(style='normal', size=26)

    figlegend = pylab.figure(figsize=(12, 0.5))
    lines = [None] * len(FIGURE_LABEL)
    data = [1]
    x_values = [1]

    # One dummy line per label so the legend picks up color/marker.
    for idx in range(len(FIGURE_LABEL)):
        lines[idx], = ax1.plot(x_values, data,
                               color=LINE_COLORS[idx], linewidth=LINE_WIDTH,
                               marker=MARKERS[idx], markersize=MARKER_SIZE, label=str(idx))

    figlegend.legend(lines, FIGURE_LABEL, prop=LEGEND_FP,
                     loc=1, ncol=len(FIGURE_LABEL), mode="expand", shadow=False,
                     frameon=False, borderaxespad=0.0, handlelength=2)

    if not os.path.exists(FIGURE_FOLDER):
        os.makedirs(FIGURE_FOLDER)
    # no need to export eps in this case.
    figlegend.savefig(filename + '.pdf')


def DrawFigure2(xvalues, yvalues, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Multi-series line chart saved to <filename>.pdf.

    xvalues/yvalues are parallel lists of per-series point lists.
    """
    fig = plt.figure(figsize=(10, 4))

    markers = ['s', 'o', '^', 'v', '+', '*', ',', 'x', 'p', '1', '2', 'o']
    linestyles = ['-.', '-.', 'dotted', 'dotted', 'dotted', 'dotted', 'dotted', ':', 'dashed', 'dotted', 'dotted', '-']
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22',
              '#17becf', '#1f77b4']
    linewidth = 2

    FIGURE_LABEL = legend_labels
    x_values = xvalues
    y_values = yvalues

    lines = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        lines[i], = plt.plot(x_values[i], y_values[i], color=colors[i],
                             linewidth=linewidth, marker=markers[i],
                             markersize=9, linestyle=linestyles[i],
                             label=FIGURE_LABEL[i])

    if allow_legend:
        plt.legend(lines,
                   FIGURE_LABEL,
                   fontsize=12,
                   loc='upper center',
                   ncol=3,
                   bbox_to_anchor=(0.5, 1.15),
                   borderaxespad=0.,
                   frameon=True)
    plt.xlabel(x_label, fontsize=20)
    plt.ylabel(y_label, fontsize=20)

    plt.ylim(y_min, y_max)
    plt.grid(axis='y', color='gray', alpha=0.5, linewidth=0.5)

    fig.savefig(filename + ".pdf", bbox_inches='tight')


def DrawFigureYnormal(xvalues, yvalues, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Multi-series line chart with linear axes saved to <filename>.pdf.

    y_min/y_max are accepted for interface compatibility but not applied.
    """
    fig = plt.figure(figsize=(10, 6))
    figure = fig.add_subplot(111)
    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#00000F', '#00FF7F'
    ]
    FIGURE_LABEL = legend_labels

    x_values = xvalues
    y_values = yvalues
    print(len(FIGURE_LABEL), len(x_values))
    lines = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        lines[i], = figure.plot(x_values[i], y_values[i], color=LINE_COLORS[i],
                                linewidth=LINE_WIDTH, marker=MARKERS[i],
                                markersize=MARKER_SIZE, label=FIGURE_LABEL[i], markeredgecolor='k')

    if allow_legend == True:
        plt.legend(lines,
                   FIGURE_LABEL,
                   prop=LEGEND_FP,
                   loc='upper center',
                   ncol=1,
                   bbox_to_anchor=(-0.3, 1.0), shadow=False,
                   columnspacing=0.1,
                   frameon=True, borderaxespad=0, handlelength=1.2,
                   handletextpad=0.1,
                   labelspacing=0.1)

    plt.grid(axis='y', color='gray')
    plt.xticks(fontsize=TICK_FONT_SIZE)
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)
    plt.xlabel(x_label, fontproperties=LABEL_FP)
    plt.ylabel(y_label, fontproperties=LABEL_FP)
    plt.xticks(fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)
    plt.savefig(filename + ".pdf", bbox_inches='tight')


def DrawFigureYLog(xvalues, yvalues, legend_labels, x_label, y_label, y_min, y_max, filename, allow_legend):
    """Multi-series line chart with log-log axes saved to <filename>.pdf.

    y_min/y_max are accepted for interface compatibility but not applied.
    """
    fig = plt.figure(figsize=(20, 6))
    figure = fig.add_subplot(111)
    LINE_COLORS = [
        '#FF8C00', '#FFE4C4', '#00FFFF', '#E0FFFF',
        '#FF6347', '#98FB98', '#800080', '#FFD700',
        '#7CFC00', '#8A2BE2', '#FF4500', '#20B2AA',
        '#B0E0E6', '#00000F', '#00FF7F'
    ]
    FIGURE_LABEL = legend_labels

    x_values = xvalues
    y_values = yvalues
    print(len(FIGURE_LABEL), len(x_values))
    lines = [None] * len(FIGURE_LABEL)
    for i in range(len(y_values)):
        lines[i], = figure.plot(x_values[i], y_values[i], color=LINE_COLORS[i],
                                linewidth=LINE_WIDTH, marker=MARKERS[i],
                                markersize=MARKER_SIZE, label=FIGURE_LABEL[i], markeredgecolor='k')

    if allow_legend == True:
        plt.legend(lines,
                   FIGURE_LABEL,
                   prop=LEGEND_FP,
                   loc='upper center',
                   ncol=2,
                   bbox_to_anchor=(-0.285, 0.7), shadow=False,
                   columnspacing=0.1,
                   frameon=True, borderaxespad=0, handlelength=1.2,
                   handletextpad=0.1,
                   labelspacing=0.1)
    plt.xscale('log')

    plt.yscale('log')

    plt.grid(axis='y', color='gray')
    figure.yaxis.set_major_locator(LogLocator(base=10))
    figure.xaxis.set_major_locator(LogLocator(base=10))
    plt.xticks(fontsize=TICK_FONT_SIZE)
    figure.get_xaxis().set_tick_params(direction='in', pad=10)
    figure.get_yaxis().set_tick_params(direction='in', pad=10)
    plt.xlabel(x_label, fontproperties=LABEL_FP)
    plt.ylabel(y_label, fontproperties=LABEL_FP)
    plt.xticks(fontsize=TICK_FONT_SIZE)
    plt.yticks(fontsize=TICK_FONT_SIZE)

    plt.savefig(filename + ".pdf", bbox_inches='tight')


def _throughput_column(tag):
    """Throughput (#items/ms) for run ids 28..31 of one algorithm *tag*.

    Closes each file deterministically (the original leaked the handles).
    """
    col = []
    for run_id in it.chain(range(28, 32)):
        path = '/data1/xtra/results/timestamps/{}_{}.txt'.format(tag, run_id)
        with open(path, "r") as f:
            read = f.readlines()
        last = float(read.pop(len(read) - 1).strip("\n"))  # last timestamp
        col.append(len(read) / last)  # get throughput (#items/ms)
    return col


# example for reading csv file
def ReadFile():
    """Load throughput columns for the eight join algorithms."""
    tags = ('PRJ', 'NPJ', 'MPASS', 'MWAY',
            'SHJ_JM_NP', 'SHJ_JBCR_NP', 'PMJ_JM_NP', 'PMJ_JBCR_NP')
    return [_throughput_column(t) for t in tags]


if __name__ == "__main__":
    x_values = [1600, 3200, 6400, 12800, 25600]

    y_values = ReadFile()

    legend_labels = ['NPJ', 'PRJ', 'MWAY', 'MPASS', 'SHJ$^{JM}$', 'SHJ$^{JB}$', 'PMJ$^{JM}$',
                     'PMJ$^{JB}$']

    # BUG FIX: this module defines DrawFigure2, not DrawFigure — the original
    # call raised NameError.  DrawFigure2 has the matching 9-argument signature.
    # NOTE(review): DrawFigure2 expects per-series x lists; verify that the
    # flat x_values list matches what the caller intends to plot.
    DrawFigure2(x_values, y_values, legend_labels,
                'Input arrival rate of R (e/ms)', 'Tpt. (#matches/ms)', x_values[0],
                x_values[4], 'throughput_figure1_1', False)

# DrawLegend(legend_labels, 'factor_legend')
/dev/null +++ b/benchmark/scripts/breakdownHNSW/temp3.csv @@ -0,0 +1,44 @@ +key,value,type +vecDim,768,I64 +vecVolume,100000,I64 +batchSize,4000,I64 +metricType,IP,String +DCOBatchSize,5000,I64 +ammAlgo,crs,String +sketchSize,128,I64 +initialRows,50000,I64 +driftPosition,50000,I64 +driftOffset,0.1,Double +indexTag,congestionDrop,String +congestionDropWorker_algoTag,faiss,String +eventRateTps,4000,I64 +querySize,100,I64 +zipfAlpha,0,Double +coarseGrainedClusters,96,I64 +maskReference,0,Double +encodeLen,1,I64 +numberOfBuckets,8192,I64 +cutOffTimeSeconds,14400,I64 +useSeparateQuery,1,I64 +sampleRows,2048,I64 +faissIndexTag,flat,String +useCRS,1,I64 +crsDim,10,I64 +dataLoaderTag,fvecs,String +initialRows,50000,I64 +staticDataSet,0,I64 +maxBuildIteration,200,I64 +lshMatrixType,random,String +ANNK,10,I64 +frozenLevel,0,I64 +cudaBuild,1,I64 +candidateTimes,1,I64 +disableADC,0,I64 +isOnlinePQ,0,I64 +fineGrainedBuiltPath,OnlinePQIndex_fine.rbt,String +dataPath,/home/rag/projects/CANDY/build/benchmark//results/scanIPConceptDriftHotSpot/driftData/data_0.8.fvecs,String +queryPath,datasets/DPR/DPR10KC4Q.fvecs,String +waitPendingWrite,1,I64 +is_NSW,0,I64 +isOnlinePQ,0,I64 +flannIndexTag,1,I64 diff --git a/src/CANDY/FaissIndex.cpp b/src/CANDY/FaissIndex.cpp index 12546a87d..7436b7477 100644 --- a/src/CANDY/FaissIndex.cpp +++ b/src/CANDY/FaissIndex.cpp @@ -8,7 +8,7 @@ #include #include #include - +#include bool CANDY::FaissIndex::setConfig(INTELLI::ConfigMapPtr cfg) { AbstractIndex::setConfig(cfg); INTELLI_INFO("SETTING CONFIG FOR FaissIndex"); @@ -24,7 +24,11 @@ bool CANDY::FaissIndex::setConfig(INTELLI::ConfigMapPtr cfg) { INTELLI_INFO("ENCAPSULATED FAISS INDEX: USE HNSWFlat"); auto M = cfg->tryI64("maxConnection", 32, true); index = new faiss::IndexHNSWFlat(vecDim, M, faissMetric); - } else if (index_type == "PQ") { + } else if (index_type == "HNSWbd") { + INTELLI_INFO("ENCAPSULATED FAISS INDEX: USE HNSWFlat with breakdown enabled!"); + auto M = cfg->tryI64("maxConnection", 
32, true); + index = new faiss::IndexHNSWbdFlat(vecDim, M, faissMetric); +} else if (index_type == "PQ") { INTELLI_INFO("ENCAPSULATED FAISS INDEX: USE PQ"); // number of bits in PQ auto nbits = cfg->tryI64("encodeLenBits", bytes * 8, true); @@ -251,4 +255,4 @@ std::vector CANDY::FaissIndex::getTensorByIndex(std::vector + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { + +/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ + +int sgemm_( + const char* transa, + const char* transb, + FINTEGER* m, + FINTEGER* n, + FINTEGER* k, + const float* alpha, + const float* a, + FINTEGER* lda, + const float* b, + FINTEGER* ldb, + float* beta, + float* c, + FINTEGER* ldc); +} + +namespace faiss { + +using MinimaxHeap = HNSWbd::MinimaxHeap; +using storage_idx_t = HNSWbd::storage_idx_t; +using NodeDistFarther = HNSWbd::NodeDistFarther; + + + +/************************************************************** + * add / search blocks of descriptors + **************************************************************/ + +namespace { + +/* Wrap the distance computer into one that negates the + distances. 
This makes supporting INNER_PRODUCE search easier */ + +struct NegativeDistanceComputer : DistanceComputer { + /// owned by this + DistanceComputer* basedis; + + explicit NegativeDistanceComputer(DistanceComputer* basedis) + : basedis(basedis) {} + + void set_query(const float* x) override { + basedis->set_query(x); + } + + /// compute distance of vector i to current query + float operator()(idx_t i) override { + return -(*basedis)(i); + } + + void distances_batch_4( + const idx_t idx0, + const idx_t idx1, + const idx_t idx2, + const idx_t idx3, + float& dis0, + float& dis1, + float& dis2, + float& dis3) override { + basedis->distances_batch_4( + idx0, idx1, idx2, idx3, dis0, dis1, dis2, dis3); + dis0 = -dis0; + dis1 = -dis1; + dis2 = -dis2; + dis3 = -dis3; + } + + /// compute distance between two stored vectors + float symmetric_dis(idx_t i, idx_t j) override { + return -basedis->symmetric_dis(i, j); + } + + virtual ~NegativeDistanceComputer() { + delete basedis; + } +}; + +DistanceComputer* storage_distance_computer(const Index* storage) { + if (is_similarity_metric(storage->metric_type)) { + return new NegativeDistanceComputer(storage->get_distance_computer()); + } else { + return storage->get_distance_computer(); + } +} + +void hnsw_add_vertices( + IndexHNSWbd& index_hnsw, + size_t n0, + size_t n, + const float* x, + bool verbose, + bool preset_levels = false) { + size_t d = index_hnsw.d; + HNSWbd& hnsw = index_hnsw.hnsw; + size_t ntotal = n0 + n; + double t0 = getmillisecs(); + if (verbose) { + //printf("hnsw_add_vertices: adding %zd elements on top of %zd " + // "(preset_levels=%d)\n", + // n, + // n0, + //int(preset_levels)); + } + + if (n == 0) { + return; + } + + int max_level = hnsw.prepare_level_tab(n, preset_levels); + + if (verbose) { + //printf(" max_level = %d\n", max_level); + } + + std::vector locks(ntotal); + for (int i = 0; i < ntotal; i++) + omp_init_lock(&locks[i]); + + // add vectors from highest to lowest level + std::vector hist; + 
std::vector order(n); + + { // make buckets with vectors of the same level + + // build histogram + for (int i = 0; i < n; i++) { + storage_idx_t pt_id = i + n0; + int pt_level = hnsw.levels[pt_id] - 1; + while (pt_level >= hist.size()) + hist.push_back(0); + hist[pt_level]++; + } + + // accumulate + std::vector offsets(hist.size() + 1, 0); + for (int i = 0; i < hist.size() - 1; i++) { + offsets[i + 1] = offsets[i] + hist[i]; + } + + // bucket sort + for (int i = 0; i < n; i++) { + storage_idx_t pt_id = i + n0; + int pt_level = hnsw.levels[pt_id] - 1; + order[offsets[pt_level]++] = pt_id; + } + } + + idx_t check_period = InterruptCallback::get_period_hint( + max_level * index_hnsw.d * hnsw.efConstruction); + + { // perform add + RandomGenerator rng2(789); + + int i1 = n; + + for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) { + int i0 = i1 - hist[pt_level]; + + if (verbose) { + // printf("Adding %d elements at level %d\n", i1 - i0, pt_level); + } + + // random permutation to get rid of dataset order bias + for (int j = i0; j < i1; j++) + std::swap(order[j], order[j + rng2.rand_int(i1 - j)]); + + bool interrupt = false; + +//#pragma omp parallel if (i1 > i0 + 100) + { + VisitedTable vt(ntotal); + + std::unique_ptr dis( + storage_distance_computer(index_hnsw.storage)); + int prev_display = + verbose && omp_get_thread_num() == 0 ? 0 : -1; + size_t counter = 0; + + // here we should do schedule(dynamic) but this segfaults for + // some versions of LLVM. 
The performance impact should not be + // too large when (i1 - i0) / num_threads >> 1 +//#pragma omp for schedule(static) + for (int i = i0; i < i1; i++) { + storage_idx_t pt_id = order[i]; + dis->set_query(x + (pt_id - n0) * d); + + // cannot break + if (interrupt) { + continue; + } + + hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt); + + // if (prev_display >= 0 && i - i0 > prev_display + 10000) { + // prev_display = i - i0; + // printf(" %d / %d\r", i - i0, i1 - i0); + // fflush(stdout); + //} + if (counter % check_period == 0) { + if (InterruptCallback::is_interrupted()) { + interrupt = true; + } + } + counter++; + } + } + if (interrupt) { + FAISS_THROW_MSG("computation interrupted"); + } + i1 = i0; + } + FAISS_ASSERT(i1 == 0); + } + if (verbose) { + // printf("Done in %.3f ms\n", getmillisecs() - t0); + } + + for (int i = 0; i < ntotal; i++) { + omp_destroy_lock(&locks[i]); + } +} + +} // namespace + +/************************************************************** + * IndexHNSW implementation + **************************************************************/ + +IndexHNSWbd::IndexHNSWbd(int d, int M, MetricType metric) + : Index(d, metric), hnsw(M) {} + +IndexHNSWbd::IndexHNSWbd(Index* storage, int M) + : Index(storage->d, storage->metric_type), hnsw(M), storage(storage) {} + +IndexHNSWbd::~IndexHNSWbd() { + if (own_fields) { + delete storage; + } +} + +void IndexHNSWbd::train(idx_t n, const float* x) { + FAISS_THROW_IF_NOT_MSG( + storage, + "Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly"); + // hnsw structure does not require training + storage->train(n, x); + is_trained = true; +} + +void IndexHNSWbd::search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params_in) const { + FAISS_THROW_IF_NOT(k > 0); + FAISS_THROW_IF_NOT_MSG( + storage, + "Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly"); + const SearchParametersHNSW* params = nullptr; + + int efSearch 
= hnsw.efSearch; + if (params_in) { + params = dynamic_cast(params_in); + FAISS_THROW_IF_NOT_MSG(params, "params type invalid"); + efSearch = params->efSearch; + } + size_t n1 = 0, n2 = 0, n3 = 0, ndis = 0, nreorder = 0; + + idx_t check_period = + InterruptCallback::get_period_hint(hnsw.max_level * d * efSearch); + hnsw.bd_stat.reset(); + for (idx_t i0 = 0; i0 < n; i0 += check_period) { + idx_t i1 = std::min(i0 + check_period, n); + +//#pragma omp parallel + { + VisitedTable vt(ntotal); + + std::unique_ptr dis( + storage_distance_computer(storage)); + +//#pragma omp for reduction(+ : n1, n2, n3, ndis, nreorder) schedule(guided) + for (idx_t i = i0; i < i1; i++) { + idx_t* idxi = labels + i * k; + float* simi = distances + i * k; + dis->set_query(x + i * d); + + maxheap_heapify(k, simi, idxi); + HNSWStats stats = hnsw.search(*dis, k, idxi, simi, vt, params); + n1 += stats.n1; + n2 += stats.n2; + n3 += stats.n3; + ndis += stats.ndis; + nreorder += stats.nreorder; + maxheap_reorder(k, simi, idxi); + + if (reconstruct_from_neighbors && + reconstruct_from_neighbors->k_reorder != 0) { + int k_reorder = reconstruct_from_neighbors->k_reorder; + if (k_reorder == -1 || k_reorder > k) + k_reorder = k; + + nreorder += reconstruct_from_neighbors->compute_distances( + k_reorder, idxi, x + i * d, simi); + + // sort top k_reorder + maxheap_heapify( + k_reorder, simi, idxi, simi, idxi, k_reorder); + maxheap_reorder(k_reorder, simi, idxi); + } + } + } + InterruptCallback::check(); + } + + if (is_similarity_metric(metric_type)) { + // we need to revert the negated distances + for (size_t i = 0; i < k * n; i++) { + distances[i] = -distances[i]; + } + } + + hnsw_stats.combine({n1, n2, n3, ndis, nreorder}); + //hnsw.bd_stat.print(); +} + +void IndexHNSWbd::add(idx_t n, const float* x) { + FAISS_THROW_IF_NOT_MSG( + storage, + "Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly"); + FAISS_THROW_IF_NOT(is_trained); + int n0 = ntotal; + storage->add(n, x); + ntotal = 
storage->ntotal; + hnsw.bd_stat.reset(); + hnsw_add_vertices(*this, n0, n, x, verbose, hnsw.levels.size() == ntotal); + hnsw.bd_stat.print(); +} + +void IndexHNSWbd::reset() { + hnsw.reset(); + storage->reset(); + ntotal = 0; +} + +void IndexHNSWbd::reconstruct(idx_t key, float* recons) const { + storage->reconstruct(key, recons); +} + +void IndexHNSWbd::shrink_level_0_neighbors(int new_size) { +//#pragma omp parallel + { + std::unique_ptr dis( + storage_distance_computer(storage)); + +//#pragma omp for + for (idx_t i = 0; i < ntotal; i++) { + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + + std::priority_queue initial_list; + + for (size_t j = begin; j < end; j++) { + int v1 = hnsw.neighbors[j]; + if (v1 < 0) + break; + initial_list.emplace(dis->symmetric_dis(i, v1), v1); + + // initial_list.emplace(qdis(v1), v1); + } + + std::vector shrunk_list; + HNSWbd::shrink_neighbor_list( + *dis, initial_list, shrunk_list, new_size,hnsw.bd_stat); + + for (size_t j = begin; j < end; j++) { + if (j - begin < shrunk_list.size()) + hnsw.neighbors[j] = shrunk_list[j - begin].id; + else + hnsw.neighbors[j] = -1; + } + } + } +} + +void IndexHNSWbd::search_level_0( + idx_t n, + const float* x, + idx_t k, + const storage_idx_t* nearest, + const float* nearest_d, + float* distances, + idx_t* labels, + int nprobe, + int search_type) const { + FAISS_THROW_IF_NOT(k > 0); + FAISS_THROW_IF_NOT(nprobe > 0); + + storage_idx_t ntotal = hnsw.levels.size(); + +//#pragma omp parallel + { + std::unique_ptr qdis( + storage_distance_computer(storage)); + HNSWStats search_stats; + VisitedTable vt(ntotal); + +//#pragma omp for + for (idx_t i = 0; i < n; i++) { + idx_t* idxi = labels + i * k; + float* simi = distances + i * k; + + qdis->set_query(x + i * d); + maxheap_heapify(k, simi, idxi); + + hnsw.search_level_0( + *qdis.get(), + k, + idxi, + simi, + nprobe, + nearest + i * nprobe, + nearest_d + i * nprobe, + search_type, + search_stats, + vt); + + vt.advance(); + 
maxheap_reorder(k, simi, idxi); + } +//#pragma omp critical + { hnsw_stats.combine(search_stats); } + } +} + +void IndexHNSWbd::init_level_0_from_knngraph( + int k, + const float* D, + const idx_t* I) { + int dest_size = hnsw.nb_neighbors(0); + +//#pragma omp parallel for + for (idx_t i = 0; i < ntotal; i++) { + DistanceComputer* qdis = storage_distance_computer(storage); + std::vector vec(d); + storage->reconstruct(i, vec.data()); + qdis->set_query(vec.data()); + + std::priority_queue initial_list; + + for (size_t j = 0; j < k; j++) { + int v1 = I[i * k + j]; + if (v1 == i) + continue; + if (v1 < 0) + break; + initial_list.emplace(D[i * k + j], v1); + } + + std::vector shrunk_list; + HNSWbd::shrink_neighbor_list(*qdis, initial_list, shrunk_list, dest_size, hnsw.bd_stat); + + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + + for (size_t j = begin; j < end; j++) { + if (j - begin < shrunk_list.size()) + hnsw.neighbors[j] = shrunk_list[j - begin].id; + else + hnsw.neighbors[j] = -1; + } + } +} + +void IndexHNSWbd::init_level_0_from_entry_points( + int n, + const storage_idx_t* points, + const storage_idx_t* nearests) { + std::vector locks(ntotal); + for (int i = 0; i < ntotal; i++) + omp_init_lock(&locks[i]); + +//#pragma omp parallel + { + VisitedTable vt(ntotal); + + std::unique_ptr dis( + storage_distance_computer(storage)); + std::vector vec(storage->d); + +//#pragma omp for schedule(dynamic) + for (int i = 0; i < n; i++) { + storage_idx_t pt_id = points[i]; + storage_idx_t nearest = nearests[i]; + storage->reconstruct(pt_id, vec.data()); + dis->set_query(vec.data()); + + hnsw.add_links_starting_from( + *dis, pt_id, nearest, (*dis)(nearest), 0, locks.data(), vt); + + if (verbose && i % 10000 == 0) { + printf(" %d / %d\r", i, n); + fflush(stdout); + } + } + } + if (verbose) { + printf("\n"); + } + + for (int i = 0; i < ntotal; i++) + omp_destroy_lock(&locks[i]); +} + +void IndexHNSWbd::reorder_links() { + int M = hnsw.nb_neighbors(0); + +//#pragma 
omp parallel + { + std::vector distances(M); + std::vector order(M); + std::vector tmp(M); + std::unique_ptr dis( + storage_distance_computer(storage)); + +//#pragma omp for + for (storage_idx_t i = 0; i < ntotal; i++) { + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + + for (size_t j = begin; j < end; j++) { + storage_idx_t nj = hnsw.neighbors[j]; + if (nj < 0) { + end = j; + break; + } + distances[j - begin] = dis->symmetric_dis(i, nj); + tmp[j - begin] = nj; + } + + fvec_argsort(end - begin, distances.data(), order.data()); + for (size_t j = begin; j < end; j++) { + hnsw.neighbors[j] = tmp[order[j - begin]]; + } + } + } +} + +void IndexHNSWbd::link_singletons() { + printf("search for singletons\n"); + + std::vector seen(ntotal); + + for (size_t i = 0; i < ntotal; i++) { + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + for (size_t j = begin; j < end; j++) { + storage_idx_t ni = hnsw.neighbors[j]; + if (ni >= 0) + seen[ni] = true; + } + } + + int n_sing = 0, n_sing_l1 = 0; + std::vector singletons; + for (storage_idx_t i = 0; i < ntotal; i++) { + if (!seen[i]) { + singletons.push_back(i); + n_sing++; + if (hnsw.levels[i] > 1) + n_sing_l1++; + } + } + + printf(" Found %d / %" PRId64 " singletons (%d appear in a level above)\n", + n_sing, + ntotal, + n_sing_l1); + + std::vector recons(singletons.size() * d); + for (int i = 0; i < singletons.size(); i++) { + FAISS_ASSERT(!"not implemented"); + } +} + +void IndexHNSWbd::permute_entries(const idx_t* perm) { + auto flat_storage = dynamic_cast(storage); + FAISS_THROW_IF_NOT_MSG( + flat_storage, "don't know how to permute this index"); + flat_storage->permute_entries(perm); + hnsw.permute_entries(perm); +} + +/************************************************************** + * ReconstructFromNeighbors implementation + **************************************************************/ + +ReconstructFromNeighborsbd::ReconstructFromNeighborsbd( + const IndexHNSWbd& index, + size_t k, + size_t 
nsq) + : index(index), k(k), nsq(nsq) { + M = index.hnsw.nb_neighbors(0); + FAISS_ASSERT(k <= 256); + code_size = k == 1 ? 0 : nsq; + ntotal = 0; + d = index.d; + FAISS_ASSERT(d % nsq == 0); + dsub = d / nsq; + k_reorder = -1; +} + +void ReconstructFromNeighborsbd::reconstruct( + storage_idx_t i, + float* x, + float* tmp) const { + const HNSWbd& hnsw = index.hnsw; + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + + if (k == 1 || nsq == 1) { + const float* beta; + if (k == 1) { + beta = codebook.data(); + } else { + int idx = codes[i]; + beta = codebook.data() + idx * (M + 1); + } + + float w0 = beta[0]; // weight of image itself + index.storage->reconstruct(i, tmp); + + for (int l = 0; l < d; l++) + x[l] = w0 * tmp[l]; + + for (size_t j = begin; j < end; j++) { + storage_idx_t ji = hnsw.neighbors[j]; + if (ji < 0) + ji = i; + float w = beta[j - begin + 1]; + index.storage->reconstruct(ji, tmp); + for (int l = 0; l < d; l++) + x[l] += w * tmp[l]; + } + } else if (nsq == 2) { + int idx0 = codes[2 * i]; + int idx1 = codes[2 * i + 1]; + + const float* beta0 = codebook.data() + idx0 * (M + 1); + const float* beta1 = codebook.data() + (idx1 + k) * (M + 1); + + index.storage->reconstruct(i, tmp); + + float w0; + + w0 = beta0[0]; + for (int l = 0; l < dsub; l++) + x[l] = w0 * tmp[l]; + + w0 = beta1[0]; + for (int l = dsub; l < d; l++) + x[l] = w0 * tmp[l]; + + for (size_t j = begin; j < end; j++) { + storage_idx_t ji = hnsw.neighbors[j]; + if (ji < 0) + ji = i; + index.storage->reconstruct(ji, tmp); + float w; + w = beta0[j - begin + 1]; + for (int l = 0; l < dsub; l++) + x[l] += w * tmp[l]; + + w = beta1[j - begin + 1]; + for (int l = dsub; l < d; l++) + x[l] += w * tmp[l]; + } + } else { + std::vector betas(nsq); + { + const float* b = codebook.data(); + const uint8_t* c = &codes[i * code_size]; + for (int sq = 0; sq < nsq; sq++) { + betas[sq] = b + (*c++) * (M + 1); + b += (M + 1) * k; + } + } + + index.storage->reconstruct(i, tmp); + { + int d0 = 0; + 
for (int sq = 0; sq < nsq; sq++) { + float w = *(betas[sq]++); + int d1 = d0 + dsub; + for (int l = d0; l < d1; l++) { + x[l] = w * tmp[l]; + } + d0 = d1; + } + } + + for (size_t j = begin; j < end; j++) { + storage_idx_t ji = hnsw.neighbors[j]; + if (ji < 0) + ji = i; + + index.storage->reconstruct(ji, tmp); + int d0 = 0; + for (int sq = 0; sq < nsq; sq++) { + float w = *(betas[sq]++); + int d1 = d0 + dsub; + for (int l = d0; l < d1; l++) { + x[l] += w * tmp[l]; + } + d0 = d1; + } + } + } +} + +void ReconstructFromNeighborsbd::reconstruct_n( + storage_idx_t n0, + storage_idx_t ni, + float* x) const { +//#pragma omp parallel + { + std::vector tmp(index.d); +//#pragma omp for + for (storage_idx_t i = 0; i < ni; i++) { + reconstruct(n0 + i, x + i * index.d, tmp.data()); + } + } +} + +size_t ReconstructFromNeighborsbd::compute_distances( + size_t n, + const idx_t* shortlist, + const float* query, + float* distances) const { + std::vector tmp(2 * index.d); + size_t ncomp = 0; + for (int i = 0; i < n; i++) { + if (shortlist[i] < 0) + break; + reconstruct(shortlist[i], tmp.data(), tmp.data() + index.d); + distances[i] = fvec_L2sqr(query, tmp.data(), index.d); + ncomp++; + } + return ncomp; +} + +void ReconstructFromNeighborsbd::get_neighbor_table(storage_idx_t i, float* tmp1) + const { + const HNSWbd& hnsw = index.hnsw; + size_t begin, end; + hnsw.neighbor_range(i, 0, &begin, &end); + size_t d = index.d; + + index.storage->reconstruct(i, tmp1); + + for (size_t j = begin; j < end; j++) { + storage_idx_t ji = hnsw.neighbors[j]; + if (ji < 0) + ji = i; + index.storage->reconstruct(ji, tmp1 + (j - begin + 1) * d); + } +} + +/// called by add_codes +void ReconstructFromNeighborsbd::estimate_code( + const float* x, + storage_idx_t i, + uint8_t* code) const { + // fill in tmp table with the neighbor values + std::unique_ptr tmp1(new float[d * (M + 1) + (d * k)]); + float* tmp2 = tmp1.get() + d * (M + 1); + + // collect coordinates of base + get_neighbor_table(i, tmp1.get()); + 
+ for (size_t sq = 0; sq < nsq; sq++) { + int d0 = sq * dsub; + + { + FINTEGER ki = k, di = d, m1 = M + 1; + FINTEGER dsubi = dsub; + float zero = 0, one = 1; + + sgemm_("N", + "N", + &dsubi, + &ki, + &m1, + &one, + tmp1.get() + d0, + &di, + codebook.data() + sq * (m1 * k), + &m1, + &zero, + tmp2, + &dsubi); + } + + float min = HUGE_VAL; + int argmin = -1; + for (size_t j = 0; j < k; j++) { + float dis = fvec_L2sqr(x + d0, tmp2 + j * dsub, dsub); + if (dis < min) { + min = dis; + argmin = j; + } + } + code[sq] = argmin; + } +} + +void ReconstructFromNeighborsbd::add_codes(size_t n, const float* x) { + if (k == 1) { // nothing to encode + ntotal += n; + return; + } + codes.resize(codes.size() + code_size * n); +//#pragma omp parallel for + for (int i = 0; i < n; i++) { + estimate_code( + x + i * index.d, + ntotal + i, + codes.data() + (ntotal + i) * code_size); + } + ntotal += n; + FAISS_ASSERT(codes.size() == ntotal * code_size); +} + +/************************************************************** + * IndexHNSWFlat implementation + **************************************************************/ + + + +/************************************************************** + * IndexHNSWPQ implementation + **************************************************************/ + +IndexHNSWbdPQ::IndexHNSWbdPQ() = default; + +IndexHNSWbdPQ::IndexHNSWbdPQ(int d, int pq_m, int M, int pq_nbits) + : IndexHNSWbd(new IndexPQ(d, pq_m, pq_nbits), M) { + own_fields = true; + is_trained = false; +} + +void IndexHNSWbdPQ::train(idx_t n, const float* x) { + IndexHNSWbd::train(n, x); + (dynamic_cast(storage))->pq.compute_sdc_table(); +} + +/************************************************************** + * IndexHNSWSQ implementation + **************************************************************/ + +IndexHNSWbdSQ::IndexHNSWbdSQ( + int d, + ScalarQuantizer::QuantizerType qtype, + int M, + MetricType metric) + : IndexHNSWbd(new IndexScalarQuantizer(d, qtype, metric), M) { + is_trained = 
this->storage->is_trained; + own_fields = true; +} + +IndexHNSWbdSQ::IndexHNSWbdSQ() = default; + +/************************************************************** + * IndexHNSW2Level implementation + **************************************************************/ + +IndexHNSWbd2Level::IndexHNSWbd2Level( + Index* quantizer, + size_t nlist, + int m_pq, + int M) + : IndexHNSWbd(new Index2Layer(quantizer, nlist, m_pq), M) { + own_fields = true; + is_trained = false; +} + +IndexHNSWbd2Level::IndexHNSWbd2Level() = default; + +namespace { + +// same as search_from_candidates but uses v +// visno -> is in result list +// visno + 1 -> in result list + in candidates +int search_from_candidates_2( + const HNSWbd& hnsw, + DistanceComputer& qdis, + int k, + idx_t* I, + float* D, + MinimaxHeap& candidates, + VisitedTable& vt, + HNSWStats& stats, + int level, + int nres_in = 0) { + int nres = nres_in; + int ndis = 0; + for (int i = 0; i < candidates.size(); i++) { + idx_t v1 = candidates.ids[i]; + FAISS_ASSERT(v1 >= 0); + vt.visited[v1] = vt.visno + 1; + } + + int nstep = 0; + + while (candidates.size() > 0) { + float d0 = 0; + int v0 = candidates.pop_min(&d0); + + size_t begin, end; + hnsw.neighbor_range(v0, level, &begin, &end); + + for (size_t j = begin; j < end; j++) { + int v1 = hnsw.neighbors[j]; + if (v1 < 0) + break; + if (vt.visited[v1] == vt.visno + 1) { + // nothing to do + } else { + ndis++; + float d = qdis(v1); + candidates.push(v1, d); + + // never seen before --> add to heap + if (vt.visited[v1] < vt.visno) { + if (nres < k) { + faiss::maxheap_push(++nres, D, I, d, v1); + } else if (d < D[0]) { + faiss::maxheap_replace_top(nres, D, I, d, v1); + } + } + vt.visited[v1] = vt.visno + 1; + } + } + + nstep++; + if (nstep > hnsw.efSearch) { + break; + } + } + + stats.n1++; + if (candidates.size() == 0) + stats.n2++; + + return nres; +} + +} // namespace + +void IndexHNSWbd2Level::search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const 
SearchParameters* params) const { + FAISS_THROW_IF_NOT(k > 0); + FAISS_THROW_IF_NOT_MSG( + !params, "search params not supported for this index"); + + if (dynamic_cast(storage)) { + IndexHNSWbd::search(n, x, k, distances, labels); + + } else { // "mixed" search + size_t n1 = 0, n2 = 0, n3 = 0, ndis = 0, nreorder = 0; + + const IndexIVFPQ* index_ivfpq = + dynamic_cast(storage); + + int nprobe = index_ivfpq->nprobe; + + std::unique_ptr coarse_assign(new idx_t[n * nprobe]); + std::unique_ptr coarse_dis(new float[n * nprobe]); + + index_ivfpq->quantizer->search( + n, x, nprobe, coarse_dis.get(), coarse_assign.get()); + + index_ivfpq->search_preassigned( + n, + x, + k, + coarse_assign.get(), + coarse_dis.get(), + distances, + labels, + false); + +//#pragma omp parallel + { + VisitedTable vt(ntotal); + std::unique_ptr dis( + storage_distance_computer(storage)); + + int candidates_size = hnsw.upper_beam; + MinimaxHeap candidates(candidates_size); + +//#pragma omp for reduction(+ : n1, n2, n3, ndis, nreorder) + for (idx_t i = 0; i < n; i++) { + idx_t* idxi = labels + i * k; + float* simi = distances + i * k; + dis->set_query(x + i * d); + + // mark all inverted list elements as visited + + for (int j = 0; j < nprobe; j++) { + idx_t key = coarse_assign[j + i * nprobe]; + if (key < 0) + break; + size_t list_length = index_ivfpq->get_list_size(key); + const idx_t* ids = index_ivfpq->invlists->get_ids(key); + + for (int jj = 0; jj < list_length; jj++) { + vt.set(ids[jj]); + } + } + + candidates.clear(); + + for (int j = 0; j < hnsw.upper_beam && j < k; j++) { + if (idxi[j] < 0) + break; + candidates.push(idxi[j], simi[j]); + } + + // reorder from sorted to heap + maxheap_heapify(k, simi, idxi, simi, idxi, k); + + HNSWStats search_stats; + search_from_candidates_2( + hnsw, + *dis, + k, + idxi, + simi, + candidates, + vt, + search_stats, + 0, + k); + n1 += search_stats.n1; + n2 += search_stats.n2; + n3 += search_stats.n3; + ndis += search_stats.ndis; + nreorder += 
search_stats.nreorder; + + vt.advance(); + vt.advance(); + + maxheap_reorder(k, simi, idxi); + } + } + + hnsw_stats.combine({n1, n2, n3, ndis, nreorder}); + } +} + +void IndexHNSWbd2Level::flip_to_ivf() { + Index2Layer* storage2l = dynamic_cast(storage); + + FAISS_THROW_IF_NOT(storage2l); + + IndexIVFPQ* index_ivfpq = new IndexIVFPQ( + storage2l->q1.quantizer, + d, + storage2l->q1.nlist, + storage2l->pq.M, + 8); + index_ivfpq->pq = storage2l->pq; + index_ivfpq->is_trained = storage2l->is_trained; + index_ivfpq->precompute_table(); + index_ivfpq->own_fields = storage2l->q1.own_fields; + storage2l->transfer_to_IVFPQ(*index_ivfpq); + index_ivfpq->make_direct_map(true); + + storage = index_ivfpq; + delete storage2l; +} + +} // namespace faiss diff --git a/thirdparty/faiss/faiss/IndexHNSWbd.h b/thirdparty/faiss/faiss/IndexHNSWbd.h new file mode 100644 index 000000000..19d3ad087 --- /dev/null +++ b/thirdparty/faiss/faiss/IndexHNSWbd.h @@ -0,0 +1,199 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace faiss { + +struct IndexHNSWbd; + +struct ReconstructFromNeighborsbd { + typedef HNSWbd::storage_idx_t storage_idx_t; + + const IndexHNSWbd& index; + size_t M; // number of neighbors + size_t k; // number of codebook entries + size_t nsq; // number of subvectors + size_t code_size; + int k_reorder; // nb to reorder. 
-1 = all + + std::vector codebook; // size nsq * k * (M + 1) + + std::vector codes; // size ntotal * code_size + size_t ntotal; + size_t d, dsub; // derived values + + explicit ReconstructFromNeighborsbd( + const IndexHNSWbd& index, + size_t k = 256, + size_t nsq = 1); + + /// codes must be added in the correct order and the IndexHNSW + /// must be populated and sorted + void add_codes(size_t n, const float* x); + + size_t compute_distances( + size_t n, + const idx_t* shortlist, + const float* query, + float* distances) const; + + /// called by add_codes + void estimate_code(const float* x, storage_idx_t i, uint8_t* code) const; + + /// called by compute_distances + void reconstruct(storage_idx_t i, float* x, float* tmp) const; + + void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float* x) const; + + /// get the M+1 -by-d table for neighbor coordinates for vector i + void get_neighbor_table(storage_idx_t i, float* out) const; +}; + +/** The HNSW index is a normal random-access index with a HNSW + * link structure built on top */ + +struct IndexHNSWbd : Index { + typedef HNSWbd::storage_idx_t storage_idx_t; + + // the link strcuture + HNSWbd hnsw; + + // the sequential storage + bool own_fields = false; + Index* storage = nullptr; + + ReconstructFromNeighborsbd* reconstruct_from_neighbors = nullptr; + + explicit IndexHNSWbd(int d = 0, int M = 32, MetricType metric = METRIC_L2); + explicit IndexHNSWbd(Index* storage, int M = 32); + + ~IndexHNSWbd() override; + + void add(idx_t n, const float* x) override; + + /// Trains the storage if needed + void train(idx_t n, const float* x) override; + + /// entry point for search + void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params = nullptr) const override; + + void reconstruct(idx_t key, float* recons) const override; + + void reset() override; + + void shrink_level_0_neighbors(int size); + + /** Perform search only on level 0, given the starting 
points for + * each vertex. + * + * @param search_type 1:perform one search per nprobe, 2: enqueue + * all entry points + */ + void search_level_0( + idx_t n, + const float* x, + idx_t k, + const storage_idx_t* nearest, + const float* nearest_d, + float* distances, + idx_t* labels, + int nprobe = 1, + int search_type = 1) const; + + /// alternative graph building + void init_level_0_from_knngraph(int k, const float* D, const idx_t* I); + + /// alternative graph building + void init_level_0_from_entry_points( + int npt, + const storage_idx_t* points, + const storage_idx_t* nearests); + + // reorder links from nearest to farthest + void reorder_links(); + + void link_singletons(); + + void permute_entries(const idx_t* perm); +}; + +/** Flat index topped with with a HNSW structure to access elements + * more efficiently. + */ + +struct IndexHNSWbdFlat : IndexHNSWbd { + IndexHNSWbdFlat() { + is_trained = true; + } + + IndexHNSWbdFlat(int d, int M, MetricType metric) + : IndexHNSWbd( + (metric == METRIC_L2) ? new IndexFlatL2(d) + : new IndexFlat(d, metric), + M) { + own_fields = true; + is_trained = true; + } +}; + +/** PQ index topped with with a HNSW structure to access elements + * more efficiently. + */ +struct IndexHNSWbdPQ : IndexHNSWbd { + IndexHNSWbdPQ(); + IndexHNSWbdPQ(int d, int pq_m, int M, int pq_nbits = 8); + void train(idx_t n, const float* x) override; +}; + +/** SQ index topped with with a HNSW structure to access elements + * more efficiently. 
+ */ +struct IndexHNSWbdSQ : IndexHNSWbd { + IndexHNSWbdSQ(); + IndexHNSWbdSQ( + int d, + ScalarQuantizer::QuantizerType qtype, + int M, + MetricType metric = METRIC_L2); +}; + +/** 2-level code structure with fast random access + */ +struct IndexHNSWbd2Level : IndexHNSWbd { + IndexHNSWbd2Level(); + IndexHNSWbd2Level(Index* quantizer, size_t nlist, int m_pq, int M); + + void flip_to_ivf(); + + /// entry point for search + void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params = nullptr) const override; +}; + +} // namespace faiss diff --git a/thirdparty/faiss/faiss/impl/HNSWbd.cpp b/thirdparty/faiss/faiss/impl/HNSWbd.cpp new file mode 100644 index 000000000..24856de9a --- /dev/null +++ b/thirdparty/faiss/faiss/impl/HNSWbd.cpp @@ -0,0 +1,1189 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +// -*- c++ -*- + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef __AVX2__ +#include + +#include +#include +#endif + +#define chronoElapsedTime(start) \ + std::chrono::duration_cast( \ + std::chrono::high_resolution_clock::now() - start) \ + .count() + +namespace faiss { + +/************************************************************** + * HNSWbd structure implementation + **************************************************************/ + +int HNSWbd::nb_neighbors(int layer_no) const { + return cum_nneighbor_per_level[layer_no + 1] - + cum_nneighbor_per_level[layer_no]; +} + +void HNSWbd::set_nb_neighbors(int level_no, int n) { + FAISS_THROW_IF_NOT(levels.size() == 0); + int cur_n = nb_neighbors(level_no); + for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) { + cum_nneighbor_per_level[i] += n - cur_n; + } +} + +int HNSWbd::cum_nb_neighbors(int layer_no) const { + return cum_nneighbor_per_level[layer_no]; +} + +void HNSWbd::neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end) + const { + size_t o = offsets[no]; + *begin = o + cum_nb_neighbors(layer_no); + *end = o + cum_nb_neighbors(layer_no + 1); +} + +HNSWbd::HNSWbd(int M) : rng(12345) { + set_default_probas(M, 1.0 / log(M)); + offsets.push_back(0); + bd_stat.reset(); + M_ = M; +} + +int HNSWbd::random_level() { + double f = rng.rand_float(); + // could be a bit faster with bissection + for (int level = 0; level < assign_probas.size(); level++) { + if (f < assign_probas[level]) { + return level; + } + f -= assign_probas[level]; + } + // happens with exponentially low probability + return assign_probas.size() - 1; +} + +void HNSWbd::set_default_probas(int M, float levelMult) { + int nn = 0; + cum_nneighbor_per_level.push_back(0); + for (int level = 0;; level++) { + float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult)); + if (proba < 1e-9) + break; + assign_probas.push_back(proba); + nn += level == 0 ? 
M * 2 : M; + cum_nneighbor_per_level.push_back(nn); + } +} + +void HNSWbd::clear_neighbor_tables(int level) { + for (int i = 0; i < levels.size(); i++) { + size_t begin, end; + neighbor_range(i, level, &begin, &end); + for (size_t j = begin; j < end; j++) { + neighbors[j] = -1; + } + } +} + +void HNSWbd::reset() { + max_level = -1; + entry_point = -1; + offsets.clear(); + offsets.push_back(0); + levels.clear(); + neighbors.clear(); +} + +void HNSWbd::print_neighbor_stats(int level) const { + FAISS_THROW_IF_NOT(level < cum_nneighbor_per_level.size()); + printf("stats on level %d, max %d neighbors per vertex:\n", + level, + nb_neighbors(level)); + size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0; + //#pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \ + reduction(+: tot_reciprocal) reduction(+: n_node) + for (int i = 0; i < levels.size(); i++) { + if (levels[i] > level) { + n_node++; + size_t begin, end; + neighbor_range(i, level, &begin, &end); + std::unordered_set neighset; + for (size_t j = begin; j < end; j++) { + if (neighbors[j] < 0) + break; + neighset.insert(neighbors[j]); + } + int n_neigh = neighset.size(); + int n_common = 0; + int n_reciprocal = 0; + for (size_t j = begin; j < end; j++) { + storage_idx_t i2 = neighbors[j]; + if (i2 < 0) + break; + FAISS_ASSERT(i2 != i); + size_t begin2, end2; + neighbor_range(i2, level, &begin2, &end2); + for (size_t j2 = begin2; j2 < end2; j2++) { + storage_idx_t i3 = neighbors[j2]; + if (i3 < 0) + break; + if (i3 == i) { + n_reciprocal++; + continue; + } + if (neighset.count(i3)) { + neighset.erase(i3); + n_common++; + } + } + } + tot_neigh += n_neigh; + tot_common += n_common; + tot_reciprocal += n_reciprocal; + } + } + float normalizer = n_node; + printf(" nb of nodes at that level %zd\n", n_node); + printf(" neighbors per node: %.2f (%zd)\n", + tot_neigh / normalizer, + tot_neigh); + printf(" nb of reciprocal neighbors: %.2f\n", + tot_reciprocal / normalizer); + printf(" nb 
of neighbors that are also neighbor-of-neighbors: %.2f (%zd)\n", + tot_common / normalizer, + tot_common); +} + +void HNSWbd::fill_with_random_links(size_t n) { + int max_level = prepare_level_tab(n); + RandomGenerator rng2(456); + + for (int level = max_level - 1; level >= 0; --level) { + std::vector elts; + for (int i = 0; i < n; i++) { + if (levels[i] > level) { + elts.push_back(i); + } + } + printf("linking %zd elements in level %d\n", elts.size(), level); + + if (elts.size() == 1) + continue; + + for (int ii = 0; ii < elts.size(); ii++) { + int i = elts[ii]; + size_t begin, end; + neighbor_range(i, 0, &begin, &end); + for (size_t j = begin; j < end; j++) { + int other = 0; + do { + other = elts[rng2.rand_int(elts.size())]; + } while (other == i); + + neighbors[j] = other; + } + } + } +} + +int HNSWbd::prepare_level_tab(size_t n, bool preset_levels) { + size_t n0 = offsets.size() - 1; + + if (preset_levels) { + FAISS_ASSERT(n0 + n == levels.size()); + } else { + FAISS_ASSERT(n0 == levels.size()); + for (int i = 0; i < n; i++) { + int pt_level = random_level(); + levels.push_back(pt_level + 1); + } + } + + int max_level = 0; + for (int i = 0; i < n; i++) { + int pt_level = levels[i + n0] - 1; + if (pt_level > max_level) + max_level = pt_level; + offsets.push_back(offsets.back() + cum_nb_neighbors(pt_level + 1)); + neighbors.resize(offsets.back(), -1); + } + + return max_level; +} + +/** Enumerate vertices from nearest to farthest from query, keep a + * neighbor only if there is no previous neighbor that is closer to + * that vertex than the query. 
+ */ +void HNSWbd::shrink_neighbor_list( + DistanceComputer& qdis, + std::priority_queue& input, + std::vector& output, + int max_size, + struct HNSW_breakdown_stats& bd_stats) { + while (input.size() > 0) { + NodeDistFarther v1 = input.top(); + input.pop(); + float dist_v1_q = v1.d; + + bool good = true; + for (NodeDistFarther v2 : output) { + auto start = std::chrono::high_resolution_clock::now(); + float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id); + bd_stats.time_dc_linking += chronoElapsedTime(start); + bd_stats.step_linking +=1; + + if (dist_v1_v2 < dist_v1_q) { + good = false; + break; + } + } + + if (good) { + output.push_back(v1); + if (output.size() >= max_size) { + return; + } + } + } +} + +namespace { + +using storage_idx_t = HNSWbd::storage_idx_t; +using NodeDistCloser = HNSWbd::NodeDistCloser; +using NodeDistFarther = HNSWbd::NodeDistFarther; + +/************************************************************** + * Addition subroutines + **************************************************************/ + +/// remove neighbors from the list to make it smaller than max_size +void shrink_neighbor_list( + DistanceComputer& qdis, + std::priority_queue& resultSet1, + int max_size, + struct HNSW_breakdown_stats& bd_stats) { + if (resultSet1.size() < max_size) { + return; + } + std::priority_queue resultSet; + std::vector returnlist; + while (resultSet1.size() > 0) { + resultSet.emplace(resultSet1.top().d, resultSet1.top().id); + resultSet1.pop(); + } + + HNSWbd::shrink_neighbor_list(qdis, resultSet, returnlist, max_size, bd_stats); + + for (NodeDistFarther curen2 : returnlist) { + resultSet1.emplace(curen2.d, curen2.id); + } +} + +/// add a link between two elements, possibly shrinking the list +/// of links to make room for it. 
+void add_link( + HNSWbd& hnsw, + DistanceComputer& qdis, + storage_idx_t src, + storage_idx_t dest, + int level) { + size_t begin, end; + hnsw.neighbor_range(src, level, &begin, &end); + if (hnsw.neighbors[end - 1] == -1) { + // there is enough room, find a slot to add it + size_t i = end; + while (i > begin) { + if (hnsw.neighbors[i - 1] != -1) + break; + i--; + } + hnsw.neighbors[i] = dest; + return; + } + + // otherwise we let them fight out which to keep + + // copy to resultSet... + std::priority_queue resultSet; + auto start = std::chrono::high_resolution_clock::now(); + auto dist = qdis.symmetric_dis(src,dest); + hnsw.bd_stat.time_dc_linking += chronoElapsedTime(start); + resultSet.emplace(dist, dest); + for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG + storage_idx_t neigh = hnsw.neighbors[i]; + auto start = std::chrono::high_resolution_clock::now(); + dist = qdis.symmetric_dis(src,neigh); + hnsw.bd_stat.time_dc_linking += chronoElapsedTime(start); + hnsw.bd_stat.step_before_shrinking +=1; + resultSet.emplace(dist, neigh); + } + + shrink_neighbor_list(qdis, resultSet, end - begin,hnsw.bd_stat); + + // ...and back + size_t i = begin; + while (resultSet.size()) { + hnsw.neighbors[i++] = resultSet.top().id; + resultSet.pop(); + } + // they may have shrunk more than just by 1 element + while (i < end) { + hnsw.neighbors[i++] = -1; + } +} + void search_neighbors_to_add( + HNSWbd& hnsw, + DistanceComputer& qdis, + std::priority_queue& results, + int entry_point, + float d_entry_point, + int level, + VisitedTable& vt); +/// search neighbors on a single level, starting from an entry point +void search_neighbors_to_add( + HNSWbd& hnsw, + DistanceComputer& qdis, + std::priority_queue& results, + int entry_point, + float d_entry_point, + int level, + VisitedTable& vt) { + // top is nearest candidate + std::priority_queue candidates; + + NodeDistFarther ev(d_entry_point, entry_point); + candidates.push(ev); + results.emplace(d_entry_point, entry_point); + 
vt.set(entry_point); + + while (!candidates.empty()) { + // get nearest + const NodeDistFarther& currEv = candidates.top(); + + if (currEv.d > results.top().d) { + break; + } + int currNode = currEv.id; + candidates.pop(); + + // loop over neighbors + size_t begin, end; + hnsw.neighbor_range(currNode, level, &begin, &end); + for (size_t i = begin; i < end; i++) { + storage_idx_t nodeId = hnsw.neighbors[i]; + if (nodeId < 0) + break; + if (vt.get(nodeId)) + continue; + vt.set(nodeId); + hnsw.bd_stat.steps_iterating_add = + hnsw.bd_stat.steps_iterating_add + 1; + auto start = std::chrono::high_resolution_clock::now(); + float dis = qdis(nodeId); + hnsw.bd_stat.time_dc+=chronoElapsedTime(start); + NodeDistFarther evE1(dis, nodeId); + + if (results.size() < hnsw.efConstruction || results.top().d > dis) { + results.emplace(dis, nodeId); + candidates.emplace(dis, nodeId); + if (results.size() > hnsw.efConstruction) { + results.pop(); + } + } + } + } + vt.advance(); +} + +/************************************************************** + * Searching subroutines + **************************************************************/ + +/// greedily update a nearest vector at a given level +void greedy_update_nearest( + const HNSWbd& hnsw, + DistanceComputer& qdis, + int level, + storage_idx_t& nearest, + float& d_nearest) { + for (;;) { + storage_idx_t prev_nearest = nearest; + + size_t begin, end; + hnsw.neighbor_range(nearest, level, &begin, &end); + for (size_t i = begin; i < end; i++) { + hnsw.bd_stat.steps_greedy = hnsw.bd_stat.steps_greedy + 1; + storage_idx_t v = hnsw.neighbors[i]; + if (v < 0) + break; + float dis = qdis(v); + if (dis < d_nearest) { + nearest = v; + d_nearest = dis; + } + } + if (nearest == prev_nearest) { + return; + } + } +} + +} // namespace + +/// Finds neighbors and builds links with them, starting from an entry +/// point. The own neighbor list is assumed to be locked. 
+void HNSWbd::add_links_starting_from( + DistanceComputer& ptdis, + storage_idx_t pt_id, + storage_idx_t nearest, + float d_nearest, + int level, + omp_lock_t* locks, + VisitedTable& vt) { + std::priority_queue link_targets; + auto start = std::chrono::high_resolution_clock::now(); + search_neighbors_to_add( + *this, ptdis, link_targets, nearest, d_nearest, level, vt); + bd_stat.time_searching_neighbors_to_add += chronoElapsedTime(start); + + // but we can afford only this many neighbors + int M = nb_neighbors(level); + + start = std::chrono::high_resolution_clock::now(); + ::faiss::shrink_neighbor_list(ptdis, link_targets, M,bd_stat); + + std::vector neighbors; + neighbors.reserve(link_targets.size()); + while (!link_targets.empty()) { + storage_idx_t other_id = link_targets.top().id; + add_link(*this, ptdis, pt_id, other_id, level); + neighbors.push_back(other_id); + link_targets.pop(); + } + + omp_unset_lock(&locks[pt_id]); + for (storage_idx_t other_id : neighbors) { + omp_set_lock(&locks[other_id]); + add_link(*this, ptdis, other_id, pt_id, level); + omp_unset_lock(&locks[other_id]); + } + omp_set_lock(&locks[pt_id]); + bd_stat.time_add_links += chronoElapsedTime(start); +} + +/************************************************************** + * Building, parallel + **************************************************************/ + +void HNSWbd::add_with_locks( + DistanceComputer& ptdis, + int pt_level, + int pt_id, + std::vector& locks, + VisitedTable& vt) { + // greedy search on upper levels + + storage_idx_t nearest; + //#pragma omp critical + { + nearest = entry_point; + + if (nearest == -1) { + max_level = pt_level; + entry_point = pt_id; + } + } + + if (nearest < 0) { + return; + } + + omp_set_lock(&locks[pt_id]); + + int level = max_level; // level at which we start adding neighbors + float d_nearest = ptdis(nearest); + auto greedy_start = std::chrono::high_resolution_clock::now(); + for (; level > pt_level; level--) { + greedy_update_nearest(*this, ptdis, 
level, nearest, d_nearest); + } + bd_stat.time_greedy_insert += chronoElapsedTime(greedy_start); + + for (; level >= 0; level--) { + add_links_starting_from( + ptdis, pt_id, nearest, d_nearest, level, locks.data(), vt); + } + + omp_unset_lock(&locks[pt_id]); + + if (pt_level > max_level) { + max_level = pt_level; + entry_point = pt_id; + } +} + +/************************************************************** + * Searching + **************************************************************/ + +namespace { + +using MinimaxHeap = HNSWbd::MinimaxHeap; +using Node = HNSWbd::Node; +/** Do a BFS on the candidates list */ + +int search_from_candidates( + const HNSWbd& hnsw, + DistanceComputer& qdis, + int k, + idx_t* I, + float* D, + MinimaxHeap& candidates, + VisitedTable& vt, + HNSWStats& stats, + int level, + int nres_in = 0, + const SearchParametersHNSW* params = nullptr) { + int nres = nres_in; + int ndis = 0; + + // can be overridden by search params + bool do_dis_check = params ? params->check_relative_distance + : hnsw.check_relative_distance; + int efSearch = params ? params->efSearch : hnsw.efSearch; + const IDSelector* sel = params ? 
params->sel : nullptr; + + for (int i = 0; i < candidates.size(); i++) { + idx_t v1 = candidates.ids[i]; + float d = candidates.dis[i]; + FAISS_ASSERT(v1 >= 0); + if (!sel || sel->is_member(v1)) { + if (nres < k) { + faiss::maxheap_push(++nres, D, I, d, v1); + } else if (d < D[0]) { + faiss::maxheap_replace_top(nres, D, I, d, v1); + } + } + vt.set(v1); + } + + int nstep = 0; + + while (candidates.size() > 0) { + float d0 = 0; + int v0 = candidates.pop_min(&d0); + + if (do_dis_check) { + // tricky stopping condition: there are more that ef + // distances that are processed already that are smaller + // than d0 + + int n_dis_below = candidates.count_below(d0); + if (n_dis_below >= efSearch) { + break; + } + } + + size_t begin, end; + hnsw.neighbor_range(v0, level, &begin, &end); + + // // baseline version + // for (size_t j = begin; j < end; j++) { + // int v1 = hnsw.neighbors[j]; + // if (v1 < 0) + // break; + // if (vt.get(v1)) { + // continue; + // } + // vt.set(v1); + // ndis++; + // float d = qdis(v1); + // if (!sel || sel->is_member(v1)) { + // if (nres < k) { + // faiss::maxheap_push(++nres, D, I, d, v1); + // } else if (d < D[0]) { + // faiss::maxheap_replace_top(nres, D, I, d, v1); + // } + // } + // candidates.push(v1, d); + // } + + // the following version processes 4 neighbors at a time + size_t jmax = begin; + for (size_t j = begin; j < end; j++) { + int v1 = hnsw.neighbors[j]; + if (v1 < 0) + break; + + prefetch_L2(vt.visited.data() + v1); + jmax += 1; + } + + int counter = 0; + size_t saved_j[4]; + + ndis += jmax - begin; + + auto add_to_heap = [&](const size_t idx, const float dis) { + if (!sel || sel->is_member(idx)) { + if (nres < k) { + faiss::maxheap_push(++nres, D, I, dis, idx); + } else if (dis < D[0]) { + faiss::maxheap_replace_top(nres, D, I, dis, idx); + } + } + candidates.push(idx, dis); + }; + + for (size_t j = begin; j < jmax; j++) { + int v1 = hnsw.neighbors[j]; + hnsw.bd_stat.steps_iterating_search = + 
hnsw.bd_stat.steps_iterating_search + 1; + bool vget = vt.get(v1); + vt.set(v1); + saved_j[counter] = v1; + counter += vget ? 0 : 1; + + if (counter == 4) { + float dis[4]; + qdis.distances_batch_4( + saved_j[0], + saved_j[1], + saved_j[2], + saved_j[3], + dis[0], + dis[1], + dis[2], + dis[3]); + + for (size_t id4 = 0; id4 < 4; id4++) { + add_to_heap(saved_j[id4], dis[id4]); + } + + counter = 0; + } + } + + for (size_t icnt = 0; icnt < counter; icnt++) { + float dis = qdis(saved_j[icnt]); + add_to_heap(saved_j[icnt], dis); + } + + nstep++; + if (!do_dis_check && nstep > efSearch) { + break; + } + } + + if (level == 0) { + stats.n1++; + if (candidates.size() == 0) { + stats.n2++; + } + stats.n3 += ndis; + } + + return nres; +} + +std::priority_queue search_from_candidate_unbounded( + const HNSWbd& hnsw, + const Node& node, + DistanceComputer& qdis, + int ef, + VisitedTable* vt, + HNSWStats& stats) { + int ndis = 0; + std::priority_queue top_candidates; + std::priority_queue, std::greater> candidates; + + top_candidates.push(node); + candidates.push(node); + + vt->set(node.second); + + while (!candidates.empty()) { + float d0; + storage_idx_t v0; + std::tie(d0, v0) = candidates.top(); + + if (d0 > top_candidates.top().first) { + break; + } + + candidates.pop(); + + size_t begin, end; + hnsw.neighbor_range(v0, 0, &begin, &end); + + // // baseline version + // for (size_t j = begin; j < end; ++j) { + // int v1 = hnsw.neighbors[j]; + // + // if (v1 < 0) { + // break; + // } + // if (vt->get(v1)) { + // continue; + // } + // + // vt->set(v1); + // + // float d1 = qdis(v1); + // ++ndis; + // + // if (top_candidates.top().first > d1 || + // top_candidates.size() < ef) { + // candidates.emplace(d1, v1); + // top_candidates.emplace(d1, v1); + // + // if (top_candidates.size() > ef) { + // top_candidates.pop(); + // } + // } + // } + + // the following version processes 4 neighbors at a time + size_t jmax = begin; + for (size_t j = begin; j < end; j++) { + int v1 = 
hnsw.neighbors[j]; + if (v1 < 0) + break; + + prefetch_L2(vt->visited.data() + v1); + jmax += 1; + } + + int counter = 0; + size_t saved_j[4]; + + ndis += jmax - begin; + + auto add_to_heap = [&](const size_t idx, const float dis) { + if (top_candidates.top().first > dis || + top_candidates.size() < ef) { + candidates.emplace(dis, idx); + top_candidates.emplace(dis, idx); + + if (top_candidates.size() > ef) { + top_candidates.pop(); + } + } + }; + + for (size_t j = begin; j < jmax; j++) { + int v1 = hnsw.neighbors[j]; + + bool vget = vt->get(v1); + vt->set(v1); + saved_j[counter] = v1; + counter += vget ? 0 : 1; + + if (counter == 4) { + float dis[4]; + qdis.distances_batch_4( + saved_j[0], + saved_j[1], + saved_j[2], + saved_j[3], + dis[0], + dis[1], + dis[2], + dis[3]); + + for (size_t id4 = 0; id4 < 4; id4++) { + add_to_heap(saved_j[id4], dis[id4]); + } + + counter = 0; + } + } + + for (size_t icnt = 0; icnt < counter; icnt++) { + float dis = qdis(saved_j[icnt]); + add_to_heap(saved_j[icnt], dis); + } + } + + ++stats.n1; + if (candidates.size() == 0) { + ++stats.n2; + } + stats.n3 += ndis; + + return top_candidates; +} + +} // anonymous namespace + +HNSWStats HNSWbd::search( + DistanceComputer& qdis, + int k, + idx_t* I, + float* D, + VisitedTable& vt, + const SearchParametersHNSW* params) const { + HNSWStats stats; + if (entry_point == -1) { + return stats; + } + if (upper_beam == 1) { + // greedy search on upper levels + storage_idx_t nearest = entry_point; + float d_nearest = qdis(nearest); + auto start = std::chrono::high_resolution_clock::now(); + for (int level = max_level; level >= 1; level--) { + greedy_update_nearest(*this, qdis, level, nearest, d_nearest); + } + bd_stat.time_greedy_search += chronoElapsedTime(start); + + int ef = std::max(efSearch, k); + if (search_bounded_queue) { // this is the most common branch + MinimaxHeap candidates(ef); + + candidates.push(nearest, d_nearest); + start = std::chrono::high_resolution_clock::now(); + 
search_from_candidates( + *this, qdis, k, I, D, candidates, vt, stats, 0, 0, params); + bd_stat.time_search_from_candidates += chronoElapsedTime(start); + } else { + std::priority_queue top_candidates = + search_from_candidate_unbounded( + *this, + Node(d_nearest, nearest), + qdis, + ef, + &vt, + stats); + + while (top_candidates.size() > k) { + top_candidates.pop(); + } + + int nres = 0; + while (!top_candidates.empty()) { + float d; + storage_idx_t label; + std::tie(d, label) = top_candidates.top(); + faiss::maxheap_push(++nres, D, I, d, label); + top_candidates.pop(); + } + } + + vt.advance(); + + } else { + int candidates_size = upper_beam; + MinimaxHeap candidates(candidates_size); + + std::vector I_to_next(candidates_size); + std::vector D_to_next(candidates_size); + + int nres = 1; + I_to_next[0] = entry_point; + D_to_next[0] = qdis(entry_point); + + for (int level = max_level; level >= 0; level--) { + // copy I, D -> candidates + + candidates.clear(); + + for (int i = 0; i < nres; i++) { + candidates.push(I_to_next[i], D_to_next[i]); + } + + if (level == 0) { + nres = search_from_candidates( + *this, qdis, k, I, D, candidates, vt, stats, 0); + } else { + nres = search_from_candidates( + *this, + qdis, + candidates_size, + I_to_next.data(), + D_to_next.data(), + candidates, + vt, + stats, + level); + } + vt.advance(); + } + } + + return stats; +} + +void HNSWbd::search_level_0( + DistanceComputer& qdis, + int k, + idx_t* idxi, + float* simi, + idx_t nprobe, + const storage_idx_t* nearest_i, + const float* nearest_d, + int search_type, + HNSWStats& search_stats, + VisitedTable& vt) const { + const HNSWbd& hnsw = *this; + + if (search_type == 1) { + int nres = 0; + + for (int j = 0; j < nprobe; j++) { + storage_idx_t cj = nearest_i[j]; + + if (cj < 0) + break; + + if (vt.get(cj)) + continue; + + int candidates_size = std::max(hnsw.efSearch, int(k)); + MinimaxHeap candidates(candidates_size); + + candidates.push(cj, nearest_d[j]); + + nres = 
search_from_candidates( + hnsw, + qdis, + k, + idxi, + simi, + candidates, + vt, + search_stats, + 0, + nres); + } + } else if (search_type == 2) { + int candidates_size = std::max(hnsw.efSearch, int(k)); + candidates_size = std::max(candidates_size, int(nprobe)); + + MinimaxHeap candidates(candidates_size); + for (int j = 0; j < nprobe; j++) { + storage_idx_t cj = nearest_i[j]; + + if (cj < 0) + break; + candidates.push(cj, nearest_d[j]); + } + + search_from_candidates( + hnsw, qdis, k, idxi, simi, candidates, vt, search_stats, 0); + } +} + +void HNSWbd::permute_entries(const idx_t* map) { + // remap levels + storage_idx_t ntotal = levels.size(); + std::vector imap(ntotal); // inverse mapping + // map: new index -> old index + // imap: old index -> new index + for (int i = 0; i < ntotal; i++) { + assert(map[i] >= 0 && map[i] < ntotal); + imap[map[i]] = i; + } + if (entry_point != -1) { + entry_point = imap[entry_point]; + } + std::vector new_levels(ntotal); + std::vector new_offsets(ntotal + 1); + std::vector new_neighbors(neighbors.size()); + size_t no = 0; + for (int i = 0; i < ntotal; i++) { + storage_idx_t o = map[i]; // corresponding "old" index + new_levels[i] = levels[o]; + for (size_t j = offsets[o]; j < offsets[o + 1]; j++) { + storage_idx_t neigh = neighbors[j]; + new_neighbors[no++] = neigh >= 0 ? 
imap[neigh] : neigh; + } + new_offsets[i + 1] = no; + } + assert(new_offsets[ntotal] == offsets[ntotal]); + // swap everyone + std::swap(levels, new_levels); + std::swap(offsets, new_offsets); + std::swap(neighbors, new_neighbors); +} + +/************************************************************** + * MinimaxHeap + **************************************************************/ + +void HNSWbd::MinimaxHeap::push(storage_idx_t i, float v) { + if (k == n) { + if (v >= dis[0]) + return; + if (ids[0] != -1) { + --nvalid; + } + faiss::heap_pop(k--, dis.data(), ids.data()); + } + faiss::heap_push(++k, dis.data(), ids.data(), v, i); + ++nvalid; +} + +float HNSWbd::MinimaxHeap::max() const { + return dis[0]; +} + +int HNSWbd::MinimaxHeap::size() const { + return nvalid; +} + +void HNSWbd::MinimaxHeap::clear() { + nvalid = k = 0; +} + +#ifdef __AVX2__ +int HNSWbd::MinimaxHeap::pop_min(float* vmin_out) { + assert(k > 0); + static_assert( + std::is_same::value, + "This code expects storage_idx_t to be int32_t"); + + int32_t min_idx = -1; + float min_dis = std::numeric_limits::infinity(); + + size_t iii = 0; + + __m256i min_indices = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, -1); + __m256 min_distances = + _mm256_set1_ps(std::numeric_limits::infinity()); + __m256i current_indices = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i offset = _mm256_set1_epi32(8); + + // The baseline version is available in non-AVX2 branch. + + // The following loop tracks the rightmost index with the min distance. + // -1 index values are ignored. + const int k8 = (k / 8) * 8; + for (; iii < k8; iii += 8) { + __m256i indices = + _mm256_loadu_si256((const __m256i*)(ids.data() + iii)); + __m256 distances = _mm256_loadu_ps(dis.data() + iii); + + // This mask filters out -1 values among indices. 
+ __m256i m1mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), indices); + + __m256i dmask = _mm256_castps_si256( + _mm256_cmp_ps(min_distances, distances, _CMP_LT_OS)); + __m256 finalmask = _mm256_castsi256_ps(_mm256_or_si256(m1mask, dmask)); + + const __m256i min_indices_new = _mm256_castps_si256(_mm256_blendv_ps( + _mm256_castsi256_ps(current_indices), + _mm256_castsi256_ps(min_indices), + finalmask)); + + const __m256 min_distances_new = + _mm256_blendv_ps(distances, min_distances, finalmask); + + min_indices = min_indices_new; + min_distances = min_distances_new; + + current_indices = _mm256_add_epi32(current_indices, offset); + } + + // Vectorizing is doable, but is not practical + int32_t vidx8[8]; + float vdis8[8]; + _mm256_storeu_ps(vdis8, min_distances); + _mm256_storeu_si256((__m256i*)vidx8, min_indices); + + for (size_t j = 0; j < 8; j++) { + if (min_dis > vdis8[j] || (min_dis == vdis8[j] && min_idx < vidx8[j])) { + min_idx = vidx8[j]; + min_dis = vdis8[j]; + } + } + + // process last values. Vectorizing is doable, but is not practical + for (; iii < k; iii++) { + if (ids[iii] != -1 && dis[iii] <= min_dis) { + min_dis = dis[iii]; + min_idx = iii; + } + } + + if (min_idx == -1) { + return -1; + } + + if (vmin_out) { + *vmin_out = min_dis; + } + int ret = ids[min_idx]; + ids[min_idx] = -1; + --nvalid; + return ret; +} + +#else + +// baseline non-vectorized version +int HNSWbd::MinimaxHeap::pop_min(float* vmin_out) { + assert(k > 0); + // returns min. 
This is an O(n) operation + int i = k - 1; + while (i >= 0) { + if (ids[i] != -1) { + break; + } + i--; + } + if (i == -1) { + return -1; + } + int imin = i; + float vmin = dis[i]; + i--; + while (i >= 0) { + if (ids[i] != -1 && dis[i] < vmin) { + vmin = dis[i]; + imin = i; + } + i--; + } + if (vmin_out) { + *vmin_out = vmin; + } + int ret = ids[imin]; + ids[imin] = -1; + --nvalid; + + return ret; +} +#endif + +int HNSWbd::MinimaxHeap::count_below(float thresh) { + int n_below = 0; + for (int i = 0; i < k; i++) { + if (dis[i] < thresh) { + n_below++; + } + } + + return n_below; +} + +} // namespace faiss \ No newline at end of file diff --git a/thirdparty/faiss/faiss/impl/HNSWbd.h b/thirdparty/faiss/faiss/impl/HNSWbd.h new file mode 100644 index 000000000..daaaa401d --- /dev/null +++ b/thirdparty/faiss/faiss/impl/HNSWbd.h @@ -0,0 +1,310 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace faiss { + +/** Implementation of the Hierarchical Navigable Small World + * datastructure. + * + * Efficient and robust approximate nearest neighbor search using + * Hierarchical Navigable Small World graphs + * + * Yu. A. Malkov, D. A. Yashunin, arXiv 2017 + * + * This implementation is heavily influenced by the NMSlib + * implementation by Yury Malkov and Leonid Boystov + * (https://github.com/searchivarius/nmslib) + * + * The HNSW object stores only the neighbor link structure, see + * IndexHNSW.h for the full index object. 
+ */ + + +struct HNSW_breakdown_stats { + size_t steps_greedy = + 0; // number of vertices traversing in greedy search in add + size_t steps_iterating_add = + 0; // number of vertices visited in add_neighbors + size_t steps_iterating_search = + 0; // number of vertices visited in searching from candidates + + size_t time_greedy_insert = 0; + size_t time_searching_neighbors_to_add = 0; + size_t time_add_links = 0; + + size_t time_greedy_search = 0; + size_t time_search_from_candidates = 0; + size_t time_dc = 0; + size_t time_dc_linking = 0; + size_t step_linking =0; + size_t step_before_shrinking=0; + HNSW_breakdown_stats() = default; + //std::string filename="hnswbd.csv"; + void reset() { + steps_greedy = 0; + steps_iterating_add = 0; + steps_iterating_search = 0; + time_greedy_insert = 0; + time_searching_neighbors_to_add = 0; + time_add_links = 0; + time_greedy_search = 0; + time_search_from_candidates = 0; + time_dc = 0; + time_dc_linking = 0; + step_before_shrinking = 0; + step_linking = 0; + } + + + void print() { + std::cout << steps_greedy << ","; + std::cout << steps_iterating_add << ","; + std::cout << steps_iterating_search << ","; + + std::cout << time_greedy_insert << ","; + std::cout << time_searching_neighbors_to_add << ","; + std::cout << time_add_links << ","; + + std::cout << time_greedy_search << ","; + std::cout << time_search_from_candidates << ","; + std::cout< Node; + + mutable struct HNSW_breakdown_stats bd_stat; + + /** Heap structure that allows fast + */ + struct MinimaxHeap { + int n; + int k; + int nvalid; + + std::vector ids; + std::vector dis; + typedef faiss::CMax HC; + + explicit MinimaxHeap(int n) : n(n), k(0), nvalid(0), ids(n), dis(n) {} + + void push(storage_idx_t i, float v); + + float max() const; + + int size() const; + + void clear(); + + int pop_min(float* vmin_out = nullptr); + + int count_below(float thresh); + }; + + /// to sort pairs of (id, distance) from nearest to fathest or the reverse + struct NodeDistCloser { + 
float d; + int id; + NodeDistCloser(float d, int id) : d(d), id(id) {} + bool operator<(const NodeDistCloser& obj1) const { + return d < obj1.d; + } + }; + + struct NodeDistFarther { + float d; + int id; + NodeDistFarther(float d, int id) : d(d), id(id) {} + bool operator<(const NodeDistFarther& obj1) const { + return d > obj1.d; + } + }; + + /// assignment probability to each layer (sum=1) + std::vector assign_probas; + + /// number of neighbors stored per layer (cumulative), should not + /// be changed after first add + std::vector cum_nneighbor_per_level; + + /// level of each vector (base level = 1), size = ntotal + std::vector levels; + + /// offsets[i] is the offset in the neighbors array where vector i is stored + /// size ntotal + 1 + std::vector offsets; + + /// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i + /// for all levels. this is where all storage goes. + std::vector neighbors; + + /// entry point in the search structure (one of the points with maximum + /// level + storage_idx_t entry_point = -1; + + faiss::RandomGenerator rng; + + /// maximum level + int max_level = -1; + + /// expansion factor at construction time + int efConstruction = 40; + + /// expansion factor at search time + int efSearch = 16; + + int M_ = 32; + + /// during search: do we check whether the next best distance is good + /// enough? + bool check_relative_distance = true; + + /// number of entry points in levels > 0. 
+ int upper_beam = 1; + + /// use bounded queue during exploration + bool search_bounded_queue = true; + + // methods that initialize the tree sizes + + /// initialize the assign_probas and cum_nneighbor_per_level to + /// have 2*M links on level 0 and M links on levels > 0 + void set_default_probas(int M, float levelMult); + + /// set nb of neighbors for this level (before adding anything) + void set_nb_neighbors(int level_no, int n); + + // methods that access the tree sizes + + /// nb of neighbors for this level + int nb_neighbors(int layer_no) const; + + /// cumumlative nb up to (and excluding) this level + int cum_nb_neighbors(int layer_no) const; + + /// range of entries in the neighbors table of vertex no at layer_no + void neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end) + const; + + /// only mandatory parameter: nb of neighbors + explicit HNSWbd(int M = 32); + + /// pick a random level for a new point + int random_level(); + + /// add n random levels to table (for debugging...) + void fill_with_random_links(size_t n); + + void add_links_starting_from( + DistanceComputer& ptdis, + storage_idx_t pt_id, + storage_idx_t nearest, + float d_nearest, + int level, + omp_lock_t* locks, + VisitedTable& vt); + + /** add point pt_id on all levels <= pt_level and build the link + * structure for them. 
*/ + void add_with_locks( + DistanceComputer& ptdis, + int pt_level, + int pt_id, + std::vector& locks, + VisitedTable& vt); + + /// search interface for 1 point, single thread + HNSWStats search( + DistanceComputer& qdis, + int k, + idx_t* I, + float* D, + VisitedTable& vt, + const SearchParametersHNSW* params = nullptr) const; + + /// search only in level 0 from a given vertex + void search_level_0( + DistanceComputer& qdis, + int k, + idx_t* idxi, + float* simi, + idx_t nprobe, + const storage_idx_t* nearest_i, + const float* nearest_d, + int search_type, + HNSWStats& search_stats, + VisitedTable& vt) const; + + void reset(); + + void clear_neighbor_tables(int level); + void print_neighbor_stats(int level) const; + + int prepare_level_tab(size_t n, bool preset_levels = false); + + static void shrink_neighbor_list( + DistanceComputer& qdis, + std::priority_queue& input, + std::vector& output, + int max_size, + struct HNSW_breakdown_stats& bd_stats); + + void permute_entries(const idx_t* map); +}; + + + +} // namespace faiss \ No newline at end of file