Commit aaec753

Merge branch 'master' into v2.5-release
2 parents: c4b32c2 + 66e0ee3

77 files changed: +138, -39 lines changed

examples/applications/clustering/agglomerative.py

Lines changed: 3 additions & 3 deletions

@@ -3,9 +3,9 @@
 
 Sentences are mapped to sentence embeddings and then agglomerative clustering with a threshold is applied.
 """
+
 from sentence_transformers import SentenceTransformer
 from sklearn.cluster import AgglomerativeClustering
-import numpy as np
 
 embedder = SentenceTransformer("all-MiniLM-L6-v2")
 
@@ -25,8 +25,8 @@
 ]
 corpus_embeddings = embedder.encode(corpus)
 
-# Normalize the embeddings to unit length
-corpus_embeddings = corpus_embeddings / np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
+# Some models don't automatically normalize the embeddings, in which case you should normalize the embeddings:
+# corpus_embeddings = corpus_embeddings / np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
 
 # Perform kmean clustering
 clustering_model = AgglomerativeClustering(
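
For context, a minimal runnable sketch of the pattern this file demonstrates (the distance threshold below is illustrative, not taken from the diff): sentence embeddings are fed to scikit-learn's AgglomerativeClustering with a threshold instead of a fixed cluster count.

from sentence_transformers import SentenceTransformer
from sklearn.cluster import AgglomerativeClustering

embedder = SentenceTransformer("all-MiniLM-L6-v2")
corpus = ["A man is eating food.", "A man is eating bread.", "The sky is blue."]
corpus_embeddings = embedder.encode(corpus)

# If your model does not return unit-length vectors, normalize them first:
# corpus_embeddings = corpus_embeddings / np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

# n_clusters=None lets the distance threshold decide how many clusters emerge
clustering_model = AgglomerativeClustering(n_clusters=None, distance_threshold=1.5)
clustering_model.fit(corpus_embeddings)
print(clustering_model.labels_)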

examples/applications/clustering/fast_clustering.py

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@
 
 In this example, we download a large set of questions from Quora and then find similar questions in this set.
 """
+
 from sentence_transformers import SentenceTransformer, util
 import os
 import csv
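
As a rough sketch of what fast clustering means here: sentence_transformers ships a util.community_detection helper that groups embeddings whose cosine similarity exceeds a threshold. The threshold and minimum community size below are illustrative assumptions, not values from this diff.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
questions = [
    "How do I learn Python?",
    "What is the best way to learn Python?",
    "How can I lose weight quickly?",
]
embeddings = model.encode(questions, convert_to_tensor=True)

# Each community is a list of indices; the first index is the central point
clusters = util.community_detection(embeddings, min_community_size=2, threshold=0.75)
for i, cluster in enumerate(clusters):
    print(f"Cluster {i}:", [questions[idx] for idx in cluster])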

examples/applications/clustering/kmeans.py

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
 
 Sentences are mapped to sentence embeddings and then k-mean clustering is applied.
 """
+
 from sentence_transformers import SentenceTransformer
 from sklearn.cluster import KMeans
 
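
A minimal sketch of the k-means pattern (the cluster count is illustrative): embeddings from the bi-encoder are clustered with scikit-learn's KMeans.

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

embedder = SentenceTransformer("all-MiniLM-L6-v2")
corpus = ["A man is eating food.", "A man is eating bread.", "The sky is blue."]
corpus_embeddings = embedder.encode(corpus)

# Fit k-means with a fixed number of clusters
clustering_model = KMeans(n_clusters=2)
clustering_model.fit(corpus_embeddings)
print(clustering_model.labels_)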

examples/applications/computing-embeddings/computing_embeddings_streaming.py

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@
 when encoding large text collections.
 It also demonstrates how to stream data which is helpful in case you don't
 want to wait for an extremely large dataset to download, or if you want to
-limit the amount of memory used. More info about dataset streaming: 
+limit the amount of memory used. More info about dataset streaming:
 https://huggingface.co/docs/datasets/stream
 """
 
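
A hedged sketch of the streaming pattern the docstring describes (the dataset name and batch size are illustrative assumptions): Hugging Face datasets can be iterated lazily with streaming=True, so batches are encoded as they arrive.

from datasets import load_dataset
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# streaming=True yields examples lazily instead of downloading the full dataset
dataset = load_dataset("ag_news", split="train", streaming=True)

batch = []
for example in dataset:
    batch.append(example["text"])
    if len(batch) == 32:
        embeddings = model.encode(batch)
        # process embeddings here, then start the next batch
        batch = []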

examples/applications/cross-encoder/cross-encoder_reranking.py

Lines changed: 1 addition & 0 deletions

@@ -6,6 +6,7 @@
 
 Then, we re-rank the hits from the Bi-Encoder using a Cross-Encoder.
 """
+
 from sentence_transformers import SentenceTransformer, util
 from sentence_transformers import CrossEncoder
 import os
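
A minimal sketch of the re-ranking step (the checkpoint name is an illustrative assumption): the Cross-Encoder scores each (query, hit) pair retrieved by the Bi-Encoder, and the hits are re-sorted by that score.

from sentence_transformers import CrossEncoder

cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "How many people live in Berlin?"
hits = [
    "Berlin has a population of around 3.5 million people.",
    "Berlin is the capital of Germany.",
]

# Score each (query, passage) pair, then sort the hits by descending score
scores = cross_encoder.predict([(query, hit) for hit in hits])
reranked = [hit for _, hit in sorted(zip(scores, hits), reverse=True)]
print(reranked)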

examples/applications/cross-encoder/cross-encoder_usage.py

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
 sentences in a corpus using a Cross-Encoder for semantic textual similarity (STS).
 It output then the most similar sentences for the given query.
 """
+
 from sentence_transformers.cross_encoder import CrossEncoder
 import numpy as np
 
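
A short sketch of direct Cross-Encoder usage for STS (the checkpoint name is an illustrative assumption): the model scores sentence pairs directly, and numpy sorts the corpus by score.

from sentence_transformers.cross_encoder import CrossEncoder
import numpy as np

model = CrossEncoder("cross-encoder/stsb-distilroberta-base")

query = "A man is eating pasta."
corpus = ["A man is eating food.", "The girl is carrying a baby.", "A cheetah is running."]

# Score the query against every corpus sentence, then rank by descending score
scores = model.predict([(query, sentence) for sentence in corpus])
for idx in np.argsort(-scores):
    print(f"{scores[idx]:.3f}\t{corpus[idx]}")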

examples/applications/parallel-sentence-mining/bitext_mining.py

Lines changed: 1 addition & 0 deletions

@@ -12,6 +12,7 @@
 This script requires that you have FAISS installed:
 https://github.com/facebookresearch/faiss
 """
+
 from sentence_transformers import SentenceTransformer, models
 import numpy as np
 from bitext_mining_utils import score_candidates, kNN, file_open
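
For intuition, a simplified sketch of bitext mining without FAISS (the model name is an illustrative assumption; the real script uses FAISS and margin-based candidate scoring for scale): embed both languages with a multilingual model and match sentences by cosine similarity.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

english = ["The cat sits on the mat.", "I love reading books."]
german = ["Ich liebe es, Buecher zu lesen.", "Die Katze sitzt auf der Matte."]

emb_en = model.encode(english, convert_to_tensor=True)
emb_de = model.encode(german, convert_to_tensor=True)

# For each English sentence, pick the German sentence with the highest cosine similarity
cos = util.cos_sim(emb_en, emb_de)
for i in range(len(english)):
    j = int(cos[i].argmax())
    print(english[i], "<->", german[j])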

examples/applications/parallel-sentence-mining/bucc2018.py

Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@
 This script requires that you have FAISS installed:
 https://github.com/facebookresearch/faiss
 """
+
 from sentence_transformers import SentenceTransformer, models
 from collections import defaultdict
 import os

examples/applications/semantic-search/semantic_search.py

Lines changed: 1 addition & 0 deletions

@@ -6,6 +6,7 @@
 
 This script outputs for various queries the top 5 most similar sentences in the corpus.
 """
+
 from sentence_transformers import SentenceTransformer, util
 import torch
 
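
A minimal sketch of the semantic search pattern (corpus and top_k are illustrative): util.semantic_search returns, for each query embedding, the top-scoring corpus entries by cosine similarity.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

corpus = ["A man is eating food.", "A woman is playing violin.", "A cheetah chases prey."]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode("A man is eating pasta.", convert_to_tensor=True)

# hits[0] holds dicts with 'corpus_id' and 'score' for the first (only) query
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)
for hit in hits[0]:
    print(f"{hit['score']:.3f}\t{corpus[hit['corpus_id']]}")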

examples/applications/semantic-search/semantic_search_publications.py

Lines changed: 2 additions & 1 deletion

@@ -1,13 +1,14 @@
 """
 This example demonstrates how we can perform semantic search for scientific publications.
 
-As model, we use SPECTER (https://github.com/allenai/specter), which encodes paper titles and abstracts 
+As model, we use SPECTER (https://github.com/allenai/specter), which encodes paper titles and abstracts
 into a vector space.
 
 When can then use util.semantic_search() to find the most similar papers.
 
 Colab example: https://colab.research.google.com/drive/12hfBveGHRsxhPIUMmJYrll2lFU4fOX06
 """
+
 import json
 import os
 from sentence_transformers import SentenceTransformer, util
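
A hedged sketch of the SPECTER search pattern (the checkpoint name and the title/abstract concatenation with "[SEP]" are assumptions based on the SPECTER repo, not on this diff):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("allenai-specter")

papers = [
    {"title": "Attention Is All You Need", "abstract": "The dominant sequence transduction models are based on..."},
    {"title": "SPECTER: Document-level Representation Learning", "abstract": "Representation learning is a critical ingredient..."},
]

# SPECTER embeds a paper from its title and abstract joined by [SEP]
corpus = [p["title"] + "[SEP]" + p["abstract"] for p in papers]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode("transformer architectures for sequence modeling", convert_to_tensor=True)
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=1)
print(papers[hits[0][0]["corpus_id"]]["title"])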
