Last active
October 26, 2024 12:47
-
-
Save amir-saniyan/e102de09b01c4ed1632e3d1a1a1cbf64 to your computer and use it in GitHub Desktop.
Revisions
-
amir-saniyan revised this gist
Oct 26, 2024 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -178,7 +178,7 @@ float CalculateAngularSimilarity(const std::vector<float>& embedding1, const std } ``` ## Python Version using Scipy ```python import math @@ -207,7 +207,7 @@ def calculate_angular_similarity(a, b): return angular_similarity ``` ## Python Version using Numpy ```python import math -
amir-saniyan revised this gist
Oct 26, 2024 . 1 changed file with 32 additions and 1 deletion.There are no files selected for viewing
# Python Version using Numpy

import math

import numpy as np


def calculate_cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (||a|| * ||b||)``. If either vector has zero
    magnitude the division is 0/0 and NumPy yields ``nan``.
    """
    dot_product = np.dot(a, b)
    magnitude_a = np.linalg.norm(a)
    magnitude_b = np.linalg.norm(b)
    cosine_similarity = dot_product / (magnitude_a * magnitude_b)
    return cosine_similarity


def calculate_cosine_distance(a, b):
    """Return ``1 - cosine_similarity(a, b)``."""
    cosine_similarity = calculate_cosine_similarity(a, b)
    cosine_distance = 1 - cosine_similarity
    return cosine_distance


def calculate_angular_similarity(a, b):
    """Return ``1 - angle(a, b) / pi``.

    The result is 1.0 for vectors pointing the same way and 0.0 for
    opposite vectors.
    """
    cosine_similarity = calculate_cosine_similarity(a, b)
    # Floating-point rounding can push the ratio marginally outside [-1, 1]
    # (e.g. 1.0000000000000002 for a vector compared with itself), and
    # math.acos raises ValueError for such inputs. np.clip leaves nan untouched.
    cosine_similarity = np.clip(cosine_similarity, -1.0, 1.0)
    angular_similarity = 1 - (math.acos(cosine_similarity) / math.pi)
    return angular_similarity


def calculate_angular_distance(a, b):
    """Return ``1 - angular_similarity(a, b)``, i.e. ``angle(a, b) / pi``."""
    angular_similarity = calculate_angular_similarity(a, b)
    angular_distance = 1 - angular_similarity
    return angular_distance
amir-saniyan revised this gist
Nov 29, 2022 . 1 changed file with 52 additions and 0 deletions.There are no files selected for viewing
# Similarity Search Using Tensorflow

import time

import numpy as np  # np.__version__ == '1.23.5'
import tensorflow as tf  # tf.__version__ == '2.11.0'

EMBEDDINGS_LENGTH = 512
NUMBER_OF_EMBEDDINGS = 1000 * 1000


def _cosine_similarity_tensor(x, embeddings):
    """Return the cosine similarities between ``x`` and ``embeddings`` as a tensor."""
    # The Keras loss is negated here so that larger values mean "more similar".
    return -1 * tf.keras.losses.cosine_similarity(x, embeddings)


def calculate_cosine_similarities(x, embeddings):
    """Return the cosine similarities between ``x`` and ``embeddings`` as a NumPy array."""
    return _cosine_similarity_tensor(x, embeddings).numpy()


def find_closest_embeddings(x, embeddings, top_k=1):
    """Return ``(values, indices)`` of the ``top_k`` most similar embeddings.

    Both results are NumPy arrays taken from ``tf.math.top_k``.
    """
    # Stay in tensor form between the two TensorFlow ops instead of converting
    # to NumPy and straight back again.
    cosine_similarities = _cosine_similarity_tensor(x, embeddings)
    values, indices = tf.math.top_k(cosine_similarities, k=top_k)
    return values.numpy(), indices.numpy()


def main():
    """Benchmark ``find_closest_embeddings`` on random data and print timings."""
    # x shape: (512)
    # Embeddings shape: (1000000, 512)
    x = np.random.rand(EMBEDDINGS_LENGTH).astype(np.float32)
    embeddings = np.random.rand(NUMBER_OF_EMBEDDINGS, EMBEDDINGS_LENGTH).astype(np.float32)

    print('Embeddings shape: ', embeddings.shape)

    n = 100
    sum_duration = 0
    for _ in range(n):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        best_values, best_indices = find_closest_embeddings(x, embeddings, top_k=1)
        end = time.perf_counter()

        duration = end - start
        sum_duration += duration

        print('Duration (seconds): {}, Best value: {}, Best index: {}'.format(duration, best_values[0], best_indices[0]))

    # Timings recorded by the original author:
    # Average duration (seconds): 1.707 for Intel(R) Core(TM) i7-10700 CPU @ 2.90GHz
    # Average duration (seconds): 0.961 for NVIDIA 1080 ti
    print('Average duration (seconds): ', sum_duration / n)


if __name__ == '__main__':
    main()
amir-saniyan revised this gist
May 16, 2021 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -180,7 +180,7 @@ float CalculateAngularSimilarity(const std::vector<float>& embedding1, const std # Python Version ```python import math from scipy import spatial -
amir-saniyan revised this gist
May 16, 2021 . 1 changed file with 30 additions and 1 deletion.There are no files selected for viewing
# Embedding Similarity Measurement - Python Version (SciPy)

import math

from scipy import spatial


def calculate_cosine_distance(a, b):
    """Return SciPy's cosine distance between ``a`` and ``b`` as a Python float."""
    cosine_distance = float(spatial.distance.cosine(a, b))
    return cosine_distance


def calculate_cosine_similarity(a, b):
    """Return ``1 - cosine_distance(a, b)``."""
    cosine_similarity = 1 - calculate_cosine_distance(a, b)
    return cosine_similarity


def calculate_angular_distance(a, b):
    """Return ``angle(a, b) / pi``.

    The result is 0.0 for vectors pointing the same way and 1.0 for
    opposite vectors.
    """
    cosine_similarity = calculate_cosine_similarity(a, b)
    # Rounding can leave the similarity marginally outside [-1, 1], for which
    # math.acos raises ValueError. Explicit comparisons (rather than min/max)
    # keep a nan input as nan.
    if cosine_similarity > 1.0:
        cosine_similarity = 1.0
    elif cosine_similarity < -1.0:
        cosine_similarity = -1.0
    angular_distance = math.acos(cosine_similarity) / math.pi
    return angular_distance


def calculate_angular_similarity(a, b):
    """Return ``1 - angular_distance(a, b)``."""
    angular_similarity = 1 - calculate_angular_distance(a, b)
    return angular_similarity
amir-saniyan revised this gist
Jan 1, 2021 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -57,7 +57,7 @@ float CalculateEuclideanDistance(const std::vector<float>& embedding1, const std for(std::size_t i = 0; i < embeddingSize; i++) { float distance = embedding2[i] - embedding1[i]; sum += distance * distance; } float euclideanDistance = std::sqrt(sum); -
amir-saniyan created this gist
Jan 1, 2021 .There are no files selected for viewing
// In the name of God
//
// Embedding Similarity Measurement
//
// This gist contains implementation of Embedding Similarity Measurement in C++.

#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <vector>

// Kept as a macro so existing code that relies on PI keeps compiling.
#ifndef PI
#define PI 3.14159265358979323846
#endif

// Throws std::invalid_argument unless both embeddings have the same number of elements.
static void RequireEqualEmbeddingSizes(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    if(embedding1.size() != embedding2.size())
    {
        throw std::invalid_argument("Embedding sizes should be equal.");
    }
}

// ## Manhattan Distance
// - https://en.wikipedia.org/wiki/Taxicab_geometry
//
// Returns the sum of absolute element-wise differences.
// Throws std::invalid_argument if the embeddings differ in size.
float CalculateManhattanDistance(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    RequireEqualEmbeddingSizes(embedding1, embedding2);

    float sum = 0;
    const std::size_t embeddingSize = embedding1.size();
    for(std::size_t i = 0; i < embeddingSize; i++)
    {
        sum += std::abs(embedding2[i] - embedding1[i]);
    }

    return sum;
}

// ## Euclidean Distance
// - https://en.wikipedia.org/wiki/Euclidean_distance
//
// Returns the square root of the sum of squared element-wise differences.
// Throws std::invalid_argument if the embeddings differ in size.
float CalculateEuclideanDistance(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    RequireEqualEmbeddingSizes(embedding1, embedding2);

    float sum = 0;
    const std::size_t embeddingSize = embedding1.size();
    for(std::size_t i = 0; i < embeddingSize; i++)
    {
        const float distance = embedding2[i] - embedding1[i];
        // A plain multiply instead of std::pow(distance, 2): no promotion to double
        // and no general-purpose pow call just to square a number.
        sum += distance * distance;
    }

    return std::sqrt(sum);
}

// ## Minkowski Distance
// - https://en.wikipedia.org/wiki/Minkowski_distance
//
// Returns (sum |a_i - b_i|^p)^(1/p).
// Throws std::invalid_argument if the embeddings differ in size or if p < 1
// (p == 0 would divide by zero in the 1/p exponent).
float CalculateMinkowskiDistance(const std::vector<float>& embedding1, const std::vector<float>& embedding2, int p)
{
    RequireEqualEmbeddingSizes(embedding1, embedding2);

    if(p < 1)
    {
        throw std::invalid_argument("Minkowski order p should be at least 1.");
    }

    const float order = static_cast<float>(p);

    float sum = 0;
    const std::size_t embeddingSize = embedding1.size();
    for(std::size_t i = 0; i < embeddingSize; i++)
    {
        const float distance = std::abs(embedding2[i] - embedding1[i]);
        sum += std::pow(distance, order);
    }

    return std::pow(sum, 1.0f / order);
}

// ## L1, L2, Lp Norms
// - https://en.wikipedia.org/wiki/Lp_space
//
// These are thin aliases: each one forwards to the corresponding distance
// function above (i.e. the norm of the difference of the two embeddings).
float CalculateL1Norm(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    return CalculateManhattanDistance(embedding1, embedding2);
}

float CalculateL2Norm(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    return CalculateEuclideanDistance(embedding1, embedding2);
}

float CalculateLPNorm(const std::vector<float>& embedding1, const std::vector<float>& embedding2, int p)
{
    return CalculateMinkowskiDistance(embedding1, embedding2, p);
}

// ## Cosine Similarity
// - https://en.wikipedia.org/wiki/Cosine_similarity
//
// Returns dot(a, b) / (|a| * |b|), clamped to [-1, 1].
// Returns 0 when either embedding has zero magnitude (including empty input),
// where the ratio would otherwise be 0/0.
// Throws std::invalid_argument if the embeddings differ in size.
float CalculateCosineSimilarity(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    RequireEqualEmbeddingSizes(embedding1, embedding2);

    // Accumulate in double: squares of floats cannot overflow or underflow a
    // double, so the magnitudes stay meaningful for very large or very small inputs.
    double aa = 0;
    double bb = 0;
    double ab = 0;

    const std::size_t embeddingSize = embedding1.size();
    for(std::size_t i = 0; i < embeddingSize; i++)
    {
        const double a = embedding1[i];
        const double b = embedding2[i];
        aa += a * a;
        bb += b * b;
        ab += a * b;
    }

    if(aa == 0.0 || bb == 0.0)
    {
        return 0.0f;
    }

    double cosineSimilarity = ab / (std::sqrt(aa) * std::sqrt(bb));

    // Rounding can leave the ratio a hair outside [-1, 1]; std::acos would then
    // return NaN. Explicit comparisons keep a NaN input as NaN.
    if(cosineSimilarity > 1.0)
    {
        cosineSimilarity = 1.0;
    }
    else if(cosineSimilarity < -1.0)
    {
        cosineSimilarity = -1.0;
    }

    return static_cast<float>(cosineSimilarity);
}

// ## Angular Distance, Angular Similarity
// - https://en.wikipedia.org/wiki/Cosine_similarity
//
// Returns angle(a, b) / pi: 0 for the same direction, 1 for opposite directions.
// Throws std::invalid_argument if the embeddings differ in size.
float CalculateAngularDistance(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    const float cosineSimilarity = CalculateCosineSimilarity(embedding1, embedding2);
    return static_cast<float>(std::acos(cosineSimilarity) / PI);
}

// Returns 1 - CalculateAngularDistance(embedding1, embedding2).
float CalculateAngularSimilarity(const std::vector<float>& embedding1, const std::vector<float>& embedding2)
{
    return 1 - CalculateAngularDistance(embedding1, embedding2);
}