Skip to content

Instantly share code, notes, and snippets.

@MagicLogic
Forked from Kovah/mysql-levenshtein.sql
Created July 23, 2018 07:31
Show Gist options
  • Select an option

  • Save MagicLogic/d5bbbf18aa27b0e8545b51f91d704cd3 to your computer and use it in GitHub Desktop.

Select an option

Save MagicLogic/d5bbbf18aa27b0e8545b51f91d704cd3 to your computer and use it in GitHub Desktop.

Revisions

  1. @Kovah Kovah created this gist Feb 22, 2017.
    70 changes: 70 additions & 0 deletions mysql-levenshtein.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,70 @@
    -- Levenshtein function
    -- Source: https://openquery.com.au/blog/levenshtein-mysql-stored-function
    -- Levenshtein reference: http://en.wikipedia.org/wiki/Levenshtein_distance

    -- Arjen note: because the levenshtein value is encoded in a byte array, distance cannot exceed 255;
    -- thus the maximum string length this implementation can handle is also limited to 255 characters.

    DELIMITER $$
    DROP FUNCTION IF EXISTS LEVENSHTEIN $$
    CREATE FUNCTION LEVENSHTEIN(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8)
    RETURNS INT
    DETERMINISTIC
    BEGIN
    DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
    DECLARE s1_char CHAR CHARACTER SET utf8;
    -- max strlen=255 for this function
    DECLARE cv0, cv1 VARBINARY(256);

    SET s1_len = CHAR_LENGTH(s1),
    s2_len = CHAR_LENGTH(s2),
    cv1 = 0x00,
    j = 1,
    i = 1,
    c = 0;

    IF (s1 = s2) THEN
    RETURN (0);
    ELSEIF (s1_len = 0) THEN
    RETURN (s2_len);
    ELSEIF (s2_len = 0) THEN
    RETURN (s1_len);
    END IF;

    WHILE (j <= s2_len) DO
    SET cv1 = CONCAT(cv1, CHAR(j)),
    j = j + 1;
    END WHILE;

    WHILE (i <= s1_len) DO
    SET s1_char = SUBSTRING(s1, i, 1),
    c = i,
    cv0 = CHAR(i),
    j = 1;

    WHILE (j <= s2_len) DO
    SET c = c + 1,
    cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1);

    SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost;
    IF (c > c_temp) THEN
    SET c = c_temp;
    END IF;

    SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1;
    IF (c > c_temp) THEN
    SET c = c_temp;
    END IF;

    SET cv0 = CONCAT(cv0, CHAR(c)),
    j = j + 1;
    END WHILE;

    SET cv1 = cv0,
    i = i + 1;
    END WHILE;

    RETURN (c);
    END $$

    DELIMITER ;