Created
November 5, 2020 23:09
-
-
Save dfrobison/f098d92a7b83b51e289789af7bc0208b to your computer and use it in GitHub Desktop.
Revisions
-
dfrobison created this gist
Nov 5, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,214 @@ struct FuzzyTextSearch { private static let maxMatches = 256 private static let defaultMatches: [Int] = Array(repeating: -1, count: maxMatches) private static let recursionLimit = 10 private static let sequentialBonus = 15 // bonus for adjacent matches private static let separatorBonus = 30 // bonus if match occurs after a separator private static let camelBonus = 30 // bonus if match is uppercase and prev is lower private static let firstLetterBonus = 15 // bonus if the first letter is matched private static let leadingLetterPenalty = -5 // penalty applied for every letter in str before the first match private static let maxLeadingLetterPenalty = -15 // maximum penalty for leading letters private static let unmatchedLetterPenalty = -1 // penalty for every letter that doesn't matter private let neighborSeparator = "_ " private let pattern: String private let patternLength: Int private var str: String = "" private var strLength = 0 init(pattern: String) { self.pattern = pattern patternLength = pattern.count } mutating func fuzzyMatchSimple(stringToMatch str: String) -> Bool { guard !pattern.isEmpty else {return true} guard !str.isEmpty else {return false} var strIndexOffset = 0 var patternIndexOffset = 0 strLength = str.count while (patternIndexOffset < patternLength) && (strIndexOffset < strLength) { if pattern[pattern.index(pattern.startIndex, offsetBy: patternIndexOffset)].lowercased() == str[str.index(str.startIndex, offsetBy: strIndexOffset)].lowercased() { patternIndexOffset += 1 } strIndexOffset += 1 } return patternIndexOffset == patternLength } mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int) -> Bool { var matches: [Int] = [] return fuzzyMatch(stringToMatch: str, outScore: &outScore, matches: &matches) } mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int, matches: inout [Int]) -> Bool { var recursionCount = 0 var internalMatches = FuzzyTextSearch.defaultMatches self.str = str strLength = str.count let matched = fuzzyMatchRecursive(patternOffsetIndex: 0, strOffsetIndex: 0, outScore: &outScore, srcMatches: nil, matches: &internalMatches, nextMatch: 0, recursionCount: &recursionCount) matches = internalMatches.filter{$0 != -1} return matched } // Private implementation private mutating func fuzzyMatchRecursive(patternOffsetIndex: Int, strOffsetIndex: Int, outScore: inout Int, srcMatches: [Int]?, matches: inout [Int], nextMatch: Int, recursionCount: inout Int) -> Bool { // Count recursions recursionCount += 1 guard recursionCount < FuzzyTextSearch.recursionLimit else {return false} // Detect end of strings guard patternOffsetIndex < patternLength && strOffsetIndex < strLength else {return false} var patternOffsetIndex = patternOffsetIndex var strOffsetIndex = strOffsetIndex var nextMatch = nextMatch // Recursion params var recursiveMatch = false var bestRecursiveMatches = FuzzyTextSearch.defaultMatches var bestRecursiveScore = 0 // Loop through pattern and str looking for a match var first_match = true while patternOffsetIndex != patternLength && strOffsetIndex != strLength { // Found match if pattern[pattern.index(pattern.startIndex, offsetBy: patternOffsetIndex)].lowercased() == str[str.index(str.startIndex, offsetBy: strOffsetIndex)].lowercased() { // Supplied matches buffer was too short if nextMatch >= FuzzyTextSearch.maxMatches { return false } // Remember matches if first_match && srcMatches != nil { for index in 0 ..< nextMatch { matches[index] = srcMatches![index] } first_match = false } // Recursive call that "skips" this match var recursiveMatches = FuzzyTextSearch.defaultMatches var recursiveScore = 0 if fuzzyMatchRecursive(patternOffsetIndex: patternOffsetIndex, strOffsetIndex: strOffsetIndex + 1, outScore: &recursiveScore, srcMatches: matches, matches: &recursiveMatches, nextMatch: nextMatch, recursionCount: &recursionCount) { // Pick best recursive score if !recursiveMatch || recursiveScore > bestRecursiveScore { bestRecursiveMatches = recursiveMatches bestRecursiveScore = recursiveScore } recursiveMatch = true } // Advance matches[nextMatch] = strOffsetIndex nextMatch += 1 patternOffsetIndex += 1 } strOffsetIndex += 1 } // Determine if full pattern was matched let matched = patternOffsetIndex == patternLength // Calculate score if matched { // Nothing else needs to be looked since a match occurred strOffsetIndex = strLength // Initialize score outScore = 100 // Apply leading letter penalty var penalty = FuzzyTextSearch.leadingLetterPenalty * matches[0] if penalty < FuzzyTextSearch.maxLeadingLetterPenalty { penalty = FuzzyTextSearch.maxLeadingLetterPenalty } outScore += penalty // Apply unmatched penalty let unmatched = strLength - nextMatch outScore += FuzzyTextSearch.unmatchedLetterPenalty * unmatched // Apply ordering bonuses for nextMatchIndex in 0 ..< nextMatch { let currIdx = matches[nextMatchIndex] if nextMatchIndex > 0 { let prevIdx = matches[nextMatchIndex - 1] // Sequential if currIdx == (prevIdx + 1) { outScore += FuzzyTextSearch.sequentialBonus } } // Check for bonuses based on neighbor character value if currIdx > 0 { // Camel case let neighbor = str[str.index(str.startIndex, offsetBy: currIdx - 1)] let curr = str[str.index(str.startIndex, offsetBy: currIdx)] if neighbor.isLowercase && curr.isUppercase { outScore += FuzzyTextSearch.camelBonus } // Separator if neighborSeparator.contains(neighbor) { outScore += FuzzyTextSearch.separatorBonus } } else { // First letter outScore += FuzzyTextSearch.firstLetterBonus } } } // Return best result if recursiveMatch && (!matched || bestRecursiveScore > outScore) { // Recursive score is better than "this" matches = bestRecursiveMatches outScore = bestRecursiveScore return true } else if matched { // "this" score is better than recursive return true } // no match return false } }