#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Simple regex creator. Creates regex expressions from examples. """ from typing import Set import sys def lcs(str1: str, str2: str) -> Set[str]: """Find longest common substring. https://bogotobogo.com/python/python_longest_common_substring_lcs_algorithm_generalized_suffix_tree.php """ str1_len = len(str1) str2_len = len(str2) counter = [[0]*(str2_len+1) for x in range(str1_len+1)] longest = 0 lcs_set: Set[str] = set() for i in range(str1_len): for j in range(str2_len): if str1[i] == str2[j]: count = counter[i][j] + 1 counter[i+1][j+1] = count if count > longest: lcs_set = set() longest = count lcs_set.add(str1[i-count+1:i+1]) elif count == longest: lcs_set.add(str1[i-count+1:i+1]) return lcs_set def split(str1: str, str2: str) -> str: """Find the least restrictive regex of two strings.""" # we only want to return * if at least one string has characters if str1 is str2 is '': return '' ret = lcs(str1, str2) try: sub_str = next(iter(ret)) # if we split around 1 character matches # then we get a load of potentially spurious rules # e.g. # split('total', 'example') -> *a*l* # which is probably not right if len(sub_str) is 1: return '*' front_str1, back_str1 = str1.split(sub_str) front_str2, back_str2 = str2.split(sub_str) return split(front_str1, front_str2) + sub_str + split(back_str1, back_str2) except StopIteration as sub_str: # No common substrings return '*' def regex(*args: str) -> str: """Find the least restrictive regex of n strings.""" return args[0] if len(args) is 1 else split(args[0], regex(*args[1:])) if __name__ == "__main__": print(regex(*sys.argv[1:]))