-
-
Save EricDuminil/8faabc2f3de82b24e5a371b6dc0fd1e0 to your computer and use it in GitHub Desktop.
Revisions
-
EricDuminil revised this gist
Jun 30, 2025 . 1 changed file with 13 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,19 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # # Original Perl module: Regexp::Trie # Original Copyright (C) 2006 by Dan Kogai # # This Python translation is a derivative work based on Regexp::Trie # Copyright (c) 2010 by rex # Copyright (c) 2017 by fcicq, atiking and EricDuminil # This Python code is licensed under the Artistic License 2.0 (AL2.0). # A copy of the Artistic License 2.0 can be found at: # https://opensource.org/licenses/Artistic-2.0 # # Original source: https://metacpan.org/pod/Regexp::Trie # #author: rex #blog: http://iregex.org #filename trie.py -
EricDuminil revised this gist
Mar 14, 2017 . 1 changed file with 3 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ # #author: rex #blog: http://iregex.org #filename trie.py #created: 2010-08-01 20:24 #source uri: http://iregex.org/blog/trie-in-python.html @@ -14,7 +14,8 @@ class Trie(): """Regexp::Trie in python. Creates a Trie out of a list of words. The trie can be exported to a Regexp pattern. The corresponding Regexp should match much faster than a simple Regexp union.""" def __init__(self): self.data = {} -
EricDuminil renamed this gist
Mar 14, 2017 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
EricDuminil revised this gist
Mar 14, 2017 . 1 changed file with 36 additions and 31 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,70 +8,75 @@ #source uri: http://iregex.org/blog/trie-in-python.html # escape bug fix by fcicq @ 2012.8.19 # python3 compatible by EricDuminil @ 2017.03. import re class Trie(): """Regexp::Trie in python""" def __init__(self): self.data = {} def add(self, word): ref = self.data for char in word: ref[char] = char in ref and ref[char] or {} ref = ref[char] ref[''] = 1 def dump(self): return self.data def quote(self, char): return re.escape(char) def _pattern(self, pData): data = pData if "" in data and len(data.keys()) == 1: return None alt = [] cc = [] q = 0 for char in sorted(data.keys()): if isinstance(data[char], dict): try: recurse = self._pattern(data[char]) alt.append(self.quote(char) + recurse) except: cc.append(self.quote(char)) else: q = 1 cconly = not len(alt) > 0 if len(cc) > 0: if len(cc) == 1: alt.append(cc[0]) else: alt.append('[' + ''.join(cc) + ']') if len(alt) == 1: result = alt[0] else: result = "(?:" + "|".join(alt) + ")" if q: if cconly: result += "?" else: result = "(?:%s)?" % result return result def pattern(self): return self._pattern(self.dump()) if __name__ == '__main__': t = Trie() for w in ['foobar', 'foobah', 'fooxar', 'foozap', 'fooza']: t.add(w) print(t.pattern()) #=> "foo(?:ba[hr]|xar|zap?)" -
atiking revised this gist
Nov 18, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -46,7 +46,7 @@ def _regexp(self, pData): cc.append(self.quote(char)) else: q=1 cconly=not len(alt)>0 if len(cc)>0: if len(cc)==1: -
fcicq revised this gist
Aug 19, 2012 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,6 @@ #source uri: http://iregex.org/blog/trie-in-python.html # escape bug fix by fcicq @ 2012.8.19 import re class Trie(): @@ -67,7 +66,8 @@ def _regexp(self, pData): result="(?:%s)?" % result return result def regexp(self): # return "(?-xism:%s)" % self._regexp(self.dump()) # fcicq: not sure what is it... return self._regexp(self.dump()) if __name__ == '__main__': a=Trie() -
fcicq revised this gist
Aug 19, 2012 . 1 changed file with 10 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,8 +7,13 @@ #created: 2010-08-01 20:24 #source uri: http://iregex.org/blog/trie-in-python.html # escape bug fix by fcicq @ 2012.8.19 # known issues: remove the prefix '(?-xism:(?:' and suffix '))' in order to work. import re class Trie(): """Regexp::Trie in python""" def __init__(self): self.data={} @@ -22,6 +27,9 @@ def add(self, word): def dump(self): return self.data def quote(self, char): return re.escape(char) def _regexp(self, pData): data=pData if data.has_key("") and len(data.keys())==1: @@ -34,9 +42,9 @@ def _regexp(self, pData): if isinstance(data[char],dict): try: recurse=self._regexp(data[char]) alt.append(self.quote(char)+recurse) except: cc.append(self.quote(char)) else: q=1 cconly=len(alt) and 0 or 1 #if len, 0; else:0 -
fcicq created this gist
Aug 19, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,69 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # #author: rex #blog: http://iregex.org #filename tr.py #created: 2010-08-01 20:24 #source uri: http://iregex.org/blog/trie-in-python.html class Trie(): """Regexp::Trie in python""" def __init__(self): self.data={} def add(self, word): ref=self.data for char in word: ref[char]=ref.has_key(char) and ref[char] or {} ref=ref[char] ref['']=1 def dump(self): return self.data def _regexp(self, pData): data=pData if data.has_key("") and len(data.keys())==1: return None alt=[] cc=[] q=0 for char in sorted(data.keys()): if isinstance(data[char],dict): try: recurse=self._regexp(data[char]) alt.append(char+recurse) except: cc.append(char) else: q=1 cconly=len(alt) and 0 or 1 #if len, 0; else:0 if len(cc)>0: if len(cc)==1: alt.append(cc[0]) else: alt.append('['+''.join(cc)+']') if len(alt)==1: result=alt[0] else: result="(?:"+"|".join(alt)+")" if q: if cconly: result+="?" else: result="(?:%s)?" % result return result def regexp(self): return "(?-xism:%s)" % self._regexp(self.dump()) if __name__ == '__main__': a=Trie() for w in ['foobar', 'foobah', 'fooxar', 'foozap', 'fooza']: a.add(w) print a.regexp()