-
-
Save EricDuminil/8faabc2f3de82b24e5a371b6dc0fd1e0 to your computer and use it in GitHub Desktop.
Found original code, author and license
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| # -*- coding: utf-8 -*- | |
| # | |
| #author: rex | |
| #blog: http://iregex.org | |
| #filename tr.py | |
| #created: 2010-08-01 20:24 | |
| #source uri: http://iregex.org/blog/trie-in-python.html | |
| # escape bug fix by fcicq @ 2012.8.19 | |
| import re | |
| class Trie(): | |
| """Regexp::Trie in python""" | |
| def __init__(self): | |
| self.data={} | |
| def add(self, word): | |
| ref=self.data | |
| for char in word: | |
| ref[char]=ref.has_key(char) and ref[char] or {} | |
| ref=ref[char] | |
| ref['']=1 | |
| def dump(self): | |
| return self.data | |
| def quote(self, char): | |
| return re.escape(char) | |
| def _regexp(self, pData): | |
| data=pData | |
| if data.has_key("") and len(data.keys())==1: | |
| return None | |
| alt=[] | |
| cc=[] | |
| q=0 | |
| for char in sorted(data.keys()): | |
| if isinstance(data[char],dict): | |
| try: | |
| recurse=self._regexp(data[char]) | |
| alt.append(self.quote(char)+recurse) | |
| except: | |
| cc.append(self.quote(char)) | |
| else: | |
| q=1 | |
| cconly=len(alt) and 0 or 1 #if len, 0; else:0 | |
| if len(cc)>0: | |
| if len(cc)==1: | |
| alt.append(cc[0]) | |
| else: | |
| alt.append('['+''.join(cc)+']') | |
| if len(alt)==1: | |
| result=alt[0] | |
| else: | |
| result="(?:"+"|".join(alt)+")" | |
| if q: | |
| if cconly: | |
| result+="?" | |
| else: | |
| result="(?:%s)?" % result | |
| return result | |
| def regexp(self): | |
| # return "(?-xism:%s)" % self._regexp(self.dump()) # fcicq: not sure what is it... | |
| return self._regexp(self.dump()) | |
| if __name__ == '__main__': | |
| a=Trie() | |
| for w in ['foobar', 'foobah', 'fooxar', 'foozap', 'fooza']: | |
| a.add(w) | |
| print a.regexp() |
Author
Hi, thanks for the interest.
I'm on holiday, without any computer. Please feel free to do whatever you
want with the code. Your proposal sounds good.
Note that I'm not the original author.
Regards,
Eric
…On Tue, Aug 6, 2019, 10:43 Pattarawat Chormai ***@***.***> wrote:
Hi Eric,
I'm currently working on a project and using this gist. Comparing to other
Tries libraries, yours is quite minimal and sufficient for our work.
It would be good if we could import your gist as a module instead of this
snippet.
Would you mind creating a pip module for this snippet? I could also do
that if you don't have time and later transfer to your Github.
Thanks!
Pat
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<https://gist.github.com/8faabc2f3de82b24e5a371b6dc0fd1e0?email_source=notifications&email_token=AAABQPSL7OROGSJNWIAXCT3QDE2SRA5CNFSM4IJT5XQ2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAFWR3M#gistcomment-2991030>,
or mute the thread
<https://github.com/notifications/unsubscribe-auth/AAABQPTYSRFPVXFMCYKGYMTQDE2SRANCNFSM4IJT5XQQ>
.
@EricDuminil I took the liberty and mentioned you in the MIT License! https://pypi.org/project/retrie
cc @heytitle
Author
@ddelange: Thank you very much. The project looks good!
Trying to port this code to Rust :)
What is q, exactly?
@Yoric maybe this helps: https://github.com/ddelange/retrie/blob/0.1.2/src/retrie/trie.py#L44
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Eric,
I'm currently working on a project and using this gist. Comparing to other Tries libraries, yours is quite minimal and sufficient for our work.
It would be good if we could import your gist as a module instead of this snippet.
Would you mind creating a pip module for this snippet? I could also do that if you don't have time and later transfer to your Github.
Thanks!
Pat