Skip to content

Instantly share code, notes, and snippets.

@Quacky2200
Forked from mdeous/repermute.py
Last active October 29, 2024 20:00
Show Gist options
  • Select an option

  • Save Quacky2200/714acad06f3f80f6bdb92d7d49dea4bf to your computer and use it in GitHub Desktop.

Select an option

Save Quacky2200/714acad06f3f80f6bdb92d7d49dea4bf to your computer and use it in GitHub Desktop.

Revisions

  1. Quacky2200 revised this gist Aug 4, 2024. 1 changed file with 4 additions and 5 deletions.
    9 changes: 4 additions & 5 deletions repermute.py
    Original file line number Diff line number Diff line change
    @@ -73,12 +73,11 @@ def handle_max_repeat(repeat):

    optlist = handle_tok(subtok, subval)

    iterlist = []
    for x in range(min, max + 1):
    joined = join([optlist] * x)
    iterlist.append(joined)
    results = []
    for rep in range(min, max + 1):
    results += [''.join(it) for it in join([optlist] * rep)]

    return (''.join(it) for it in itertools.chain(*iterlist))
    return results

    def handle_range(val):
    lo, hi = val
  2. Quacky2200 revised this gist Feb 13, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion repermute.py
    Original file line number Diff line number Diff line change
    @@ -85,7 +85,7 @@ def handle_range(val):
    return (chr(x) for x in range(lo, hi + 1))

    def handle_subpattern(val):
    return list(permute_toks(val[1]))
    return list(permute_toks(val[3]))

    def handle_tok(tok, val):
    """
  3. Quacky2200 revised this gist Sep 9, 2019. 1 changed file with 6 additions and 4 deletions.
    10 changes: 6 additions & 4 deletions repermute.py
    Original file line number Diff line number Diff line change
    @@ -24,7 +24,7 @@
    category_chars = {
    CATEGORY_DIGIT : string.digits,
    CATEGORY_SPACE : string.whitespace,
    CATEGORY_WORD : string.digits + string.letters + '_'
    CATEGORY_WORD : string.digits + string.ascii_letters + '_'
    }

    def unique_extend(res_list, list):
    @@ -39,7 +39,8 @@ def handle_any(val):
    """
    return string.printable

    def handle_branch((tok, val)):
    def handle_branch(_tuple):
    (tok, val) = _tuple
    all_opts = []
    for toks in val:
    opts = permute_toks(toks)
    @@ -58,7 +59,8 @@ def handle_in(val):
    def handle_literal(val):
    return [chr(val)]

    def handle_max_repeat((min, max, val)):
    def handle_max_repeat(repeat):
    (min, max, val) = repeat
    """
    Handle a repeat token such as {x,y} or ?.
    """
    @@ -101,7 +103,7 @@ def handle_tok(tok, val):
    SUBPATTERN : handle_subpattern}
    try:
    return handlers[tok](val)
    except KeyError, e:
    except KeyError as e:
    fmt = "Unsupported regular expression construct: %s"
    raise ValueError(fmt % tok)

  4. Mathieu D. created this gist Feb 13, 2012.
    139 changes: 139 additions & 0 deletions repermute.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,139 @@
    # -*- coding: utf-8 -*-
    #
    # Used like this:
    #
    # from repermute import ipermute
    #
    # for s in ipermute('[A-Z]\d'):
    # print s
    #
    # Almost all regular expression constructs are supported except for '*'
    # (which in the Python sre_parse implementation matches 0 to 65535
    # times), '+' and '^'. Non-ASCII characters doesn't work, but I think
    # that is a limitation in the sre_parse module. It works by first
    # parsing the regexp to string sequences so that [A-Z] becomes ['A',
    # 'B', ... 'Z'], \d becomes ['1', ... , '9']. Then a Cartesian join is
    # applied on all string sequences to get all possible permutations of
    # them all.

    import itertools
    from sre_constants import *
    import sre_parse
    import string

    category_chars = {
    CATEGORY_DIGIT : string.digits,
    CATEGORY_SPACE : string.whitespace,
    CATEGORY_WORD : string.digits + string.letters + '_'
    }

    def unique_extend(res_list, list):
    for item in list:
    if item not in res_list:
    res_list.append(item)

    def handle_any(val):
    """
    This is different from normal regexp matching. It only matches
    printable ASCII characters.
    """
    return string.printable

    def handle_branch((tok, val)):
    all_opts = []
    for toks in val:
    opts = permute_toks(toks)
    unique_extend(all_opts, opts)
    return all_opts

    def handle_category(val):
    return list(category_chars[val])

    def handle_in(val):
    out = []
    for tok, val in val:
    out += handle_tok(tok, val)
    return out

    def handle_literal(val):
    return [chr(val)]

    def handle_max_repeat((min, max, val)):
    """
    Handle a repeat token such as {x,y} or ?.
    """
    subtok, subval = val[0]

    if max > 5000:
    # max is the number of cartesian join operations needed to be
    # carried out. More than 5000 consumes way to much memory.
    raise ValueError("To many repetitions requested (%d)" % max)

    optlist = handle_tok(subtok, subval)

    iterlist = []
    for x in range(min, max + 1):
    joined = join([optlist] * x)
    iterlist.append(joined)

    return (''.join(it) for it in itertools.chain(*iterlist))

    def handle_range(val):
    lo, hi = val
    return (chr(x) for x in range(lo, hi + 1))

    def handle_subpattern(val):
    return list(permute_toks(val[1]))

    def handle_tok(tok, val):
    """
    Returns a list of strings of possible permutations for this regexp
    token.
    """
    handlers = {
    ANY : handle_any,
    BRANCH : handle_branch,
    CATEGORY : handle_category,
    LITERAL : handle_literal,
    IN : handle_in,
    MAX_REPEAT : handle_max_repeat,
    RANGE : handle_range,
    SUBPATTERN : handle_subpattern}
    try:
    return handlers[tok](val)
    except KeyError, e:
    fmt = "Unsupported regular expression construct: %s"
    raise ValueError(fmt % tok)

    def permute_toks(toks):
    """
    Returns a generator of strings of possible permutations for this
    regexp token list.
    """
    lists = [handle_tok(tok, val) for tok, val in toks]
    return (''.join(it) for it in join(lists))

    def join(iterlist):
    """
    Cartesian join as an iterator of the supplied sequences. Borrowed
    from:
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302478
    """
    def rloop(seqin, comb):
    if seqin:
    for item in seqin[0]:
    newcomb = comb + [item]
    for item in rloop(seqin[1:], newcomb):
    yield item
    else:
    yield comb
    return rloop(iterlist, [])

    ########## PUBLIC API ####################

    def ipermute(p):
    toks = [tok_n_val for tok_n_val in sre_parse.parse(p)]
    return permute_toks(toks)

    def permute(p):
    return list(ipermute(p))