package main import ( "os" "fmt" "strings" "regexp/syntax" "unicode/utf8" ) // Maximum number of repeats if unterminated (ex: *, +) var MaxRepeats = 10 // syntax.OpNoMatch func opNoMatch(pattern *syntax.Regexp) []string { return nil } // syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText func opEmpty(pattern *syntax.Regexp) []string { return []string{""} } // syntax.OpLiteral func opLiteral(pattern *syntax.Regexp) []string { // Literal is returned as-is, just 1 value return []string{string(pattern.Rune)} } // syntax.OpCharClass func opCharClass(pattern *syntax.Regexp) []string { // pattern.Rune is already de-dupped var lits []string for idx := 0; idx < len(pattern.Rune); idx += 2 { for r := pattern.Rune[idx]; r <= pattern.Rune[idx+1]; r++ { lits = append(lits, string(r)) } } return lits } // syntax.OpAnyCharNotNL func opAnyCharNotNL(pattern *syntax.Regexp) []string { lits := make([]string, utf8.MaxRune - 1) idx := 0 for r := 0; r < utf8.MaxRune; idx++ { if rune(r) != rune('\n') { lits[idx] = string(rune(idx)) idx++ } } return lits } // syntax.OpAnyChar func opAnyChar(pattern *syntax.Regexp) []string { lits := make([]string, utf8.MaxRune) for idx := 0; idx < int(utf8.MaxRune); idx++ { lits[idx] = string(rune(idx)) } return lits } // syntax.OpCapture func opCapture(pattern *syntax.Regexp) ([]string, error) { return expand(pattern.Sub[0]) } // syntax.OpStar func opStar(pattern *syntax.Regexp) ([]string, error) { pattern.Min = 0 pattern.Max = MaxRepeats return opRepeat(pattern) } // syntax.OpPlus func opPlus(pattern *syntax.Regexp) ([]string, error) { pattern.Min = 1 pattern.Max = MaxRepeats return opRepeat(pattern) } // syntax.OpQuest func opQuest(pattern *syntax.Regexp) ([]string, error) { lits, err := expand(pattern.Sub[0]) if err != nil { return nil, err } return append(lits, ""), nil } // syntax.OpRepeat func opRepeat(pattern *syntax.Regexp) ([]string, error) { lits, err := expand(pattern.Sub[0]) if err != nil { return nil, err } if pattern.Max == -1 { pattern.Max = MaxRepeats } rlits := make([]string, len(lits) * (pattern.Max - pattern.Min)) idx := 0 for repeat := pattern.Min; repeat < pattern.Max; repeat++ { if repeat == 0 { rlits[idx] = "" idx++ rlits = rlits[:len(rlits) - len(lits) + 1] continue } for _, lit := range lits { rlits[idx] = strings.Repeat(lit, repeat) idx++ } } return rlits, nil } // syntax.OpConcat func opConcat(pattern *syntax.Regexp) ([]string, error) { // Ensure unique values uniq := make(map[string]struct{}) // recursively concat var recur func(prefixes []string, patterns []*syntax.Regexp) error recur = func(prefixes []string, patterns []*syntax.Regexp) error { lits, err := expand(patterns[0]) if err != nil { return err } swap := make([]string, len(lits) * len(prefixes)) idx := 0 for _, prefix := range prefixes { for _, lit := range lits { swap[idx] = prefix + lit idx++ } } if len(patterns) <= 1 { for _, lit := range swap { uniq[lit] = struct{}{} } return nil } else { return recur(swap, patterns[1:]) } } if err := recur([]string{""}, pattern.Sub); err != nil { return nil, err } lits := make([]string, len(uniq)) idx := 0 for lit := range uniq { lits[idx] = lit idx++ } return lits, nil } // syntax.OpAlternate func opAlternate(pattern *syntax.Regexp) ([]string, error) { // Ensure unique values uniq := make(map[string]struct{}) for _, sub := range pattern.Sub { // Each alternate stands on it's own lits, err := expand(sub) if err != nil { return nil, err } for _, lit := range lits { uniq[lit] = struct{}{} } } lits := make([]string, len(uniq)) idx := 0 for lit := range uniq { lits[idx] = lit idx++ } return lits, nil } // expand recursively expands the possible values for a regular expression func expand(pattern *syntax.Regexp) ([]string, error) { switch pattern.Op { case syntax.OpNoMatch: return opNoMatch(pattern), nil case syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText: return opEmpty(pattern), nil case syntax.OpLiteral: return opLiteral(pattern), nil case syntax.OpCharClass: return opCharClass(pattern), nil case syntax.OpAnyCharNotNL: return opAnyCharNotNL(pattern), nil case syntax.OpAnyChar: return opAnyChar(pattern), nil case syntax.OpCapture: return opCapture(pattern) case syntax.OpStar: return opStar(pattern) case syntax.OpPlus: return opPlus(pattern) case syntax.OpQuest: return opQuest(pattern) case syntax.OpRepeat: return opRepeat(pattern) case syntax.OpConcat: return opConcat(pattern) case syntax.OpAlternate: return opAlternate(pattern) default: return nil, fmt.Errorf("unhandled pattern operation %s", pattern.Op) } } // Entry Point func main() { if len(os.Args) < 2 { fmt.Printf("usage: %s ", os.Args[0]) } exp, err := syntax.Parse(os.Args[1], syntax.POSIX) if err != nil { fmt.Fprintf(os.Stderr, "failed to parse regexp: %s", err) os.Exit(1) } // Operate on the simplified expression as it's faster matches, err := expand(exp.Simplify()) if err != nil { fmt.Fprintf(os.Stderr, "failed to expand regexp: %s", err) os.Exit(1) } for _, match := range matches { fmt.Println(match) } }