Skip to content

Instantly share code, notes, and snippets.

@bdlabs
Forked from theraot/Parser.cs
Created December 18, 2021 23:39
Show Gist options
  • Save bdlabs/beb02332b55b5697218f1e78cd11ef85 to your computer and use it in GitHub Desktop.
Save bdlabs/beb02332b55b5697218f1e78cd11ef85 to your computer and use it in GitHub Desktop.

Revisions

  1. @theraot theraot revised this gist Jun 9, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion Parser.cs
    Original file line number Diff line number Diff line change
    @@ -367,7 +367,7 @@ private Symbol Parse(StringProcessor processor)
    const string input = "\"hello \" + \"world \" + \" + \" + \"hello\"";

    var quoteSymbol = Pattern.Literal("QuoteSymbol", '"');
    var nonQuoteSymbol = Pattern.Custom("QuoteSymbol", s => s.ReadUntil('"'));
    var nonQuoteSymbol = Pattern.Custom("NonQuoteSymbol", s => s.ReadUntil('"'));
    var String = Pattern.Conjunction("String", quoteSymbol, nonQuoteSymbol, quoteSymbol);

    var whiteSpace = Pattern.Custom("WhiteSpace", s => s.ReadWhile(char.IsWhiteSpace));
  2. @theraot theraot renamed this gist Jun 8, 2019. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions Parser → Parser.cs
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,7 @@
    #r "nuget:Theraot.Core/3.0.3"

    // This file is for RoslynPad

    using System;
    using System.Collections;
    using System.Collections.Generic;
  3. @theraot theraot created this gist Jun 8, 2019.
    380 changes: 380 additions & 0 deletions Parser
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,380 @@
    #r "nuget:Theraot.Core/3.0.3"

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using Theraot.Core;

    /// <summary>
    /// Exception thrown when a pattern cannot be matched. It records the input
    /// string, the position reported by the processor, and the pattern that failed.
    /// </summary>
    public class ParseException : Exception
    {
        /// <summary>Creates the exception and captures the parsing context.</summary>
        /// <param name="message">Human-readable description of what was expected.</param>
        /// <param name="string">The string that was being parsed.</param>
        /// <param name="position">The position in the string associated with the failure.</param>
        /// <param name="pattern">The pattern that failed to match.</param>
        public ParseException(string message, string @string, int position, Pattern pattern)
            : base(message)
        {
            (String, Position, Pattern) = (@string, position, pattern);
        }

        /// <summary>The string that was being parsed.</summary>
        public string String { get; }

        /// <summary>The position in <see cref="String"/> associated with the failure.</summary>
        public int Position { get; }

        /// <summary>The pattern that failed to match.</summary>
        public Pattern Pattern { get; }
    }

    /// <summary>
    /// A node produced by parsing. Every symbol remembers the pattern that
    /// created it and can be enumerated; the base enumeration yields only the
    /// symbol itself.
    /// </summary>
    public class Symbol : IEnumerable<Symbol>
    {
        /// <summary>Shared zero-length array used wherever a symbol has no children.</summary>
        public static readonly Symbol[] EmptySymbols = new Symbol[] { };

        /// <summary>Creates a symbol produced by <paramref name="pattern"/>.</summary>
        public Symbol(Pattern pattern) => Pattern = pattern;

        /// <summary>The pattern that produced this symbol.</summary>
        public Pattern Pattern { get; }

        /// <summary>Enumerates this symbol; subclasses may override to include descendants.</summary>
        public virtual IEnumerator<Symbol> GetEnumerator()
        {
            yield return this;
        }

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }

    /// <summary>
    /// A leaf symbol that carries the text it matched.
    /// </summary>
    public class TerminalSymbol : Symbol
    {
        /// <summary>Creates a terminal for <paramref name="pattern"/> holding <paramref name="value"/>.</summary>
        public TerminalSymbol(Pattern pattern, string value)
            : base(pattern) => Value = value;

        /// <summary>The text held by this terminal.</summary>
        public string Value { get; }

        /// <summary>Returns <see cref="Value"/>.</summary>
        public override string ToString() => Value;
    }

    /// <summary>
    /// A symbol made of child symbols. Its text is the concatenation of its
    /// children's text, and enumerating it walks the symbol graph starting
    /// at this node.
    /// </summary>
    public class CompositeSymbol : Symbol
    {
        /// <summary>Creates a composite for <paramref name="pattern"/> with the given children.</summary>
        public CompositeSymbol(Pattern pattern, IEnumerable<Symbol> symbols)
            : base(pattern) => Symbols = symbols;

        // Direct children, in the order they were supplied.
        private IEnumerable<Symbol> Symbols { get; }

        /// <summary>Concatenates the string form of every direct child.</summary>
        public override string ToString() => string.Concat(Symbols);

        /// <summary>
        /// Enumerates the symbols reachable from this one via
        /// GraphHelper.ExploreBreadthFirstGraph, descending into the children of
        /// composite symbols; any other symbol contributes no children.
        /// </summary>
        public override IEnumerator<Symbol> GetEnumerator()
        {
            var traversal = GraphHelper.ExploreBreadthFirstGraph(
                this,
                node => node is CompositeSymbol composite ? composite.Symbols : EmptySymbols,
                EqualityComparer<Symbol>.Default);
            return traversal.GetEnumerator();
        }
    }

    /// <summary>
    /// A named grammar rule. Instances are created through the static factory
    /// methods (Literal, Custom, Conjunction, Disjunction, Optional, Repetition,
    /// OptionalRepetition, Empty) and applied to text with <see cref="Parse(string)"/>.
    /// </summary>
    public sealed class Pattern
    {
        // The matching routine for this pattern; always assigned by the constructor
        // and usually replaced by the factory method that created the instance.
        private Func<StringProcessor, Symbol> _parse;

        private Pattern(string name)
        {
            Name = name;
            // Default routine (what Empty uses): produce a childless composite
            // without touching the processor.
            _parse = _ => new CompositeSymbol(this, Symbol.EmptySymbols);
        }

        /// <summary>The name used in error messages and in derived pattern names.</summary>
        public string Name { get; }

        /// <summary>Creates a sequence pattern named after its parts, e.g. "(A + B)".</summary>
        public static Pattern Conjunction(params Pattern[] patterns)
        {
            return Conjunction("(" + string.Join(" + ", from subPattern in patterns select subPattern.Name) + ")", patterns);
        }

        /// <summary>
        /// Creates a pattern that parses every sub-pattern in order. A failure of
        /// any sub-pattern propagates as a <see cref="ParseException"/>.
        /// </summary>
        public static Pattern Conjunction(string name, params Pattern[] patterns)
        {
            var pattern = new Pattern(name);
            // ToArray forces the sub-patterns to run now, in order, rather than lazily.
            pattern._parse = processor => new CompositeSymbol(pattern, (from subPattern in patterns select subPattern.Parse(processor)).ToArray());
            return pattern;
        }

        /// <summary>
        /// Creates a terminal pattern driven by <paramref name="callback"/>. The
        /// callback returns the matched text, or null to signal no match.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="callback"/> is null.</exception>
        public static Pattern Custom(string name, Func<StringProcessor, string> callback)
        {
            if (callback == null)
            {
                throw new ArgumentNullException(nameof(callback));
            }
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                var position = processor.Position;
                var greedy = processor.Greedy;
                string? result = null;
                try
                {
                    result = callback(processor);
                }
                finally
                {
                    // Runs both when the callback returned null and when it threw:
                    // in either case the processor state is restored and a
                    // ParseException is raised (replacing any callback exception).
                    if (result == null)
                    {
                        processor.Position = position;
                        processor.Greedy = greedy;
                        throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
                    }
                }
                return new TerminalSymbol(pattern, result);
            };
            return pattern;
        }

        /// <summary>Creates an alternation pattern named after its parts, e.g. "(A | B)".</summary>
        public static Pattern Disjunction(params Pattern[] patterns)
        {
            return Disjunction("(" + string.Join(" | ", from subPattern in patterns select subPattern.Name) + ")", patterns);
        }

        /// <summary>
        /// Creates a pattern that tries each sub-pattern in order and returns the
        /// first one that parses, wrapped in a composite. Throws
        /// <see cref="ParseException"/> when none of them parses.
        /// </summary>
        public static Pattern Disjunction(string name, params Pattern[] patterns)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                foreach (var subPattern in patterns)
                {
                    // TryParse rolls the processor back after a failed alternative,
                    // so every alternative starts from the same position.
                    var result = TryParse(subPattern, processor);
                    if (result != null)
                    {
                        return new CompositeSymbol(pattern, new[] { result });
                    }
                }
                throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
            };
            return pattern;
        }

        /// <summary>Creates an empty pattern with an empty name.</summary>
        public static Pattern Empty()
        {
            return Empty(string.Empty);
        }

        /// <summary>Creates a pattern that yields a childless composite without reading input.</summary>
        public static Pattern Empty(string name)
        {
            return new Pattern(name);
        }

        /// <summary>Creates a string literal pattern named after the quoted, escaped literal.</summary>
        public static Pattern Literal(string literal)
        {
            return Literal("\"" + literal.Replace("\"", "\\\"") + "\"", literal);
        }

        /// <summary>
        /// Creates a pattern that succeeds when the processor reads
        /// <paramref name="literal"/>; otherwise throws <see cref="ParseException"/>.
        /// </summary>
        public static Pattern Literal(string name, string literal)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                if (processor.Read(literal))
                {
                    return new TerminalSymbol(pattern, literal);
                }
                throw new ParseException($"Expected {name} ({literal})", processor.String, processor.Position, pattern);
            };
            return pattern;
        }

        /// <summary>Creates a character literal pattern named after the quoted, escaped character.</summary>
        public static Pattern Literal(char literal)
        {
            return Literal("\"" + (literal == '"' ? "\\\"" : literal.ToString()) + "\"", literal);
        }

        /// <summary>
        /// Creates a pattern that succeeds when the processor reads the character
        /// <paramref name="literal"/>; otherwise throws <see cref="ParseException"/>.
        /// </summary>
        public static Pattern Literal(string name, char literal)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                if (processor.Read(literal))
                {
                    return new TerminalSymbol(pattern, literal.ToString());
                }
                throw new ParseException($"Expected {name}", processor.String, processor.Position, pattern);
            };
            return pattern;
        }

        /// <summary>Creates an optional pattern named "[sub]".</summary>
        public static Pattern Optional(Pattern subPattern)
        {
            return Optional("[" + subPattern.Name + "]", subPattern);
        }

        /// <summary>
        /// Creates a pattern that wraps the sub-pattern's symbol when it parses and
        /// yields a childless composite (with the processor rolled back) when it does not.
        /// </summary>
        public static Pattern Optional(string name, Pattern subPattern)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                var result = TryParse(subPattern, processor);
                return new CompositeSymbol(pattern, result == null ? Symbol.EmptySymbols : new[] { result });
            };
            return pattern;
        }

        /// <summary>Creates a zero-or-more pattern named "sub*".</summary>
        public static Pattern OptionalRepetition(Pattern subPattern)
        {
            return OptionalRepetition(subPattern.Name + "*", subPattern);
        }

        /// <summary>
        /// Creates a pattern that applies the sub-pattern repeatedly until it fails
        /// or the end of the string is reached. Never throws for zero matches.
        /// </summary>
        public static Pattern OptionalRepetition(string name, Pattern subPattern)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor => new CompositeSymbol(pattern, ParseRepeated(subPattern, processor));
            return pattern;
        }

        /// <summary>Creates a one-or-more pattern named "sub+".</summary>
        public static Pattern Repetition(Pattern subPattern)
        {
            return Repetition(subPattern.Name + "+", subPattern);
        }

        /// <summary>
        /// Creates a pattern that applies the sub-pattern repeatedly until it fails
        /// or the end of the string is reached, and throws
        /// <see cref="ParseException"/> when it did not match at least once.
        /// </summary>
        public static Pattern Repetition(string name, Pattern subPattern)
        {
            var pattern = new Pattern(name);
            pattern._parse = processor =>
            {
                var symbols = ParseRepeated(subPattern, processor);
                if (symbols.Count == 0)
                {
                    throw new ParseException($"Expected {pattern.Name}", processor.String, processor.Position, pattern);
                }
                return new CompositeSymbol(pattern, symbols);
            };
            return pattern;
        }

        /// <summary>Parses <paramref name="str"/> from its start with this pattern.</summary>
        /// <returns>The symbol produced by this pattern.</returns>
        public Symbol Parse(string str)
        {
            var processor = new StringProcessor(str);
            return Parse(processor);
        }

        private Symbol Parse(StringProcessor processor)
        {
            // _parse is assigned in the constructor, so it is never null here.
            return _parse(processor);
        }

        // Tries subPattern at the current position. Returns its symbol, or null
        // after restoring the processor's Position and Greedy when it did not parse.
        private static Symbol? TryParse(Pattern subPattern, StringProcessor processor)
        {
            var position = processor.Position;
            var greedy = processor.Greedy;
            Symbol? result = null;
            try
            {
                result = subPattern.Parse(processor);
            }
            catch (ParseException)
            {
                // A failed attempt is an expected outcome here; the caller
                // inspects the null result and decides how to continue.
            }
            finally
            {
                // Also runs when a non-parse exception is propagating, so the
                // processor is left where it started in that case too.
                if (result == null)
                {
                    processor.Position = position;
                    processor.Greedy = greedy;
                }
            }
            return result;
        }

        // Collects consecutive matches of subPattern until it fails, the end of
        // the string is reached, or a match consumes no input.
        private static List<Symbol> ParseRepeated(Pattern subPattern, StringProcessor processor)
        {
            var symbols = new List<Symbol>();
            while (!processor.EndOfString)
            {
                var start = processor.Position;
                var result = TryParse(subPattern, processor);
                if (result == null)
                {
                    break;
                }
                symbols.Add(result);
                if (processor.Position == start)
                {
                    // Zero-width match (e.g. an Optional or Empty sub-pattern):
                    // repeating it would never advance and would loop forever.
                    break;
                }
            }
            return symbols;
        }
    }

    // Demo: extract the quoted strings from a '+'-separated list of string literals.
    const string input = "\"hello \" + \"world \" + \" + \" + \"hello\"";

    // A string is: a quote, everything up to the next quote, and a closing quote.
    var quoteSymbol = Pattern.Literal("QuoteSymbol", '"');
    // Named distinctly from quoteSymbol so parse errors identify the right pattern.
    var nonQuoteSymbol = Pattern.Custom("NonQuoteSymbol", s => s.ReadUntil('"'));
    var String = Pattern.Conjunction("String", quoteSymbol, nonQuoteSymbol, quoteSymbol);

    var whiteSpace = Pattern.Custom("WhiteSpace", s => s.ReadWhile(char.IsWhiteSpace));
    var plusSymbol = Pattern.Literal("PlusSymbol", '+');
    // NOTE(review): every repetition requires a trailing '+', and the last string
    // in input has none, so it looks like the final "hello" is not matched -
    // confirm whether that is intended.
    var document = Pattern.Repetition(Pattern.Conjunction(whiteSpace, String, whiteSpace, plusSymbol));

    // Keep only the symbols produced by the String pattern and render them as text.
    var results = from symbol in document.Parse(input) where symbol.Pattern == String select symbol.ToString();

    foreach (var v in results.ToArray())
    {
        Console.WriteLine(v);
    }