Skip to content

Instantly share code, notes, and snippets.

@postmodern
Created January 15, 2011 04:39
Show Gist options
  • Save postmodern/780702 to your computer and use it in GitHub Desktop.
Save postmodern/780702 to your computer and use it in GitHub Desktop.

Revisions

  1. postmodern revised this gist Jan 15, 2011. 1 changed file with 36 additions and 23 deletions.
    59 changes: 36 additions & 23 deletions http_parser.rb
    Original file line number Diff line number Diff line change
    @@ -6,29 +6,21 @@ class HTTPParser < Parslet::Parser
    # Character Classes
    #
    rule(:digit) { match('[0-9]') }
    rule(:digits) { digit.repeat(1) }
    rule(:xdigit) { digit | match('[a-fA-F]') }
    rule(:upper) { match('[A-Z]') }
    rule(:lower) { match('[a-z]') }
    rule(:alpha) { upper | lower }
    rule(:alnum) { alpha | digit }
    rule(:cntrl) { match('[\x00-\x1f]') }
    rule(:graph) { match('[!-~]') }
    rule(:print) { match('[ -~]') }
    rule(:punct) { match("[!-/:-@[-'{-~]") }
    rule(:space) { match("[\t\v\f\n\r]") }
    rule(:zlen) { str('') }
    rule(:empty) { }
    rule(:ascii) { match('[\x00-\x7f]') }

    #
    # Line Terminators
    #
    rule(:sp) { str(' ') }
    rule(:lws) { sp | str("\t") }
    rule(:crlf) { str("\r\n") }

    #
    # HTTP Character Classes
    #
    rule(:ctl) { cntrl | str("\x7f") }
    rule(:text) { lws | ctl.absnt? >> ascii }
    rule(:safe) { str('$') | str('-') | str('_') | str('.') }
    rule(:extra) {
    str('!') | str('*') | str("'") | str('(') | str(')') | str(',')
    @@ -38,7 +30,7 @@ class HTTPParser < Parslet::Parser
    str('&') | str('=') | str('+')
    }
    rule(:sorta_safe) { str('"') | str('<') | str('>') }
    rule(:unsafe) { ctl | str(' ') | str('#') | str('%') | sorta_safe }
    rule(:unsafe) { ctl | sp | str('#') | str('%') | sorta_safe }
    rule(:national) {
    (alpha | digit | reserved | extra | safe | unsafe).absnt? >> any
    }
    @@ -49,20 +41,27 @@ class HTTPParser < Parslet::Parser
    rule(:pchar) {
    uchar | str(':') | str('@') | str('&') | str('=') | str('+')
    }
    rule(:tspecials) {
    rule(:separators) {
    str('(') | str(')') | str('<') | str('>') | str('@') | str(',') |
    str(';') | str(':') | str("\\") | str('"') | str('/') | str('[') |
    str(']') | str('?') | str('=') | str('{') | str('}') | str(' ') |
    str(']') | str('?') | str('=') | str('{') | str('}') | sp |
    str("\t")
    }

    #
    # Elements
    #
    rule(:token) { (ctl | tspecials).absnt? >> ascii }
    rule(:token) { (ctl | separators).absnt? >> ascii }

    rule(:comment_text) { (str('(') | str(')')).absnt? >> text }
    rule(:comment) { str('(') >> comment_text.repeat >> str(')') }

    rule(:quoted_pair) { str("\\") >> ascii }
    rule(:quoted_text) { quoted_pair | str('"').absnt? >> text }
    rule(:quoted_string) { str('"') >> quoted_text >> str('"') }

    #
    # URI Schemes and Absolute Paths
    # URI Elements
    #
    rule(:scheme) {
    (alpha | digit | str('+') | str('-') | str('.')).repeat
    @@ -85,21 +84,35 @@ class HTTPParser < Parslet::Parser
    rule(:absolute_path) { str('/').repeat(1) >> relative_path }

    rule(:request_uri) { str('*') | absolute_uri | absolute_path }
    rule(:request_method) { (upper | digit | safe).repeat(1,20) }

    rule(:version_number) { digit.repeat(1) >> str('.') >> digit.repeat(1) }
    #
    # HTTP Elements
    #
    rule(:request_method) {
    str('OPTIONS') |
    str('GET') |
    str('HEAD') |
    str('POST') |
    str('PUT') |
    str('DELETE') |
    str('TRACE') |
    str('CONNECT') |
    token.repeat(1)
    }

    rule(:version_number) { digits >> str('.') >> digits }
    rule(:http_version) { str('HTTP/') >> version_number.as(:version) }
    rule(:request_line) {
    request_method.as(:method) >>
    str(' ') >> request_uri.as(:uri) >>
    str(' ') >> http_version
    sp >> request_uri.as(:uri) >>
    sp >> http_version
    }

    rule(:header_name) { (str(':').absnt? >> token).repeat(1) }
    rule(:header_value) { (crlf.absnt? >> any).repeat(1) }
    rule(:header_value) { (text | token | separators | quoted_string).repeat(1) }

    rule(:header) {
    header_name.as(:name) >> str(':') >> str(' ').repeat >>
    header_name.as(:name) >> str(':') >> lws.repeat(1) >>
    header_value.as(:value) >> crlf
    }
    rule(:request) {
  2. postmodern revised this gist Jan 15, 2011. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions http_parser.rb
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@

    class HTTPParser < Parslet::Parser
    #
    # Ragel Character Classes
    # Character Classes
    #
    rule(:digit) { match('[0-9]') }
    rule(:xdigit) { digit | match('[a-fA-F]') }
    @@ -26,7 +26,7 @@ class HTTPParser < Parslet::Parser
    rule(:crlf) { str("\r\n") }

    #
    # Additional Character Classes
    # HTTP Character Classes
    #
    rule(:ctl) { cntrl | str("\x7f") }
    rule(:safe) { str('$') | str('-') | str('_') | str('.') }
  3. postmodern revised this gist Jan 15, 2011. 1 changed file with 34 additions and 34 deletions.
    68 changes: 34 additions & 34 deletions http_parser.rb
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,5 @@
    require 'parslet'
    require 'pp'

    class HTTPParser < Parslet::Parser
    #
    @@ -66,54 +67,53 @@ class HTTPParser < Parslet::Parser
    rule(:scheme) {
    (alpha | digit | str('+') | str('-') | str('.')).repeat
    }
    rule(:absolute_uri) {
    scheme >> str(':') >> (uchar | reserved).repeat
    }
    rule(:path) {
    pchar.repeat(1) >> (str('/') >> pchar.repeat).repeat
    }
    rule(:query) {
    (uchar | reserved).repeat.as(:query_string)
    }
    rule(:param) {
    (pchar | str('/')).repeat
    }
    rule(:params) {
    param >> (str(';') >> param).repeat
    }
    rule(:rel_path) {
    (path.maybe >> (str(';') >> params).maybe).as(:path) >>
    (str('?').as(:start_query) >> query).maybe
    }
    rule(:absolute_path) { str('/').repeat(1) >> rel_path }

    rule(:request_uri) {
    (str('*') | absolute_uri | absolute_path).as(:request_uri)
    }
    rule(:fragment) { (uchar | reserved).repeat.as(:fragment) }
    rule(:http_method) {
    (upper | digit | safe).repeat(1,20).as(:request_method)
    rule(:absolute_uri) { scheme >> str(':') >> (uchar | reserved).repeat }

    rule(:path) { pchar.repeat(1) >> (str('/') >> pchar.repeat).repeat }
    rule(:query_string) { (uchar | reserved).repeat }
    rule(:param) { (pchar | str('/')).repeat }
    rule(:params) { param >> (str(';') >> param).repeat }
    rule(:frag) { (uchar | reserved).repeat }

    rule(:relative_path) {
    path.maybe.as(:path) >>
    (str(';') >> params.as(:params)).maybe >>
    (str('?') >> query_string.as(:query)).maybe >>
    (str('#') >> frag.as(:fragment)).maybe
    }
    rule(:absolute_path) { str('/').repeat(1) >> relative_path }

    rule(:http_number) { digit.repeat(1) >> str('.') >> digit.repeat(1) }
    rule(:http_version) { str('HTTP/') >> http_number.as(:version) }
    rule(:request_uri) { str('*') | absolute_uri | absolute_path }
    rule(:request_method) { (upper | digit | safe).repeat(1,20) }

    rule(:version_number) { digit.repeat(1) >> str('.') >> digit.repeat(1) }
    rule(:http_version) { str('HTTP/') >> version_number.as(:version) }
    rule(:request_line) {
    http_method >>
    str(' ') >> request_uri >>
    (str('#') >> fragment).maybe >>
    str(' ') >> http_version >> crlf
    request_method.as(:method) >>
    str(' ') >> request_uri.as(:uri) >>
    str(' ') >> http_version
    }

    rule(:header_name) { (str(':').absnt? >> token).repeat(1) }
    rule(:header_value) { (crlf.absnt? >> any).repeat(1) }

    rule(:message_header) {
    rule(:header) {
    header_name.as(:name) >> str(':') >> str(' ').repeat >>
    header_value.as(:value) >> crlf
    }
    rule(:request) {
    request_line >> message_header.repeat.as(:headers)
    request_line >> crlf >> header.repeat.as(:headers) >> crlf
    }

    root :request
    end

    # Driver: build one parser instance; it is used both for parsing and for
    # error reporting in the rescue branch below.
    parser = HTTPParser.new

    begin
    # Read all of standard input, parse it, and pretty-print the parse result.
    pp parser.parse(STDIN.read)
    rescue Parslet::ParseFailed => error
    # On a parse failure, write the error followed by the root rule's error
    # tree to standard error.
    STDERR.puts error
    STDERR.puts parser.root.error_tree
    end
  4. postmodern revised this gist Jan 15, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion http_parser.rb
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,7 @@

    class HTTPParser < Parslet::Parser
    #
    # Regel Character Classes
    # Ragel Character Classes
    #
    rule(:digit) { match('[0-9]') }
    rule(:xdigit) { digit | match('[a-fA-F]') }
  5. postmodern created this gist Jan 15, 2011.
    119 changes: 119 additions & 0 deletions http_parser.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,119 @@
    require 'parslet'

    # Parslet grammar for an HTTP request: a request line followed by zero or
    # more message headers. The root rule is +request+.
    #
    # Named captures used by the grammar: :request_method, :request_uri,
    # :path, :start_query, :query_string, :fragment, :version, :headers,
    # :name and :value.
    class HTTPParser < Parslet::Parser
      #
      # Ragel Character Classes
      #
      rule(:digit)  { match('[0-9]') }
      rule(:xdigit) { digit | match('[a-fA-F]') }
      rule(:upper)  { match('[A-Z]') }
      rule(:lower)  { match('[a-z]') }
      rule(:alpha)  { upper | lower }
      rule(:alnum)  { alpha | digit }
      rule(:cntrl)  { match('[\x00-\x1f]') }
      rule(:graph)  { match('[!-~]') }
      rule(:print)  { match('[ -~]') }
      # The '[' is escaped so it is read as a literal range start rather than
      # the opening of a nested character class; the range ends at the backtick.
      rule(:punct)  { match('[!-/:-@\[-`{-~]') }
      # Includes the blank character, like Ragel's built-in `space` class.
      rule(:space)  { match("[\t\v\f\n\r ]") }
      rule(:zlen)   { str('') }
      # A rule block must return a parslet; the empty match is the same
      # zero-length match as `zlen`.
      rule(:empty)  { str('') }
      rule(:ascii)  { match('[\x00-\x7f]') }

      #
      # Line Terminators
      #
      rule(:crlf) { str("\r\n") }

      #
      # Additional Character Classes
      #
      rule(:ctl)   { cntrl | str("\x7f") }
      rule(:safe)  { str('$') | str('-') | str('_') | str('.') }
      rule(:extra) {
        str('!') | str('*') | str("'") | str('(') | str(')') | str(',')
      }
      # Every alternative must be joined with `|`: a line break without a
      # trailing `|` starts a new expression, and only the last expression of
      # the block becomes the rule.
      rule(:reserved) {
        str(';') | str('/') | str('?') | str(':') | str('@') |
          str('&') | str('=') | str('+')
      }
      rule(:sorta_safe) { str('"') | str('<') | str('>') }
      rule(:unsafe)     { ctl | str(' ') | str('#') | str('%') | sorta_safe }
      # Any single character that belongs to none of the classes above.
      rule(:national) {
        (alpha | digit | reserved | extra | safe | unsafe).absnt? >> any
      }

      rule(:unreserved) { alpha | digit | safe | extra | national }
      # A percent sign, an optional 'u', then two hex digits. The '%' is
      # required: it is excluded from `national` (via `unsafe`), so no other
      # alternative of `uchar` can consume it.
      rule(:escape) { str('%') >> str('u').maybe >> xdigit >> xdigit }
      rule(:uchar)  { unreserved | escape | sorta_safe }
      rule(:pchar)  {
        uchar | str(':') | str('@') | str('&') | str('=') | str('+')
      }
      rule(:tspecials) {
        str('(') | str(')') | str('<') | str('>') | str('@') | str(',') |
          str(';') | str(':') | str("\\") | str('"') | str('/') | str('[') |
          str(']') | str('?') | str('=') | str('{') | str('}') | str(' ') |
          str("\t")
      }

      #
      # Elements
      #

      # A single ASCII character that is neither a control character nor one
      # of the tspecials.
      rule(:token) { (ctl | tspecials).absnt? >> ascii }

      #
      # URI Schemes and Absolute Paths
      #
      rule(:scheme) {
        (alpha | digit | str('+') | str('-') | str('.')).repeat
      }
      rule(:absolute_uri) {
        scheme >> str(':') >> (uchar | reserved).repeat
      }
      rule(:path) {
        pchar.repeat(1) >> (str('/') >> pchar.repeat).repeat
      }
      rule(:query) {
        (uchar | reserved).repeat.as(:query_string)
      }
      rule(:param) {
        (pchar | str('/')).repeat
      }
      rule(:params) {
        param >> (str(';') >> param).repeat
      }
      # Optional path with optional ';'-separated params, captured together as
      # :path, followed by an optional '?' and query string.
      rule(:rel_path) {
        (path.maybe >> (str(';') >> params).maybe).as(:path) >>
          (str('?').as(:start_query) >> query).maybe
      }
      rule(:absolute_path) { str('/').repeat(1) >> rel_path }

      rule(:request_uri) {
        (str('*') | absolute_uri | absolute_path).as(:request_uri)
      }
      rule(:fragment) { (uchar | reserved).repeat.as(:fragment) }
      # 1 to 20 characters drawn from upper-case letters, digits and `safe`.
      rule(:http_method) {
        (upper | digit | safe).repeat(1, 20).as(:request_method)
      }

      rule(:http_number)  { digit.repeat(1) >> str('.') >> digit.repeat(1) }
      rule(:http_version) { str('HTTP/') >> http_number.as(:version) }
      # Method, URI (with optional '#fragment') and version, separated by
      # single spaces and terminated by CRLF.
      rule(:request_line) {
        http_method >>
          str(' ') >> request_uri >>
          (str('#') >> fragment).maybe >>
          str(' ') >> http_version >> crlf
      }

      rule(:header_name)  { (str(':').absnt? >> token).repeat(1) }
      # Everything up to, but not including, the next CRLF.
      rule(:header_value) { (crlf.absnt? >> any).repeat(1) }

      rule(:message_header) {
        header_name.as(:name) >> str(':') >> str(' ').repeat >>
          header_value.as(:value) >> crlf
      }
      # The trailing CRLF is the blank line that ends the header section. It
      # is optional so that input without it is still accepted.
      rule(:request) {
        request_line >> message_header.repeat.as(:headers) >> crlf.maybe
      }

      root :request
    end