Skip to content

Instantly share code, notes, and snippets.

@travisbrown
Forked from decretist/README.md
Last active August 29, 2015 14:13
Show Gist options
  • Save travisbrown/4bad84b4729c7a221e30 to your computer and use it in GitHub Desktop.
Save travisbrown/4bad84b4729c7a221e30 to your computer and use it in GitHub Desktop.

Revisions

  1. travisbrown revised this gist Jan 22, 2015. 1 changed file with 37 additions and 33 deletions.
    70 changes: 37 additions & 33 deletions parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -14,49 +14,53 @@ object parse extends RegexParsers {
    def plain: Parser[Plain] = "[^<>\\-\\+]+".r ^^ (Plain(_))
    def elements: Parser[List[Element]] = rep(tag | header | plain)
    def apply(s: String) = parseAll(elements, s)

    /** Splits a flat element list into consecutive groups, opening a new group at every `Tag`.
      *
      * Elements that appear before the first tag form a leading group of their own.
      * The result always contains at least one group; for empty input that single
      * group is empty.
      *
      * Groups are accumulated by prepending and reversed once when they are closed.
      * Appending to an immutable `List` with `:+` copies the list on every call, which
      * made the previous fold quadratic in the number of elements.
      *
      * @param els parsed elements in document order
      * @return the groups in document order, each group in document order
      */
    def groupByTag(els: List[Element]): List[List[Element]] = {
      // While folding, both the open group and the list of closed groups are held reversed.
      val (last, rest) = els.foldLeft((List.empty[Element], List.empty[List[Element]])) {
        // First tag seen with nothing pending: just open a group.
        case ((Nil, done), tag @ Tag(_, _)) => (List(tag), done)
        // A tag closes the pending group and opens the next one.
        case ((current, done), tag @ Tag(_, _)) => (List(tag), current.reverse :: done)
        // Anything else joins the open group.
        case ((current, done), other) => (other :: current, done)
      }

      (last.reverse :: rest).reverse
    }
    }

    object run {
    def main(args: Array[String]) {
    var index = 1
    var print = false
    val input = io.Source.fromFile(
    "./edF.txt"
    // ).getLines.take(15522).mkString(" ") // Pars Prima
    // ).getLines.take(59012).mkString(" ") // Pars Prima and Pars Secunda
    ).getLines.take(64523).mkString(" ") // entire file
    // parse(input).get foreach println
    parse(input).get.map {
    case Tag("1", "C") => // case start
    case Tag("1", "D") => // distinction start
    case Tag("1", "DC") => //
    case Tag("1", "DP") => //
    case Tag("2", _) => // distinction number
    case Tag("3", _) => // question number
    case Tag("4", _) => // capitulum
    case Tag("L", _) => // line
    case Tag("P", "0") => // Palea end
    case Tag("P", "1") => // Palea start
    case Tag("S", _) => // page
    case Tag("T", "A") => // dictum ante
    case Tag("T", "I") => // inscription
    case Tag("T", "P") => // dictum post
    case Tag("T", "Q") => // case statement
    case Tag("T", "R") => print = true // rubric
    case Tag("T", "T") => // text
    case Tag(_, _) => // error

    parse(input).map(parse.groupByTag).get.map {
    case Tag("1", "C") :: _ => // case start
    case Tag("1", "D") :: _ => // distinction start
    case Tag("1", "DC") :: _ => //
    case Tag("1", "DP") :: _ => //
    case Tag("2", _) :: _ => // distinction number
    case Tag("3", _) :: _ => // question number
    case Tag("4", _) :: _ => // capitulum
    case Tag("L", _) :: _ => // line
    case Tag("P", "0") :: _ => // Palea end
    case Tag("P", "1") :: _ => // Palea start
    case Tag("S", _) :: _ => // page
    case Tag("T", "A") :: _ => // dictum ante
    case Tag("T", "I") :: _ => // inscription
    case Tag("T", "P") :: _ => // dictum post
    case Tag("T", "Q") :: _ => // case statement
    case Tag("T", "R") :: body =>
    val content = body.collect {
    case Plain(text) => text.trim
    }.mkString(" ")

    println(content.trim)

    case Tag("T", "T") :: _ => // text
    case Tag(_, _) :: _ => // error
    // case Tag(name, attr) => println(name + "\t" + attr)
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    val spaces = "\\s+".r
    if (print) {
    if (!spaces.pattern.matcher(text).matches) {
    // println(f"$index%04d" + " _ " + text)
    println(text.trim())
    index = index + 1
    print = false
    }
    }
    case _ => // do nothing
    }
    }
    }
  2. @decretist decretist revised this gist Jan 22, 2015. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -1,14 +1,14 @@
    6714256
    =======
    edF.txt 2192,2194:
    ...
    ```
    <4 7> -[C. VII.]+ <T R> Ait enim Gregorius I., -[lib. VII. reg. epist. 112.]+
    Siagrio Episcopo Augustodunensi:
    <T T> ...
    ...
    ```
    edF.txt 10695,10697:
    ...
    ```
    <4 29> -C. XXIX.+ <T R> Inconsultis legatis inperatoris Adrianus -[II.]+
    ad pontificatum eligitur.
    <T T> ...
    ...
    ```
  3. @decretist decretist revised this gist Jan 22, 2015. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -1,10 +1,14 @@
    6714256
    =======
    edF.txt 2192,2194:
    ...
    <4 7> -[C. VII.]+ <T R> Ait enim Gregorius I., -[lib. VII. reg. epist. 112.]+
    Siagrio Episcopo Augustodunensi:
    <T T> ...
    ...
    edF.txt 10695,10697:
    ...
    <4 29> -C. XXIX.+ <T R> Inconsultis legatis inperatoris Adrianus -[II.]+
    ad pontificatum eligitur.
    <T T> ...
    ...
  4. @decretist decretist revised this gist Jan 21, 2015. 1 changed file with 10 additions and 0 deletions.
    10 changes: 10 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    6714256
    =======
    edF.txt 2192,2194:
    <4 7> -[C. VII.]+ <T R> Ait enim Gregorius I., -[lib. VII. reg. epist. 112.]+
    Siagrio Episcopo Augustodunensi:
    <T T> ...
    edF.txt 10695,10697:
    <4 29> -C. XXIX.+ <T R> Inconsultis legatis inperatoris Adrianus -[II.]+
    ad pontificatum eligitur.
    <T T> ...
  5. @decretist decretist revised this gist Jan 20, 2015. 2 changed files with 64524 additions and 1 deletion.
    1 change: 0 additions & 1 deletion .gitignore
    Original file line number Diff line number Diff line change
    @@ -1 +0,0 @@
    edF.txt
    64,524 changes: 64,524 additions & 0 deletions edF.txt
    64,524 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  6. @decretist decretist revised this gist Jan 20, 2015. 5 changed files with 10304 additions and 7 deletions.
    16 changes: 9 additions & 7 deletions parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -23,7 +23,8 @@ object run {
    val input = io.Source.fromFile(
    "./edF.txt"
    // ).getLines.take(15522).mkString(" ") // Pars Prima
    ).getLines.take(59011).mkString(" ") // Pars Prima and Pars Secunda
    // ).getLines.take(59012).mkString(" ") // Pars Prima and Pars Secunda
    ).getLines.take(64523).mkString(" ") // entire file
    // parse(input).get foreach println
    parse(input).get.map {
    case Tag("1", "C") => // case start
    @@ -37,20 +38,21 @@ object run {
    case Tag("P", "0") => // Palea end
    case Tag("P", "1") => // Palea start
    case Tag("S", _) => // page
    case Tag("T", "A") => print = true // dictum ante
    case Tag("T", "A") => // dictum ante
    case Tag("T", "I") => // inscription
    case Tag("T", "P") => print = true // dictum post
    case Tag("T", "Q") => print = true // case statement
    case Tag("T", "R") => // rubric
    case Tag("T", "T") => print = true // text
    case Tag("T", "P") => // dictum post
    case Tag("T", "Q") => // case statement
    case Tag("T", "R") => print = true // rubric
    case Tag("T", "T") => // text
    case Tag(_, _) => // error
    // case Tag(name, attr) => println(name + "\t" + attr)
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    val spaces = "\\s+".r
    if (print) {
    if (!spaces.pattern.matcher(text).matches) {
    println(f"$index%04d" + " _ " + text)
    // println(f"$index%04d" + " _ " + text)
    println(text.trim())
    index = index + 1
    print = false
    }
    3,422 changes: 3,422 additions & 0 deletions python.out
    3,422 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    29 changes: 29 additions & 0 deletions rubrics.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    #!/usr/bin/python
    #
    # Paul Evans ([email protected])
    #
    from __future__ import print_function
    import re
    import sys
    def main():
        """Print every rubric found in ./edF.txt, one per line, on standard output.

        A rubric is the text that follows a <T R> tag, up to the next
        <T I>, <T P> or <T T> tag. Before printing, the Palea marker and every
        -...+ span are removed and whitespace runs are collapsed to a single space.
        The number of rubrics found is reported on standard error next to the
        expected count of 3422.
        """
        # The context manager closes the handle once the text has been read.
        with open('./edF.txt', 'r') as f:
            text = f.read()
        # (?<=...) positive lookbehind assertion.
        rubrics = re.findall(r'(?:\<T R\>|(?<=\<T R\>))(.*?)'  # rubric starts with rubric (<T R>) tag.
                             r'(?:'  # non-capturing group.
                             r'\<T [IPT]\>'  # rubric ends with inscription, dictum (post), or text tag.
                             r')', text, re.S)  # re.S (re.DOTALL) makes '.' special character match any character including newline.
        print('expected 3422 rubrics, found ' + str(len(rubrics)) + ' rubrics', file=sys.stderr)
        for rubric in rubrics:
            rubric = re.sub(r'\<P 1\> \-\[PALEA\.\+', '', rubric)  # remove Palea tag.
            rubric = re.sub(re.compile(r'\-\[.*?\]\+', re.S), '', rubric)  # remove -[...]+ spans, which may cross lines.
            rubric = re.sub(r'\-.*?\+', '', rubric)  # remove remaining -...+ spans that sit on one line.
            rubric = re.sub(r'\s+', ' ', rubric)
            # After the collapse above, only single spaces can remain at either end.
            rubric = rubric.strip()
            print(rubric)

    if __name__ == '__main__':
        main()
    3,422 changes: 3,422 additions & 0 deletions scala.out
    3,422 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    3,422 changes: 3,422 additions & 0 deletions toc.txt
    3,422 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  7. @decretist decretist revised this gist Oct 2, 2013. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -47,8 +47,9 @@ object run {
    // case Tag(name, attr) => println(name + "\t" + attr)
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    val spaces = "\\s+".r
    if (print) {
    if (text != " " && text != " " && text != " ") {
    if (!spaces.pattern.matcher(text).matches) {
    println(f"$index%04d" + " _ " + text)
    index = index + 1
    print = false
  8. @decretist decretist revised this gist Sep 30, 2013. 1 changed file with 16 additions and 6 deletions.
    22 changes: 16 additions & 6 deletions parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -16,11 +16,14 @@ object parse extends RegexParsers {
    def apply(s: String) = parseAll(elements, s)
    }

    object Process {
    object run {
    def main(args: Array[String]) {
    var index = 1
    var print = false
    val input = io.Source.fromFile(
    "./edF.txt"
    ).getLines.take(15522).mkString(" ") // Prima Pars
    // ).getLines.take(15522).mkString(" ") // Pars Prima
    ).getLines.take(59011).mkString(" ") // Pars Prima and Pars Secunda
    // parse(input).get foreach println
    parse(input).get.map {
    case Tag("1", "C") => // case start
    @@ -34,16 +37,23 @@ object Process {
    case Tag("P", "0") => // Palea end
    case Tag("P", "1") => // Palea start
    case Tag("S", _) => // page
    case Tag("T", "A") => // dictum ante
    case Tag("T", "A") => print = true // dictum ante
    case Tag("T", "I") => // inscription
    case Tag("T", "P") => // dictum post
    case Tag("T", "Q") => // case statement
    case Tag("T", "P") => print = true // dictum post
    case Tag("T", "Q") => print = true // case statement
    case Tag("T", "R") => // rubric
    case Tag("T", "T") => // text
    case Tag("T", "T") => print = true // text
    case Tag(_, _) => // error
    // case Tag(name, attr) => println(name + "\t" + attr)
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    if (print) {
    if (text != " " && text != " " && text != " ") {
    println(f"$index%04d" + " _ " + text)
    index = index + 1
    print = false
    }
    }
    }
    }
    }
  9. @decretist decretist revised this gist Sep 27, 2013. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -23,8 +23,12 @@ object Process {
    ).getLines.take(15522).mkString(" ") // Prima Pars
    // parse(input).get foreach println
    parse(input).get.map {
    case Tag("1", "C") => // case start
    case Tag("1", "D") => // distinction start
    case Tag("1", "DC") => //
    case Tag("1", "DP") => //
    case Tag("2", _) => // distinction number
    case Tag("3", _) => // question number
    case Tag("4", _) => // capitulum
    case Tag("L", _) => // line
    case Tag("P", "0") => // Palea end
    @@ -33,9 +37,11 @@ object Process {
    case Tag("T", "A") => // dictum ante
    case Tag("T", "I") => // inscription
    case Tag("T", "P") => // dictum post
    case Tag("T", "Q") => // case statement
    case Tag("T", "R") => // rubric
    case Tag("T", "T") => // text
    case Tag(_, _) => // error
    // case Tag(name, attr) => println(name + "\t" + attr)
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    }
  10. @decretist decretist revised this gist Sep 27, 2013. 2 changed files with 15 additions and 3 deletions.
    1 change: 1 addition & 0 deletions .gitignore
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    edF.txt
    17 changes: 14 additions & 3 deletions parser-test.scala
    Original file line number Diff line number Diff line change
    @@ -20,11 +20,22 @@ object Process {
    def main(args: Array[String]) {
    val input = io.Source.fromFile(
    "./edF.txt"
    ).getLines.take(15522).mkString(" ")

    ).getLines.take(15522).mkString(" ") // Prima Pars
    // parse(input).get foreach println
    parse(input).get.map {
    case Tag(name, attr) => { println(name + "\t" + attr) }
    case Tag("1", "D") => // distinction start
    case Tag("2", _) => // distinction number
    case Tag("4", _) => // capitulum
    case Tag("L", _) => // line
    case Tag("P", "0") => // Palea end
    case Tag("P", "1") => // Palea start
    case Tag("S", _) => // page
    case Tag("T", "A") => // dictum ante
    case Tag("T", "I") => // inscription
    case Tag("T", "P") => // dictum post
    case Tag("T", "R") => // rubric
    case Tag("T", "T") => // text
    case Tag(_, _) => // error
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    }
  11. @decretist decretist renamed this gist Sep 26, 2013. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  12. @decretist decretist revised this gist Sep 26, 2013. 1 changed file with 14 additions and 8 deletions.
    22 changes: 14 additions & 8 deletions parser-test
    Original file line number Diff line number Diff line change
    @@ -1,26 +1,32 @@
    import scala.util.parsing.combinator._

    sealed trait Element
    case class Comment(s: String) extends Element
    case class Tag(name: String, attr: String) extends Element
    case class Text(s: String) extends Element
    case class Header(text: String) extends Element
    case class Plain(text: String) extends Element

    object parse extends RegexParsers {
    override def skipWhitespace = false
    def comment: Parser[Comment] = "\\-".r ~> "[^+]*".r <~ "+" ^^ (Comment(_))
    def tag: Parser[Tag] = ("<" ~> "\\S+".r <~ "\\s".r) ~ "[^\\s>]+".r <~ ">" ^^ {
    case name ~ attr => Tag(name, attr)
    }
    def text: Parser[Text] = "[^<>\\-\\+]+".r ^^ (Text(_))
    def elements: Parser[List[Element]] = rep(comment | tag | text)
    def header: Parser[Header] = "\\-".r ~> "[^+]*".r <~ "+" ^^ (Header(_))
    def plain: Parser[Plain] = "[^<>\\-\\+]+".r ^^ (Plain(_))
    def elements: Parser[List[Element]] = rep(tag | header | plain)
    def apply(s: String) = parseAll(elements, s)
    }

    object Test {
    object Process {
    def main(args: Array[String]) {
    val input = io.Source.fromFile(
    "./edF.txt"
    ).getLines.take(10000).mkString(" ")
    parse(input).get foreach println
    ).getLines.take(15522).mkString(" ")

    // parse(input).get foreach println
    parse(input).get.map {
    case Tag(name, attr) => { println(name + "\t" + attr) }
    case Header(text) => // do nothing
    case Plain(text) => // do nothing
    }
    }
    }
  13. @decretist decretist created this gist Sep 26, 2013.
    26 changes: 26 additions & 0 deletions parser-test
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,26 @@
    import scala.util.parsing.combinator._

    /** Common supertype of every value produced by the `parse` object's parsers. */
    sealed trait Element
    /** Text found between a leading `-` and the next `+`; the delimiters themselves are dropped. */
    case class Comment(s: String) extends Element
    /** A `<name attr>` marker: `name` is the non-whitespace run after `<`, `attr` the run before `>`. */
    case class Tag(name: String, attr: String) extends Element
    /** A run of characters that contains none of `<`, `>`, `-`, `+`. */
    case class Text(s: String) extends Element

    /** Combinator parser that turns the marked-up text into a flat list of `Element`s. */
    object parse extends RegexParsers {
      // Whitespace is matched explicitly by the patterns below, so it must not be skipped.
      override def skipWhitespace = false

      /** `-` ... `+`: yields the text between the two delimiters as a `Comment`. */
      def comment: Parser[Comment] = {
        val body = "\\-".r ~> "[^+]*".r <~ "+"
        body ^^ { inner => Comment(inner) }
      }

      /** `<name attr>`: a non-whitespace name, one whitespace character, then the attribute up to `>`. */
      def tag: Parser[Tag] = {
        val name = "<" ~> "\\S+".r <~ "\\s".r
        val attr = "[^\\s>]+".r <~ ">"
        (name ~ attr) ^^ { case n ~ a => Tag(n, a) }
      }

      /** One or more characters, none of which is `<`, `>`, `-` or `+`. */
      def text: Parser[Text] = "[^<>\\-\\+]+".r ^^ { run => Text(run) }

      /** Zero or more elements; at each position `comment` is tried first, then `tag`, then `text`. */
      def elements: Parser[List[Element]] = {
        val element: Parser[Element] = comment | tag | text
        rep(element)
      }

      /** Runs `elements` over `s`, requiring the whole input to be consumed. */
      def apply(s: String) = parseAll(elements, s)
    }

    /** Command-line entry point: parses the first 10000 lines of `./edF.txt` and prints one element per line. */
    object Test {
      /** Reads the input file, parses it and prints every parsed element.
        *
        * @param args ignored
        */
      def main(args: Array[String]): Unit = {
        val source = io.Source.fromFile("./edF.txt")
        // Release the file handle even if reading fails part-way through.
        val input =
          try source.getLines.take(10000).mkString(" ")
          finally source.close()

        parse(input) match {
          case parse.Success(elements, _) => elements foreach println
          // Still aborts with an exception, but carries the parser's own message
          // and input position instead of the generic one raised by `.get`.
          case failure: parse.NoSuccess => sys.error(failure.toString)
        }
      }
    }