Skip to content

Instantly share code, notes, and snippets.

@travisbrown
Forked from decretist/README.md
Last active August 29, 2015 14:13
Show Gist options
  • Select an option

  • Save travisbrown/4bad84b4729c7a221e30 to your computer and use it in GitHub Desktop.

Select an option

Save travisbrown/4bad84b4729c7a221e30 to your computer and use it in GitHub Desktop.
import scala.util.parsing.combinator._
/**
 * One token of the marked-up input text, as produced by the `parse` object
 * in this file. The hierarchy is sealed, so a match over `Tag`, `Header`
 * and `Plain` covers every possible element.
 */
sealed trait Element
/**
 * A markup tag of the form `<name attr>`.
 *
 * @param name the non-whitespace characters directly after `<`
 *             (e.g. `"1"`, `"T"`, `"P"` in the patterns used by `Process`)
 * @param attr the characters after the separating whitespace, up to the closing `>`
 */
case class Tag(name: String, attr: String) extends Element
/**
 * A span that the parser reads between a leading `-` and the next `+`.
 *
 * @param text the characters between the two delimiters (delimiters excluded)
 */
case class Header(text: String) extends Element
/**
 * A run of ordinary text containing none of the characters `<`, `>`, `-` or `+`.
 *
 * @param text the matched characters, unchanged
 */
case class Plain(text: String) extends Element
/**
 * Splits a marked-up string into a flat list of [[Element]]s.
 *
 * Three kinds of token are recognised:
 *  - `<name attr>` becomes a [[Tag]];
 *  - `-text+` becomes a [[Header]];
 *  - any other run of characters free of `<`, `>`, `-` and `+` becomes a [[Plain]].
 *
 * Call it like a function: `parse(input)` returns the combinator library's
 * `ParseResult`, which is only successful when the whole input is consumed.
 */
object parse extends RegexParsers {
  // Whitespace is part of the grammar (it separates a tag's name from its
  // attribute and belongs to plain text), so the automatic skipping that
  // RegexParsers performs before every literal/regex is switched off.
  override def skipWhitespace: Boolean = false

  /**
   * `<`, one or more non-whitespace characters (the name), exactly one
   * whitespace character, one or more characters that are neither whitespace
   * nor `>` (the attribute), then `>`.
   */
  def tag: Parser[Tag] = {
    val tagName: Parser[String] = "<" ~> "\\S+".r <~ "\\s".r
    val tagAttr: Parser[String] = "[^\\s>]+".r <~ ">"
    (tagName ~ tagAttr) ^^ { pair =>
      pair match {
        case n ~ a => Tag(n, a)
      }
    }
  }

  /** A `-`, then zero or more characters other than `+`, then a closing `+`. */
  def header: Parser[Header] = {
    val body: Parser[String] = "\\-".r ~> "[^+]*".r <~ "+"
    body ^^ { text => Header(text) }
  }

  /** One or more characters, none of which is `<`, `>`, `-` or `+`. */
  def plain: Parser[Plain] = {
    val run: Parser[String] = "[^<>\\-\\+]+".r
    run ^^ { text => Plain(text) }
  }

  /** Zero or more elements; the alternatives are tried in the order tag, header, plain. */
  def elements: Parser[List[Element]] = {
    val element: Parser[Element] = tag | header | plain
    rep(element)
  }

  /**
   * Parses the complete string `s`.
   *
   * @param s the text to tokenize
   * @return a successful result holding the elements in input order, or a
   *         non-successful result if some part of `s` matches no rule
   */
  def apply(s: String): ParseResult[List[Element]] = parseAll(elements, s)
}
/**
 * Entry point that reads the beginning of `./edF.txt`, tokenizes it with
 * `parse`, and walks over the resulting elements.
 *
 * The per-element cases are placeholders: each one only records, in a
 * comment, what the corresponding tag stands for.
 */
object Process {
  /**
   * Reads the first 15522 lines of `./edF.txt`, joins them with single
   * spaces, parses the result and visits every element.
   *
   * @param args command-line arguments (not used)
   * @throws RuntimeException if the text cannot be parsed; the message
   *                          carries the parser's own position and diagnosis
   */
  def main(args: Array[String]): Unit = {
    val path = "./edF.txt"

    // Close the file handle even if reading fails. The lines must be
    // materialised (mkString) inside the try, because getLines() is lazy.
    val source = scala.io.Source.fromFile(path)
    val input =
      try source.getLines().take(15522).mkString(" ") // Prima Pars
      finally source.close()

    parse(input) match {
      case parse.Success(elements, _) =>
        // To dump the token stream while debugging: elements foreach println
        // foreach rather than map: the cases run for effect only, so no
        // List[Unit] needs to be built.
        elements.foreach {
          case Tag("1", "D") => // distinction start
          case Tag("2", _) => // distinction number
          case Tag("4", _) => // capitulum
          case Tag("L", _) => // line
          case Tag("P", "0") => // Palea end
          case Tag("P", "1") => // Palea start
          case Tag("S", _) => // page
          case Tag("T", "A") => // dictum ante
          case Tag("T", "I") => // inscription
          case Tag("T", "P") => // dictum post
          case Tag("T", "R") => // rubric
          case Tag("T", "T") => // text
          case Tag(_, _) => // error
          case Header(_) => // do nothing
          case Plain(_) => // do nothing
        }

      case noSuccess: parse.NoSuccess =>
        // Still fails with a RuntimeException (as `.get` did), but now
        // reports where and why the parser stopped.
        sys.error(s"Could not parse $path: $noSuccess")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment