-
-
Save anothernoise/fc50a8ee64d3eed56919 to your computer and use it in GitHub Desktop.
Revisions
-
travisbrown revised this gist
Jul 30, 2013 . 1 changed file with 5 additions and 5 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -15,12 +15,12 @@ object Searcher { yearRange: (Int, Int), page: Option[Int] = None ) = pageReq <<? Map( "format" -> "json", "date1" -> yearRange._1.toString, "date2" -> yearRange._2.toString, "dateFilterType" -> "yearRange", "andtext" -> conj.mkString(" "), "ortext" -> disj.mkString(" ") ) ++ page.map("page" -> _.toString) def retrievePage( -
travisbrown revised this gist
Jul 30, 2013 . 1 changed file with 3 additions and 5 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,11 +18,9 @@ object Searcher { "format" -> "json", "date1" -> yearRange._1.toString, "date2" -> yearRange._2.toString, "dateFilterType" -> "yearRange", "andtext" -> conj.mkString(" "), "ortext" -> disj.mkString(" ") ) ++ page.map("page" -> _.toString) def retrievePage( -
travisbrown created this gist
Jul 29, 2013 . There are no files selected for viewing
/*
 * Build settings that were listed alongside this code (sbt syntax, kept here
 * verbatim for reference; they are not part of the Scala source below):
 *
 *   scalaVersion := "2.10.2"
 *
 *   libraryDependencies ++= Seq(
 *     "net.databinder.dispatch" %% "dispatch-core" % "0.11.0",
 *     "net.databinder.dispatch" %% "dispatch-json4s-jackson" % "0.11.0",
 *     "net.sf.opencsv" % "opencsv" % "2.0"
 *   )
 */

// Builds search requests against the Chronicling America results endpoint,
// fetches every page of results and hands the collected items to CsvOutput.
object Searcher {
  import dispatch.{ Http, url }, dispatch.Defaults._, dispatch.as
  import org.json4s._
  import scala.Function.const
  import scala.concurrent.Future
  import scala.util._

  // Picked up implicitly by the json4s `extract` calls below.
  implicit val formats = DefaultFormats

  // Base request; every search only adds query parameters to it.
  val pageReq = url("http://chroniclingamerica.loc.gov/search/pages/results/")

  // Assembles the request for one page of results.
  //   conj      - words joined with spaces into "andtext"; omitted when empty
  //   disj      - words joined with spaces into "ortext"; omitted when empty
  //   yearRange - sent as "date1" and "date2"
  //   page      - optional page number, sent as "page" when present
  def constructRequest(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int),
    page: Option[Int] = None
  ) = {
    // An empty word list contributes no parameter at all.
    def wordsParam(key: String, words: List[String]) =
      if (words.isEmpty) None else Some(key -> words.mkString(" "))

    val (firstYear, lastYear) = yearRange
    val fixedParams = Map(
      "format" -> "json",
      "date1" -> firstYear.toString,
      "date2" -> lastYear.toString,
      "dateFilterType" -> "yearRange"
    )
    val pageParam = page.map(number => "page" -> number.toString)

    pageReq <<? (
      fixedParams ++
        wordsParam("andtext", conj) ++
        wordsParam("ortext", disj) ++
        pageParam
    )
  }

  // Issues the request for a single page and yields the response parsed as
  // JSON inside a future.
  // NOTE(review): `OK` presumably fails the future on a non-success HTTP
  // status - confirm against the dispatch documentation.
  def retrievePage(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int),
    page: Option[Int] = None
  ) = {
    val request = constructRequest(conj, disj, yearRange, page)
    Http(request OK as.json4s.Json)
  }

  // Fetches the first page to learn the page count, then requests pages
  // 2..pageCount and concatenates all items, first page's items first.
  def search(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int)
  ) = retrievePage(conj, disj, yearRange).flatMap { firstJson =>
    val firstPage = firstJson.extract[ResultSet]

    val laterPages = Future.traverse(2 to firstPage.pageCount) { pageNumber =>
      retrievePage(conj, disj, yearRange, Some(pageNumber)).map { pageJson =>
        pageJson.extract[ResultSet].items
      }
    }

    for (pages <- laterPages) yield firstPage.items ++ pages.flatten
  }

  // Asynchronously begin downloads and print result when completed.
  // On success the items are also written to `path` via CsvOutput.
  def saveSearchResults(
    conj: List[String],
    disj: List[String],
    yearRange: (Int, Int)
  )(path: String) = {
    val pending = search(conj, disj, yearRange)

    pending.onComplete {
      case Failure(e) =>
        println("There was a problem: %s".format(e))
      case Success(items) =>
        println("Successfully downloaded %d items!".format(items.size))
        CsvOutput.writeItems(items)(path)
    }
  }
}

// Represents a single newspaper page.
case class Item(
  id: String,
  url: String,
  lccn: String,
  date: String,
  sequence: Int,
  title: String,
  ocr_eng: String
) {
  // The capture group of Item.PubIdPattern applied to `id`.
  // Throws a MatchError when `id` does not match the pattern.
  def pubId = {
    val Item.PubIdPattern(publication) = id
    publication
  }

  // The three capture groups of Item.DatePattern applied to `date`, joined
  // with dashes. Throws a MatchError when `date` does not match the pattern.
  def formattedDate = {
    val Item.DatePattern(year, month, day) = date
    "%s-%s-%s".format(year, month, day)
  }
}

// Some helpers for working with item data.
// Regular expressions used to pick apart the `date` and `id` fields of an item.
object Item {
  // Eight consecutive digits, captured as groups of four, two and two digits.
  val DatePattern: scala.util.matching.Regex = """(\d\d\d\d)(\d\d)(\d\d)""".r

  // A path of the form "/lccn/<segment>/..."; captures the segment that
  // follows "/lccn/".
  val PubIdPattern: scala.util.matching.Regex = """/lccn/([^/]+)/.*""".r
}

// Represents a set of search query results.
case class ResultSet(totalItems: Int, itemsPerPage: Int, items: List[Item]) {
  // Number of pages needed to hold `totalItems` at `itemsPerPage` per page:
  // the integer quotient, plus one more page when there is a remainder.
  // A non-positive `itemsPerPage` yields 0 instead of dividing by zero.
  def pageCount: Int =
    if (itemsPerPage <= 0) 0
    else {
      val quot = totalItems / itemsPerPage
      val remd = totalItems % itemsPerPage
      quot + math.signum(remd)
    }
}

// The boring stuff: writing the CSV file.
object CsvOutput {
  import au.com.bytecode.opencsv.CSVWriter

  // Writes one CSV row per item to the file at `path`, with the columns
  // pubId, title, formattedDate, sequence, url and the OCR text (newlines
  // replaced by spaces).
  //
  // The writer is closed even when building or writing a row throws, so the
  // underlying file handle is never leaked.
  // NOTE: java.io.FileWriter encodes with the platform default charset.
  def writeItems(items: List[Item])(path: String): Unit = {
    val writer = new CSVWriter(new java.io.FileWriter(path))
    try {
      items.foreach { item =>
        writer.writeNext(
          Array(
            item.pubId,
            item.title,
            item.formattedDate,
            item.sequence.toString,
            item.url,
            item.ocr_eng.replaceAll("\n", " ")
          )
        )
      }
    } finally {
      writer.close()
    }
  }
}