jellybob · September 29, 2012 21:38 · Sep 29, 2012
diff --git a/download_harrington.rb b/download_harrington.rb
@@ -0,0 +1,51 @@
+# Go to http://www.baen.com/series_list.asp?letter=H, find the ID of the book you want,
+# and set the `book` variable below to it. Run this program, then add the resulting
+# <book ID>/index.html to Calibre, which can convert it to a .mobi. Finally, email the
+# .mobi to your Kindle and enjoy.
+require 'capybara'
+require 'capybara/dsl'
+require 'capybara/webkit'
+require 'fileutils'
+
+# ID of the book to download; it is used both in the chapter URLs and as the name of
+# the output directory.
+book = "0743435710"
+
+# Output directory that will hold index.html and the downloaded chapter files.
+FileUtils.mkdir_p(book)
+
+# Mix the Capybara DSL (visit, find, all, ...) into the top-level script and run it
+# through the capybara-webkit driver.
+include Capybara::DSL
+Capybara.current_driver = :webkit
+
+Capybara.default_selector = :css
+
+def chapter_title
+  find("h1").text
+end
+
+base = "http://www.baenebooks.com/chapters/#{book}/"
+visit "#{base}/#{book}_toc.htm"
+
+chapters = all("a").reject { |link|
+  %w{Back Next}.include?(link.text)
+}.collect { |link|
+  { title: link.text, href: link["href"] }
+}
+
+book_title = chapter_title
+File.open("#{book}/index.html", "w") do |index|
+  index << "<html><body><h1>#{book_title}</h1><ul>"
+
+  chapter_number = 0
+  chapters.each do |chapter|
+    visit "#{base}/#{chapter[:href]}"
+    print "Chapter #{chapter_number+1}/#{chapters.size}\r"
+    index << %Q{<li><a href="chapter_#{chapter_number}.html">#{chapter_title}</a></li>}
+
+    # I redownload with curl here because Capybara was having trouble with non-ASCII characters.
+    %x{curl -s "#{base}/#{chapter[:href]}" > #{book}/chapter_#{chapter_number}.html}
+
+    chapter_number += 1
+  end
+
+  index << "</ul></body></html>"
+end
+puts ""