Skip to content

Instantly share code, notes, and snippets.

@ismailmechbal
Forked from sirupsen/book.rb
Created March 7, 2019 14:30
Show Gist options
  • Save ismailmechbal/2bcd579dbd9c05944fe556db084fe68b to your computer and use it in GitHub Desktop.
Save ismailmechbal/2bcd579dbd9c05944fe556db084fe68b to your computer and use it in GitHub Desktop.

Revisions

  1. @sirupsen sirupsen revised this gist Dec 14, 2018. No changes.
  2. @sirupsen sirupsen revised this gist Aug 10, 2018. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions initializer.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    # Shared Instapaper API client, configured once at load time.
    # All four OAuth credentials are left blank in this file.
    InstapaperClient = Instapaper::Client.new do |config|
      config.consumer_key       = ""
      config.consumer_secret    = ""
      config.oauth_token        = ''
      config.oauth_token_secret = '' # check docs, need to email them for this
    end
  3. @sirupsen sirupsen created this gist Aug 9, 2018.
    176 changes: 176 additions & 0 deletions book.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,176 @@
    # Airrecord model for the "Books" table. Besides plain field access it can
    # look a book up through the Goodreads client and copy the metadata it finds
    # (title, ISBN, author, rating, pages, categories) onto the record.
    class Book < Airrecord::Table
      # Records of the "Endorser" table, associated with books through the
      # "Endorsements" column.
      class Endorser < Airrecord::Table
        self.base_key = ""
        self.table_name = "Endorser"
      end

      self.base_key = ""
      self.table_name = "Books"

      has_many :endorsements, class: 'Book::Endorser', column: 'Endorsements'

      # Goodreads shelf names that are ignored when deriving categories.
      GOODREADS_BLACKLIST = %w(
        to-read favorites currently-reading owned
        series favourites re-read owned-books
        books-i-own wish-list si audiobook
        book-club ebook kindle to-buy
      ).freeze

      # Maps a capitalized Goodreads shelf name to the category it should count
      # as. The lookup is applied exactly once per shelf, so every value must be
      # an entry of CATEGORIES itself (never another key of this hash).
      GOODREADS_MERGE = {
        "Non-fiction" => "Nonfiction",
        "Classic" => "Classics",
        "Cookbook" => "Cooking",
        "Cookbooks" => "Cooking",
        "Biography" => "Memoir",
        "Biographies" => "Memoir",
        "Autobiography" => "Memoir",
        "Auto-biography" => "Memoir",
        "Sci-fi" => "Science Fiction",
        "Scifi" => "Science Fiction",
        "Management" => "Leadership",
        "Self-help" => "Personal Development",
        "Selfhelp" => "Personal Development",
        "Personal-development" => "Personal Development",
        "Self-improvement" => "Personal Development",
        "Science-fiction" => "Science Fiction",
        # Was "Young-adult", which is not a category and is never re-merged,
        # so "ya" shelves were always discarded.
        "Ya" => "Young Adult",
        "Tech" => "Technology",
        "Young-adult" => "Young Adult",
        "Computer-science" => "Programming",
        "Investing" => "Economics",
        "Fitness" => "Health",
        "Food" => "Cooking",
        "Finance" => "Economics",
        "Software" => "Programming",
        "Literature" => "Classics",
      }.freeze

      # The only values that may end up in the "Categories" field.
      CATEGORIES = [
        "Business", "Psychology", "Science", "Personal Development", "Philosophy",
        "History", "Fiction", "Memoir", "Leadership", "Classics", "Economics",
        "Cooking", "Programming", "Health", "Politics", "Technology", "Science Fiction",
        "Entrepreneurship", "Design", "Writing", "Fantasy", "Young Adult", "Nonfiction",
      ].freeze

      # Process-wide Goodreads client, built on first use. Defined as a public
      # class method: a bare `private` does not affect `def self.` methods, and
      # the instance-level wrapper calls it with an explicit receiver.
      def self.goodreads_client
        @client ||= begin
          Goodreads::Client.new(api_key: '', api_secret: '')
        end
      end

      # Searches Goodreads for this record, by ISBN when present and by quoted
      # title otherwise.
      #
      # Returns the id of the best matching book, or nil when the search result
      # exposes no `work` or no match exists. When the record has an author, the
      # first result whose author name is close to it (see
      # #character_difference?) is preferred over the first result overall.
      def goodreads_id
        query = self["ISBN"] || "\"#{self[:title]}\""

        search = goodreads_client.search_books(query)
        return unless search.results.respond_to?(:work)

        # `work` may be a single element or a list; normalize to an array.
        matches = [search.results.work].flatten

        best_match = nil
        if self[:author]
          best_match = matches.find { |match|
            character_difference?(match["best_book"]["author"]["name"], self[:author])
          }
        end

        best_match ||= matches.first
        return unless best_match
        best_match.best_book.id
      end

      # Memoized Goodreads book for this record. Returns nil (without
      # memoizing) when no Goodreads id could be found.
      def goodreads_book
        @book ||= begin
          id = goodreads_id
          return unless id
          goodreads_client.book(id)
        end
      end

      # Derives categories from the book's popular Goodreads shelves.
      #
      # n - how many non-blacklisted shelves to consider (default 5).
      #
      # Returns an array of unique CATEGORIES entries, in shelf order. Returns []
      # when the book cannot be found or has no usable shelves.
      def goodreads_categories(n = 5)
        book = goodreads_book
        return [] unless book

        popular = book.popular_shelves
        return [] if popular.blank?

        # Same normalization as for `work` and `author`: tolerate a single
        # shelf element as well as a list of them.
        shelves = [popular.shelf].flatten
        return [] unless shelves.first.respond_to?(:name)

        shelves.map(&:name)
          .reject { |name| GOODREADS_BLACKLIST.include?(name) }
          .first(n)
          .map { |name| GOODREADS_MERGE.fetch(name.capitalize, name.capitalize) }
          .select { |name| CATEGORIES.include?(name) }
          .uniq
      end

      # Copies Goodreads metadata onto this record, prints a colored diff of the
      # changed fields to stderr, and persists the record unless the change
      # looks suspicious or the ISBN already exists.
      #
      # prevent_duplicates_from - records whose "ISBN" must not equal this
      #                           record's ISBN after the update (default []).
      #
      # Returns nil when the book is not found on Goodreads; otherwise the value
      # of the last action taken (warning, message, or create/save).
      def populate_from_goodreads(prevent_duplicates_from: [])
        book = goodreads_book

        unless book
          $stderr.puts "Unable to find book #{self["Title"]}"
          return
        end

        before = self.serializable_fields
        self["Title"] = book.title
        self["ISBN"] = book.isbn13 || self["ISBN"]
        # Pick the year before converting: `x.to_s || y.to_s` can never reach
        # the fallback because `to_s` always returns a (truthy) string.
        self["Publication Year"] = (book.work.original_publication_year.presence || book.publication_year).to_s
        self["Goodreads Rating"] = book.average_rating
        self["Pages"] = book.num_pages
        authors = [book.authors.author].flatten
        self["Author"] = authors.first.name
        self["Categories"] = goodreads_categories.sort
        self["Goodreads Ratings"] = book.work.ratings_count

        difference = HashDiff.diff(before, self.serializable_fields)

        flagged = false
        author_ok = true

        $stderr.puts "\x1b[35m#{before["Title"]}\x1b[0m"
        difference.each do |(type, key, prev, new)|
          if key == "Author" && type == "~"
            # Accept the change if any Goodreads author is close to the old one.
            unless authors.any? { |author| character_difference?(author.name, prev) }
              $stderr.puts "Author changed too much"
              flagged = true
              author_ok = false
            end
          end

          if key == "Title" && type == "~"
            # Only flagged when the new title does not extend the old one AND
            # the author check above has already failed.
            unless new.downcase.start_with?(prev.downcase) || author_ok
              $stderr.puts "New title '#{new}' didn't start with old title '#{prev}'"
              flagged = true
            end
          end

          if type == "~"
            $stderr.puts "\x1b[34m#{type} #{key}: \x1b[31m#{prev} => \x1b[32m#{new}\x1b[0m"
          elsif type == "+"
            # For additions the third tuple element is printed as the new value.
            $stderr.puts "\x1b[34m#{type} #{key}: \x1b[32m#{prev}\x1b[0m"
          end
        end

        if flagged
          Rollbar.warn("Skipping book", title: self[:title])
        elsif prevent_duplicates_from.find { |other| other["ISBN"] == self["ISBN"] }
          $stderr.puts "Skipping #{self[:title]} due to duplicate"
        else
          if self.new_record?
            self.create
          else
            self.save
          end
        end
      end

      private

      # Instance-level shortcut to the shared class-level client.
      def goodreads_client
        self.class.goodreads_client
      end

      # Loose string similarity check used for author names.
      #
      # Returns true when at most `n` characters of `a` do not occur anywhere in
      # `b`, and at most `n` characters of `b` do not occur anywhere in `a`.
      def character_difference?(a, b, n = 4)
        a_chars = a.chars
        b_chars = b.chars
        (a_chars - b_chars).size <= n && (b_chars - a_chars).size <= n
      end
    end
    57 changes: 57 additions & 0 deletions importer.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,57 @@
    # Imports books into the Book table from two sources: Instapaper bookmarks
    # that point at Amazon or Goodreads pages, and highlights fetched from
    # Readwise.
    class BookImport
      # Walks up to 500 Instapaper bookmarks. For every Amazon (.com/.ca) or
      # Goodreads bookmark the page is fetched, an ISBN is extracted, a Book is
      # populated from Goodreads and the bookmark is deleted.
      #
      # Bookmarks whose page yields no ISBN are reported on stderr and left in
      # place instead of aborting the whole import with a NoMethodError.
      #
      # Returns the non-nil results of the per-bookmark imports.
      def instapaper
        InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
          uri = URI(bookmark.url)
          if uri.host =~ /\A(www\.)?amazon\.(com|ca)/
            import_bookmark(bookmark, amazon_isbn(uri))
          elsif bookmark.url =~ /goodreads\.com/
            import_bookmark(bookmark, goodreads_isbn(uri))
          end
        }.compact
      end

      # Imports books mentioned in highlights.
      def kindle
        books_from_highlights
      end

      private

      # Downloads the page behind `uri` (scheme + host + path only) and returns
      # its body.
      def fetch_body(uri)
        client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
      end

      # Extracts the ISBN/ASIN from an Amazon product page; nil when absent.
      def amazon_isbn(uri)
        found = fetch_body(uri).match(/(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/)
        found && found[3]
      end

      # Reads the `books:isbn` meta tag of a Goodreads page; nil when absent.
      def goodreads_isbn(uri)
        meta = Nokogiri::HTML(fetch_body(uri)).at('meta[property="books:isbn"]')
        meta && meta["content"]
      end

      # Creates the record for `isbn` and removes the bookmark, or warns and
      # returns nil when no ISBN could be extracted.
      def import_bookmark(bookmark, isbn)
        if isbn.to_s.empty?
          $stderr.puts "Unable to find ISBN for #{bookmark.url}"
          return
        end

        create_record_from_isbn(isbn, bookmark.bookmark_id)
      end

      # TODO: Do like what we do with words, where it puts the source multiple times
      # TODO: Refactor to be consistent with Words?
      # It does work though :)
      #
      # Fetches "/munger" from Readwise and, for every highlight whose note
      # starts with "book" or ".book" (case-insensitive), treats the highlight
      # text as a book title and populates a new Book from Goodreads, skipping
      # ISBNs already present among the existing books.
      def books_from_highlights
        sources = JSON.parse(Readwise.get("/munger").body)["data"]
        existing_books = Book.all

        sources.each do |source|
          book_highlights = source["highlights"].select { |h| h["note"] =~ /\A\.?book/i }
          book_titles = book_highlights.map { |h| h["highlight"] }
          book_titles.each do |title|
            next if title == "Randomness)." # ugh can't get rid of it
            book = Book.new("Title" => title)
            book.populate_from_goodreads(prevent_duplicates_from: existing_books)
          end
        end
      end

      # Populates a new Book for `isbn` from Goodreads, then deletes the
      # Instapaper bookmark it came from. Returns the delete call's result.
      def create_record_from_isbn(isbn, bookmark_id)
        Book.new("ISBN" => isbn).populate_from_goodreads
        InstapaperClient.delete_bookmark(bookmark_id)
      end

      # Returns a Faraday connection for `host`, building it once per host and
      # reusing it afterwards. Connections retry on network errors, follow
      # redirects and send a browser User-Agent.
      def client_for(host)
        @clients ||= {}
        @clients[host] ||= Faraday.new(:url => host) do |b|
          b.request :retry, max: 10, interval: 1, interval_randomness: 2, backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
          b.use FaradayMiddleware::FollowRedirects
          b.adapter :net_http_persistent
          b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
        end
      end
    end