# Imports books from two sources: bookmarks saved in Instapaper that point at
# Amazon or Goodreads pages, and Readwise highlights annotated as books.
class BookImport
  # Matched against the bookmark URL's host to detect Amazon pages.
  AMAZON_HOST = /\A(www\.)?amazon\.(com|ca)/
  # Matched against the full bookmark URL to detect Goodreads pages.
  GOODREADS_URL = /goodreads\.com/
  # Capture group 3 holds the 10-13 character identifier found in the page markup.
  AMAZON_ISBN = /(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/
  # CSS selector for the meta tag read from Goodreads pages.
  GOODREADS_ISBN_SELECTOR = 'meta[property="books:isbn"]'
  private_constant :AMAZON_HOST, :GOODREADS_URL, :AMAZON_ISBN, :GOODREADS_ISBN_SELECTOR

  # Walks up to 500 Instapaper bookmarks; for each Amazon or Goodreads bookmark
  # whose page yields an ISBN, creates a record and deletes the bookmark.
  #
  # Bookmarks that are not book pages, whose URL cannot be parsed, or whose page
  # contains no ISBN are skipped (and left in Instapaper) instead of aborting
  # the whole import.
  #
  # @return [Array] the non-nil results of #create_record_from_isbn, one per
  #   imported bookmark
  def instapaper
    InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
      isbn = isbn_for(bookmark.url)
      create_record_from_isbn(isbn, bookmark.bookmark_id) if isbn
    }.compact
  end

  # Imports books found in Readwise highlights.
  #
  # @return whatever #books_from_highlights returns
  def kindle
    books_from_highlights
  end

  private

  # Fetches the page behind +url+ and extracts its ISBN.
  #
  # @param url [String] bookmark URL
  # @return [String, nil] the ISBN, or nil when the URL is not an Amazon or
  #   Goodreads page, is malformed, or the page has no ISBN markup
  def isbn_for(url)
    uri = URI(url)

    if AMAZON_HOST.match?(uri.host) # Regexp#match? is false for a nil host
      # String#[] with a capture index returns nil when the pattern is absent.
      fetch_body(uri)[AMAZON_ISBN, 3]
    elsif GOODREADS_URL.match?(url)
      meta = Nokogiri::HTML(fetch_body(uri)).at(GOODREADS_ISBN_SELECTOR)
      meta && meta["content"]
    end
  rescue URI::InvalidURIError
    # One unparseable bookmark should not stop the remaining bookmarks.
    nil
  end

  # Requests the path of +uri+ from its host (the query string is not sent)
  # and returns the response body.
  #
  # @param uri [URI] parsed bookmark URL
  def fetch_body(uri)
    client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
  end

  # TODO: Do like what we do with words, where it puts the source multiple times
  # TODO: Refactor to be consistent with Words?
  # It does work though :)
  #
  # Reads sources from the Readwise "/munger" endpoint and, for every highlight
  # whose note starts with "book" or ".book" (case-insensitive), populates a
  # Book titled with the highlight text.
  #
  # @return [Array] the parsed "data" array that was iterated
  def books_from_highlights
    sources = JSON.parse(Readwise.get("/munger").body)["data"]
    existing_books = Book.all

    sources.each do |source|
      source["highlights"].each do |highlight|
        next unless highlight["note"] =~ /\A\.?book/i

        title = highlight["highlight"]
        next if title == "Randomness)." # ugh can't get rid of it

        Book.new("Title" => title).populate_from_goodreads(prevent_duplicates_from: existing_books)
      end
    end
  end

  # Populates a Book for +isbn+ from Goodreads, then deletes the Instapaper
  # bookmark it came from.
  #
  # @param isbn [String]
  # @param bookmark_id Instapaper bookmark identifier
  # @return the result of InstapaperClient.delete_bookmark
  def create_record_from_isbn(isbn, bookmark_id)
    Book.new("ISBN" => isbn).populate_from_goodreads
    InstapaperClient.delete_bookmark(bookmark_id)
  end

  # Returns a Faraday connection for +host+, building it on first use and
  # reusing it on later calls for the same host.
  #
  # @param host [String] base URL such as "https://www.amazon.com"
  def client_for(host)
    @clients ||= {}
    @clients[host] ||= Faraday.new(url: host) do |b|
      b.request :retry, max: 10, interval: 1, interval_randomness: 2,
                        backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
      b.use FaradayMiddleware::FollowRedirects
      b.adapter :net_http_persistent
      # Requests are sent with a desktop Chrome user agent string.
      b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
    end
  end
end