require 'open-uri'
require 'nokogiri'

# Blog scraper: starting from `url`, prints every post on the page (title,
# permalink, timestamp, body text) and then follows the "older posts" pager
# link until no such link is found.

# URL of the page to start scraping from.
url = 'http://ishidatozanjukunisshi.blogspot.jp'
#url = 'http://ishidatozanjukunisshi.blogspot.jp/search?updated-max=2010-06-20T21:17:00%2B09:00&max-results=7&start=985&by-date=false'

# Optional charset override handed to Nokogiri. When nil, the charset reported
# by each HTTP response is used instead.
charset = nil

# CSS selector for the "older posts" pager link.
OLDER_LINK_SELECTOR = '.blog-pager > span > .blog-pager-older-link'

# Downloads +url+ and returns a two-element array: [body, charset].
#
# URI#open (provided by open-uri) is used instead of the bare Kernel#open,
# which no longer opens URLs on Ruby 3.0+.
def fetch_page(url)
  URI.parse(url).open do |f|
    # Return the charset together with the body; assigning it to a local
    # inside this method (as the code used to do) silently discarded it.
    [f.read, f.charset]
  end
end

# Downloads +url+ and returns the response body as a String.
def url_set(url)
  html, _detected_charset = fetch_page(url)
  html
end

# Downloads +url+ and parses it with Nokogiri.
#
# charset - encoding name to force, or nil to use the one reported by the
#           HTTP response.
#
# Returns the parsed Nokogiri document.
def parse_page(url, charset)
  html, detected_charset = fetch_page(url)
  Nokogiri::HTML.parse(html, nil, charset || detected_charset)
end

# Returns attribute +name+ of the first element under +node+ matching
# +selector+, or "" when the element or the attribute is missing.
def attr_value(node, selector, name)
  element = node.at_css(selector)
  element ? element[name].to_s : ''
end

# Prints every post found on the page: a separator line, the title, the
# permalink, the timestamp and the body text (always five `puts` calls per
# post; a missing link/timestamp prints as an empty line instead of raising).
#
# url     - page URL; only fetched when +doc+ is not supplied.
# charset - encoding override for parsing, or nil.
# doc     - optional already-parsed document for +url+, so the caller can
#           avoid downloading the same page twice.
def content_output(url, charset, doc = nil)
  doc ||= parse_page(url, charset)
  doc.css('.post-outer').each do |node|
    puts "---------------------"
    puts node.css('.post-title a').text
    puts attr_value(node, '.timestamp-link', 'href')
    puts attr_value(node, '.timestamp-link > abbr', 'title')
    puts node.css('.post-body').text
  end
end

# Looks up the "older posts" pager link on the page.
#
# url     - page URL; only fetched when +doc+ is not supplied.
# charset - encoding override for parsing, or nil.
# doc     - optional already-parsed document for +url+.
#
# Returns the link's href when exactly one such link exists, otherwise "".
def content_check(url, charset, doc = nil)
  doc ||= parse_page(url, charset)
  older_links = doc.css(OLDER_LINK_SELECTOR)
  return "" unless older_links.size == 1

  older_links.first['href'].to_s
end

#content_output(url, charset)
#content_check(url, charset)
#exit

# Walk the pager chain; each page is downloaded and parsed exactly once and
# the same document is shared by the output and the pager lookup.
while url != ""
  puts url
  doc = parse_page(url, charset)
  content_output(url, charset, doc)
  url = content_check(url, charset, doc)
end