Skip to content

Instantly share code, notes, and snippets.

@levinalex
Created November 6, 2009 19:37
Show Gist options
  • Save levinalex/228213 to your computer and use it in GitHub Desktop.
Save levinalex/228213 to your computer and use it in GitHub Desktop.

Revisions

  1. levinalex renamed this gist Nov 6, 2009. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. levinalex created this gist Nov 6, 2009.
    109 changes: 109 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,109 @@
    require 'httparty'
    require 'grit'
    require 'pathname'

    module Wikipedia

    class Revision
    def initialize(article, revision)
    @article = article
    @revision = revision
    end

    %w(timestamp user revid anon comment).each do |m|
    define_method(m) do
    @revision[m]
    end
    end

    def content
    @revision["*"]
    end
    end

    class Revisions
    include HTTParty
    include Enumerable

    base_uri "http://en.wikipedia.org/w/api.php"
    default_params :action => :query,
    :prop => :revisions,
    :rvdir => :newer,
    :rvprop => [:content, :timestamp, :user, :revid, :comment].join("|"),
    :rvlimit => 50,
    :format => :json

    def initialize(article)
    @article = article
    @pageid = article.pageid
    @revs = []
    end

    def each
    nextquery = {}
    while true
    result = self.class.get('', :query => {:pageids => @pageid}.merge(nextquery))
    @revs = result["query"]["pages"][@pageid]["revisions"].each do |rev|
    yield Revision.new(@article, rev)
    end

    if result["query-continue"]
    nextquery = result["query-continue"]["revisions"]
    else
    return
    end
    end
    sleep 1
    end
    end

    class Article
    include HTTParty
    base_uri "http://en.wikipedia.org/w/api.php"

    def initialize(name)
    @name = name
    end

    def pageid
    @pageid ||= begin
    result = self.class.get('',
    :query => {
    :action => :query ,
    :prop => :revisions,
    :titles => @name,
    :format => :json })
    result["query"]["pages"].keys.first
    end
    end

    def revisions
    Revisions.new(self)
    end

    end
    end


    if __FILE__ == $0
    article = Wikipedia::Article.new(ARGV[0] || "Main Page")
    id = article.pageid
    dir = Pathname.new("cache").join(article.pageid)
    FileUtils.mkdir_p(dir)

    repo = Grit::Repo.init_bare(dir.join("#{article.pageid}.git"))
    last_commit = nil

    article.revisions.each do |r|
    puts [r.timestamp, r.user].join(" - ")

    user = Grit::Actor.new(r.user, r.user)

    ENV["GIT_AUTHOR_DATE"] = r.timestamp.to_s

    index = repo.index
    index.add("article", r.content)
    last_commit = index.commit(r.comment, last_commit, user)
    end
    p last_commit
    end