Skip to content

Instantly share code, notes, and snippets.

@AndorChen
Created June 1, 2015 12:56
Show Gist options
  • Save AndorChen/a07c591fed685fb7a80c to your computer and use it in GitHub Desktop.
Save AndorChen/a07c591fed685fb7a80c to your computer and use it in GitHub Desktop.

Revisions

  1. AndorChen created this gist Jun 1, 2015.
    183 changes: 183 additions & 0 deletions pandocfilter.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,183 @@
    require 'json'

    class String
      # Converts a snake_case string to CamelCase.
      #
      # The receiver is split on underscores; every piece is capitalized
      # (first character upper-cased, the remainder lower-cased) and the pieces
      # are joined back together without a separator.
      #
      # Returns a new String; the receiver itself is left untouched.
      def camelize
        pieces = split('_')
        pieces.map { |piece| piece.capitalize }.join('')
      end
    end

    module PandocFilter

    # Builder for pandoc AST elements.
    #
    # Every key of NODES is available as a class-level constructor, e.g.
    # Node.str('hi') or Node.line_break, and answers a Hash describing the
    # element (see #to_hash).
    class Node
      # key: node type
      # value: expected argument number
      #
      # NOTE(review): the arities presumably mirror the pandoc AST version this
      # file was written against; newer pandoc releases may expect different
      # counts for some elements -- confirm before relying on them.
      NODES = {
        # block elements
        plain: 1,
        para: 1,
        code_block: 2,
        raw_block: 2,
        block_quote: 1,
        ordered_list: 2,
        bullet_list: 1,
        definition_list: 1,
        header: 3,
        horizontal_rule: 0,
        table: 5,
        div: 2,
        null: 0,

        # inline elements
        str: 1,
        emph: 1,
        strong: 1,
        strikeout: 1,
        superscript: 1,
        subscript: 1,
        small_caps: 1,
        quoted: 2,
        cite: 2,
        code: 2,
        space: 0,
        line_break: 0,
        math: 2,
        raw_inline: 2,
        link: 2,
        image: 2,
        note: 1,
        span: 2
      }.freeze

      class << self
        # Dispatches Node.<node_type>(*args) to a freshly built element.
        #
        # name - Symbol, must be a key of NODES.
        # args - element contents; their count must equal NODES[name].
        #
        # Returns the element Hash produced by #to_hash.
        # Raises RuntimeError for an unknown node type or a wrong argument
        # count.
        def method_missing(name, *args)
          raise "undefined #{name} node type" unless NODES.key?(name)
          unless args.size == NODES[name]
            raise "#{name} expects #{NODES[name]} arguments, but given #{args.size}"
          end

          # String#camelize is the core extension defined at the top of this
          # file; it turns e.g. "line_break" into "LineBreak".
          new(name.to_s.camelize, *args).to_hash
        end

        # Keeps respond_to? / method in sync with method_missing: only the
        # node types listed in NODES are reported as supported.
        def respond_to_missing?(name, include_private = false)
          NODES.key?(name) || super
        end
      end

      attr_reader :type
      attr_reader :args
      attr_reader :numargs

      # type - element type name as it should appear in the output
      #        (e.g. 'Str').
      # args - the element's contents.
      def initialize(type, *args)
        @type = type
        @args = args
        @numargs = args.size
      end

      # Returns the element as a Hash with the Symbol keys :t (type) and
      # :c (contents). The contents are [] for zero arguments, the bare
      # argument for exactly one, and the argument Array otherwise.
      #
      # NOTE(review): the keys are Symbols, while PandocFilter.walk looks for
      # the String key 't'; hashes built here are therefore not treated as
      # elements when walked. Confirm whether that is intended.
      def to_hash
        xs = case numargs
             when 0 then []
             when 1 then args[0]
             else args
             end

        { t: type, c: xs }
      end
    end

    # Converts an action into a filter that reads a JSON-formatted
    # pandoc document from stdin, transforms it by walking the tree
    # with the action, and writes the new JSON-formatted pandoc document
    # to stdout. The action is called as action(key, value, format, meta),
    # where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    # value is the contents of the object (e.g. a string for 'Str',
    # a list of inline elements for 'Para'), format is the target
    # output format (taken from the first command line argument if
    # present, otherwise ""), and meta is the document's metadata.
    # If the action returns nil, the object to which it applies
    # will remain unchanged. If it returns an object, the object will
    # be replaced. If it returns a list, the list will be spliced in to
    # the list to which the target object belongs. (So, returning an
    # empty list deletes the object.)
    #
    # action - Callable object (passed as a block)
    #
    # Writes the manipulated JSON to $stdout.
    def self.process(&action)
      # NOTE(review): JSON.load is documented as meant for trusted input.
      # The document presumably comes straight from pandoc; consider
      # JSON.parse if this is ever fed data from an untrusted source.
      doc = JSON.load($stdin.read)

      # Ruby's ARGV does not contain the script name, so the first
      # command line argument is ARGV[0].
      format = ARGV.first || ""

      # Array-shaped documents keep their metadata in doc[0]['unMeta'];
      # object-shaped documents carry it under the 'meta' key.
      meta = doc.is_a?(Hash) ? doc.fetch('meta', {}) : doc[0]['unMeta']

      altered = self.walk(doc, format, meta, &action)
      JSON.dump(altered, $stdout)
    end

    # Walks the tree x and returns concatenated string content,
    # leaving out all formatting.
    #
    # Text is collected from 'Str' and 'MetaString' elements (their
    # contents), from 'Code' and 'Math' elements (second entry of their
    # contents), and a single space is emitted for every 'LineBreak' and
    # 'Space' element. All other elements contribute nothing themselves.
    #
    # x - a pandoc tree (or sub-tree) as accepted by walk.
    #
    # Returns a String.
    def self.stringify(x)
      result = []

      go = lambda do |key, val, _format, _meta|
        case key
        when 'Str', 'MetaString'
          result.push(val)
        when 'Code', 'Math'
          result.push(val[1])
        when 'LineBreak', 'Space'
          result.push(" ")
        end

        # Always answer nil ("leave the element alone"). Letting the value of
        # Array#push leak out would hand walk the whole accumulator as a
        # replacement list, which it would then re-walk for every text node.
        nil
      end

      self.walk(x, "", {}, &go)

      result.join('')
    end

    # Returns an attribute list, constructed from the
    # dictionary attrs.
    #
    # attrs - Hash with String keys, or nil (treated as an empty Hash).
    #         'id' supplies the identifier (default ''), 'classes' the
    #         class list (default []); every other entry becomes a
    #         [key, value] pair.
    #
    # Returns the three-element Array [ident, classes, keyvals].
    # The Hash passed in is not modified.
    def attributes(attrs)
      attrs ||= {}
      ident = attrs.fetch('id', '')
      classes = attrs.fetch("classes", [])
      # reject builds a new Hash, so the caller's attrs keeps its
      # 'id' and 'classes' entries (keep_if would delete them in place).
      keyvals = attrs.reject { |k, _| k == "classes" || k == "id" }.to_a

      [ident, classes, keyvals]
    end

    # Rebuilds the tree x recursively, letting the action rewrite elements
    # on the way, and returns the rebuilt tree.
    #
    # An "element" is a Hash with a 't' key that sits directly inside an
    # Array. For each one the action is called with
    # (item['t'], item['c'], format, meta) and its answer decides what ends up
    # in the new Array:
    #   nil        - the element itself, walked recursively
    #   an Array   - each entry, walked recursively (spliced in place)
    #   other      - that value, walked recursively
    # Array entries that are not elements are simply walked. A Hash is copied
    # key by key with every value walked; any other value is returned as is.
    def self.walk(x, format, meta, &action)
      case x
      when Array
        x.each_with_object([]) do |item, walked|
          is_element = item.is_a?(Hash) && item.has_key?('t')
          outcome = is_element ? action.call(item['t'], item['c'], format, meta) : nil

          if outcome.nil?
            walked << self.walk(item, format, meta, &action)
          elsif outcome.is_a?(Array)
            outcome.each { |node| walked << self.walk(node, format, meta, &action) }
          else
            walked << self.walk(outcome, format, meta, &action)
          end
        end
      when Hash
        x.each_key.each_with_object({}) do |key, copy|
          copy[key] = self.walk(x[key], format, meta, &action)
        end
      else
        x
      end
    end

    end