Skip to content

Instantly share code, notes, and snippets.

@edsono
Forked from ttscoff/rtftomarkdown.rb
Created February 24, 2013 15:49
Show Gist options
  • Save edsono/5024293 to your computer and use it in GitHub Desktop.
Save edsono/5024293 to your computer and use it in GitHub Desktop.

Revisions

  1. @ttscoff ttscoff revised this gist Dec 1, 2012. 1 changed file with 11 additions and 3 deletions.
    14 changes: 11 additions & 3 deletions rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -17,6 +17,7 @@
    If you start a second-level nested list as an ordered list, the next
    second-level list will be ordered. It's a textutil/RTF thing.
    =end
    write_file = false # for Services set to true to write out files with .md extension

    if ARGV.length == 0
    puts "#{__FILE__} expects an input file (RTF or DOC) as an argument"
    @@ -33,7 +34,7 @@ def remove_empty(input)
    file = infile.sub(/\/$/,'')
    if File.exists?(File.expand_path(file))
    ext = file.match(/\.(\w+)$/)[1]
    input = %x{/usr/bin/textutil -convert html -stdout #{file}}.strip
    input = %x{/usr/bin/textutil -convert html -stdout "#{file}"}.strip


    input.gsub!(/.*?<body>(.*?)<\/body>.*/m,"\\1")
    @@ -95,8 +96,15 @@ def remove_empty(input)
    line =~ /\*REMOVEME/
    }.join("\n")

    puts input
    puts footer
    if write_file
    open(file+".md", 'w+') { |f|
    f.puts input
    f.puts footer
    }
    else
    puts input
    puts footer
    end
    else
    puts "File not found: #{file}"
    end
  2. @ttscoff ttscoff revised this gist Dec 1, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -88,7 +88,7 @@ def remove_empty(input)
    "*REMOVEME"
    else
    indent = ""
    (list_level -1).times do indent += "\s\s\s\s" end
    (list_level -1).times do indent += " " end
    line.gsub(/<li.*?>(.*?)<\/li>/,"#{indent}#{list_type[list_level]} \\1")
    end
    }.delete_if {|line|
  3. @ttscoff ttscoff revised this gist Dec 1, 2012. 1 changed file with 24 additions and 0 deletions.
    24 changes: 24 additions & 0 deletions rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -12,6 +12,10 @@
    combination produces seems impossible to work with. Most links disappear
    when converting from a DOC or DOCX file, and even Word's RTF export is
    unworkable.
    List levels converted by textutil can not be changed at a given depth.
    If you start a second-level nested list as an ordered list, the next
    second-level list will be ordered. It's a textutil/RTF thing.
    =end

    if ARGV.length == 0
    @@ -71,6 +75,26 @@ def remove_empty(input)
    line.strip
    }.join("\n")

    # handle lists
    list_level = 0
    list_type = []
    input = input.split("\n").map { |line|
    if line =~ /<([uo])l.*?>/
    list_level += 1
    list_type[list_level] = $1 =~ /u/ ? "*" : "1."
    "*REMOVEME"
    elsif line =~ /<\/[uo]l>/
    list_level -= 1
    "*REMOVEME"
    else
    indent = ""
    (list_level -1).times do indent += "\s\s\s\s" end
    line.gsub(/<li.*?>(.*?)<\/li>/,"#{indent}#{list_type[list_level]} \\1")
    end
    }.delete_if {|line|
    line =~ /\*REMOVEME/
    }.join("\n")

    puts input
    puts footer
    else
  4. @ttscoff ttscoff revised this gist Oct 15, 2012. 1 changed file with 29 additions and 5 deletions.
    34 changes: 29 additions & 5 deletions rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,18 @@
    #!/usr/bin/ruby
    # Uses textutil, available on Mac only (installed by default)
    # Usage: rtftomarkdown.rb FILENAME.rtf
    # Outputs to STDOUT
    =begin
    Usage: rtftomarkdown.rb FILENAME.rtf
    Uses textutil, available on Mac only (installed by default)
    Outputs to STDOUT
    Notes:
    Links are replaced with Markdown references (duplicate links combined).
    This works fine on RTF files, but the markup that the Word/textutil
    combination produces seems impossible to work with. Most links disappear
    when converting from a DOC or DOCX file, and even Word's RTF export is
    unworkable.
    =end

    if ARGV.length == 0
    puts "#{__FILE__} expects an input file (RTF or DOC) as an argument"
    @@ -28,7 +39,7 @@ def remove_empty(input)
    input.gsub!(/<\/?span( class=".*?")?>/,'')

    # substitute headers
    input.gsub!(/<p class="p1"><b>(.+?)<\/b><\/p>/,'# \\1')
    input.gsub!(/<p class="p1">(?:<b>)?(.+?)(?:<\/b>)?<\/p>/,'# \\1')
    input.gsub!(/<p class="p2"><b>(.+?)<\/b><\/p>/,'## \\1')
    input.gsub!(/<p class="p3"><b>(.+?)<\/b><\/p>/,'## \\1')
    input.gsub!(/<p class="p4"><b>(.+?)<\/b><\/p>/,'### \\1')
    @@ -39,16 +50,29 @@ def remove_empty(input)
    }.join("\n")

    # remove paragraph tags
    input.gsub!(/<p class="p5">(.*?)<\/p>/,'\\1')
    input.gsub!(/<p class="p\d">(.*?)<\/p>/,'\\1')
    # emphasis
    input.gsub!(/<\/?b>/,'**')
    input.gsub!(/<\/?i>/,'*')
    # links
    links = {}
    footer = ''
    input.gsub!(/<a href="(.*?)">(.*?)<\/a>/) do |match|
    if links.has_key? $1
    marker = links[$1]
    else
    links[$1] = links.length + 1
    footer += "\n[#{links[$1]}]: #{$1}"
    end
    "[#{$2}][#{links[$1]}]"
    end

    input = input.split("\n").map { |line|
    line.strip
    }.join("\n")

    puts input
    puts footer
    else
    puts "File not found: #{file}"
    end
  5. @ttscoff ttscoff revised this gist Oct 9, 2012. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,7 @@
    #!/usr/bin/ruby
    # Uses textutil, available on Mac only (installed by default)
    # Usage: rtftomarkdown.rb FILENAME.rtf
    # Outputs to STDOUT

    if ARGV.length == 0
    puts "#{__FILE__} expects an input file (RTF or DOC) as an argument"
  6. @ttscoff ttscoff created this gist Oct 9, 2012.
    53 changes: 53 additions & 0 deletions rtftomarkdown.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,53 @@
    #!/usr/bin/ruby
    # Uses textutil, available on Mac only (installed by default)

    # Nothing to convert without at least one file argument: print the usage
    # hint and stop before any conversion work starts.
    if ARGV.empty?
      puts "#{__FILE__} expects an input file (RTF or DOC) as an argument"
      exit
    end

    # Strips elements that contain nothing but whitespace (for example
    # "<b></b>" or '<p class="p1"> </p>') from an HTML fragment. Passes are
    # repeated until nothing more is removed, so a wrapper that only becomes
    # empty after its children were deleted is removed as well.
    #
    # The argument itself is never modified; the work happens on a copy, which
    # also means frozen strings are accepted.
    #
    # @param input [String] HTML fragment to clean
    # @return [String] a new string without empty elements and without
    #   leading/trailing whitespace
    def remove_empty(input)
      # \1 is the tag name, so the closing tag must match the opening one.
      empty_element = /<(\w+)( class=".*?")?>\s*<\/\1>/
      cleaned = input.dup
      # gsub! returns nil once a pass finds nothing to remove.
      loop { break unless cleaned.gsub!(empty_element, '') }
      cleaned.strip
    end

    # Converts every file named on the command line and prints the result to
    # STDOUT. /usr/bin/textutil first renders the document as HTML; the regex
    # passes below then rewrite that HTML into Markdown syntax. Arguments that
    # do not name an existing file produce a "File not found" line instead.
    ARGV.each do |infile|
      # Drop a single trailing slash from the argument before using it.
      file = infile.sub(/\/$/, '')
      path = File.expand_path(file)

      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      unless File.exist?(path)
        puts "File not found: #{file}"
        next
      end

      # The command is passed as an argument array so no shell is involved:
      # paths containing spaces, quotes or shell metacharacters reach textutil
      # unchanged instead of being word-split or interpreted.
      input = IO.popen(['/usr/bin/textutil', '-convert', 'html', '-stdout', path], &:read).strip

      # keep only what is between <body> and </body>
      input.gsub!(/.*?<body>(.*?)<\/body>.*/m, "\\1")

      # remove span/br tags, unnecessary
      input.gsub!(/<br>/, '')
      input.gsub!(/<\/?span( class=".*?")?>/, '')

      # substitute headers: bold paragraphs of class p1..p5 become headings
      # (p2 and p3 both map to "##", p4 and p5 both map to "###")
      input.gsub!(/<p class="p1"><b>(.+?)<\/b><\/p>/, '# \\1')
      input.gsub!(/<p class="p2"><b>(.+?)<\/b><\/p>/, '## \\1')
      input.gsub!(/<p class="p3"><b>(.+?)<\/b><\/p>/, '## \\1')
      input.gsub!(/<p class="p4"><b>(.+?)<\/b><\/p>/, '### \\1')
      input.gsub!(/<p class="p5"><b>(.+?)<\/b><\/p>/, '### \\1')

      # drop elements left empty by the steps above, one line at a time
      input = input.split("\n").map { |line| remove_empty(line) }.join("\n")

      # remove paragraph tags (only class p5 is unwrapped here)
      input.gsub!(/<p class="p5">(.*?)<\/p>/, '\\1')
      # emphasis
      input.gsub!(/<\/?b>/, '**')
      input.gsub!(/<\/?i>/, '*')

      # trim leading/trailing whitespace from every line
      input = input.split("\n").map(&:strip).join("\n")

      puts input
    end