Created
          October 19, 2015 13:27 
        
      - 
      
 - 
        
Save beshrkayali/6e2261f0b704d6aa7f90 to your computer and use it in GitHub Desktop.  
Revisions
- 
        
beshrkayali created this gist
Oct 19, 2015. There are no files selected for viewing
#!/usr/bin/python2
"""
Convert an HTML file to MoinMoin wiki markup.

This is a modified version of https://moinmo.in/ActionMarket/HTML2MoinMoin
that takes in an HTML file instead of a URL.

Usage:
    ./html2moinmoin.py FILE.html
    ./html2moinmoin.py FILE.html > file.moin

Reads the given FILE.html and converts it to MoinMoin markup.
The result is written to stdout.
"""
import sys
from collections import defaultdict

try:
    # Python 2 module names (what the shebang targets).
    import htmlentitydefs
    from HTMLParser import HTMLParser
except ImportError:
    # Python 3 renamed both modules; keep the old local names.
    import html.entities as htmlentitydefs
    from html.parser import HTMLParser


class HTML2MoinMoin(HTMLParser):
    """Streaming HTML parser that emits MoinMoin markup as it goes.

    Tags are handled in one of three ways, tried in this order:

    1. a ``do_<tag>_start`` / ``do_<tag>_end`` method, if one exists;
    2. a template from ``start_tags`` / ``end_tags``;
    3. ``do_default_start`` / ``do_default_end`` (silent unless
       ``self.verbose`` is set).
    """

    # Opening-tag templates.  Each is %-formatted with a mapping holding the
    # tag's attributes by name and by position ("0", "1", ...); attributes
    # that are absent format as an empty string.
    start_tags = {
        "a": " [%(href)s ",
        "b": "'''",
        "em": "''",
        "tt": "{{{",
        "pre": "\n{{{",
        "p": "\n\n",
        "br": "\n\n",
        "h1": "\n\n= ",
        "h2": "\n\n== ",
        "h3": "\n\n=== ",
        "h4": "\n\n==== ",
        "h5": "\n\n===== ",
        "title": "TITLE: ",
        "table": "\n",
        "tr": "",
        "td": "||",
    }

    # Closing-tag text, written verbatim.
    end_tags = {
        "a": "]",
        "b": "'''",
        "em": "''",
        "tt": "}}}",
        "pre": "}}}\n",
        "p": "",
        "h1": " =\n\n",
        "h2": " ==\n\n",
        "h3": " ===\n\n",
        "h4": " ====\n\n",
        "h5": " =====\n\n",
        "table": "\n",
        "tr": "||\n",
        "dt": ":: ",
    }

    def __init__(self, output=None):
        """Create a converter.

        output -- object with a ``write(text)`` method that receives the
                  markup; defaults to ``sys.stdout``.
        """
        try:
            # Python 3.4+ would otherwise decode character references itself
            # and never call handle_charref / handle_entityref.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers (including Python 2) do not take that keyword.
            HTMLParser.__init__(self)
        self.output = sys.stdout if output is None else output
        self.list_mode = []        # stack of item markers for the open lists
        self.preformatted = False  # True while inside <pre>
        self.verbose = 0           # non-zero: report unhandled tags

    def write(self, text):
        """Send *text* to the output stream."""
        self.output.write(text)

    # -- lists -------------------------------------------------------------

    def do_ul_start(self, attrs, tag):
        """Open a bulleted list level."""
        self.list_mode.append("*")

    def do_ol_start(self, attrs, tag):
        """Open a numbered list level."""
        self.list_mode.append("1.")

    def do_dl_start(self, attrs, tag):
        """Open a definition list level (items carry no marker)."""
        self.list_mode.append("")

    def do_ul_end(self, tag):
        """Close the innermost list level; a stray closing tag is ignored."""
        self.list_mode = self.list_mode[:-1]

    do_ol_end = do_ul_end
    do_dl_end = do_ul_end

    def _write_item_prefix(self, fallback_marker):
        """Start a new line indented one space per open list level.

        An item that appears outside any list is written as if it were in a
        single-level list using *fallback_marker*, instead of raising
        IndexError on the empty stack.
        """
        if self.list_mode:
            depth, marker = len(self.list_mode), self.list_mode[-1]
        else:
            depth, marker = 1, fallback_marker
        self.write("\n" + " " * depth + marker)

    def do_li_start(self, args, tag):
        """Begin a list item."""
        self._write_item_prefix("*")

    def do_dt_start(self, args, tag):
        """Begin a definition term."""
        self._write_item_prefix("")

    # -- preformatted text -------------------------------------------------

    def do_pre_start(self, args, tag):
        """Enter <pre>: from here on newlines in text are kept."""
        self.preformatted = True
        self.write(self.start_tags["pre"])

    def do_pre_end(self, tag):
        """Leave <pre>."""
        self.preformatted = False
        self.write(self.end_tags["pre"])

    # -- HTMLParser callbacks ----------------------------------------------

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag (see the class docstring for the order)."""
        # getattr on the instance, so handlers added by subclasses are found.
        handler = getattr(self, "do_%s_start" % tag, None)
        if handler is not None:
            handler(attrs, tag)
        elif tag in self.start_tags:
            # defaultdict: a template may name an attribute the tag lacks.
            attr_dict = defaultdict(str)
            for index, (name, value) in enumerate(attrs):
                if value is None:  # attribute given without a value
                    value = ""
                attr_dict[name] = value
                attr_dict[str(index)] = value
            self.write(self.start_tags[tag] % attr_dict)
        else:
            self.do_default_start(attrs, tag)

    def handle_endtag(self, tag):
        """Dispatch a closing tag (see the class docstring for the order)."""
        handler = getattr(self, "do_%s_end" % tag, None)
        if handler is not None:
            handler(tag)
        elif tag in self.end_tags:
            self.write(self.end_tags[tag])
        else:
            self.do_default_end(tag)

    def handle_data(self, data):
        """Write text; outside <pre> each newline becomes a space."""
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

    def handle_charref(self, name):
        """Write a numeric character reference back out in full.

        *name* is only the part between "&#" and the terminator, so writing
        it alone would turn "&#8364;" into the text "8364".
        """
        self.write("&#%s;" % name)

    def handle_entityref(self, name):
        """Write the replacement for a named entity, or "&name" if unknown."""
        replacement = htmlentitydefs.entitydefs.get(name)
        if replacement is None:
            self.write("&" + name)
        else:
            self.write(replacement)

    # -- fallbacks ---------------------------------------------------------

    def do_default_start(self, attrs, tag):
        """Report an opening tag that has no handler (verbose mode only)."""
        if self.verbose:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Encountered the beginning of a %s tag" % tag)
            print("Attribs: %s" % (attrs,))

    def do_default_end(self, tag):
        """Report a closing tag that has no handler (verbose mode only)."""
        if self.verbose:
            print("Encountered the end of a %s tag" % tag)


def main():
    """Convert the HTML file named by the first command-line argument.

    The markup is written to stdout.  Exits with a usage message on stderr
    when no file name is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s FILE.html" % sys.argv[0])

    # The with-block closes the file; no explicit close() is needed.
    with open(sys.argv[1], "r") as htmlfile:
        htmldata = htmlfile.read()

    converter = HTML2MoinMoin()
    converter.feed(htmldata)
    converter.close()


if __name__ == "__main__":
    main()