Skip to content

Instantly share code, notes, and snippets.

@pslobo
Forked from rjames86/htmltable_to_table.py
Last active August 29, 2015 14:21
Show Gist options
  • Select an option

  • Save pslobo/746b8da53a5af3b79ce1 to your computer and use it in GitHub Desktop.

Select an option

Save pslobo/746b8da53a5af3b79ce1 to your computer and use it in GitHub Desktop.

Revisions

  1. Ryan M created this gist Jan 18, 2015.
    48 changes: 48 additions & 0 deletions htmltable_to_table.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    from BeautifulSoup import BeautifulSoup as Soup
    import urllib

    # Download the San Francisco Randonneurs home page and parse it into the
    # module-level ``soup`` that the classes below search.
    raw_page = urllib.urlopen('http://www.sfrandonneurs.org/home.htm')
    try:
        soup = Soup(raw_page)
    finally:
        # The response handle is only needed while the parser reads it; close
        # it explicitly so the connection is not left open until interpreter
        # exit, even when parsing raises.
        raw_page.close()


    class MarkdownTable:
        """Static helpers that render pipe-delimited Markdown table text."""

        @staticmethod
        def _escape_cell(cell):
            """Return *cell* with every literal ``|`` backslash-escaped.

            An unescaped pipe inside a cell would be read as a column
            separator and shift all following columns of that row.
            """
            return cell.replace("|", "\\|")

        @staticmethod
        def table_columns(headers=None):
            """Build the heading of a table: title row plus separator row.

            Args:
                headers: Iterable of column titles (strings). ``None`` (the
                    default) is treated as no columns.

            Returns:
                The title row, a newline, then the ``|---|`` separator row,
                which itself ends with a newline.
            """
            # Materialise once: the titles are both joined and counted, and a
            # one-shot iterable could not be used for both.
            titles = [] if headers is None else list(headers)
            title_row = MarkdownTable.table_row(titles)
            return title_row + "\n" + MarkdownTable.header(len(titles))

        @staticmethod
        def header(col_num):
            """Return the separator row for *col_num* columns, newline-terminated."""
            return "|---" * col_num + "|\n"

        @staticmethod
        def table_row(row):
            """Return one table row for the cell strings in *row*.

            Cells are wrapped in and separated by ``|``; pipes inside a cell
            are escaped. No trailing newline is added.
            """
            cells = [MarkdownTable._escape_cell(cell) for cell in row]
            return "|" + "|".join(cells) + "|"


    class SFRRiders:
        """The ``registeredRiders`` HTML table, renderable as a Markdown table."""

        def __init__(self, document=None):
            """Look up the table with id ``registeredRiders``.

            Args:
                document: Parsed page to search; any object offering a
                    BeautifulSoup-style ``find(name, attrs)``. When omitted,
                    the module-level ``soup`` is used.
            """
            if document is None:
                document = soup
            self.r_table = document.find('table', {'id': 'registeredRiders'})

        def __str__(self):
            """Return the table as Markdown: heading, then one line per body row.

            Body rows are joined with newlines, so the last row has no
            trailing newline.
            """
            heading = MarkdownTable.table_columns(self.headers)
            lines = [MarkdownTable.table_row(cells) for cells in self.body]
            return heading + "\n".join(lines)

        @property
        def headers(self):
            """List of the text of every ``<th>`` found under ``<thead>``."""
            header_cells = self.r_table.find('thead').findAll('th')
            return [th.text for th in header_cells]

        @property
        def body(self):
            """List of rows; each row is the list of cell texts of one ``<tr>``
            found under ``<tbody>``."""
            rows = self.r_table.find('tbody').findAll('tr')
            # A comprehension always yields a list, unlike ``map``, whose
            # return type depends on the Python version.
            return [self._get_tds(tr) for tr in rows]

        def to_string(self):
            """Return the Markdown rendering (same value as ``__str__``)."""
            # Call __str__ directly so the result is handed back exactly as
            # built, without any coercion by ``str()``.
            return self.__str__()

        def _get_tds(self, trow):
            """Return the text of each cell that is a direct child of *trow*.

            Only ``td``/``th`` elements are selected. Iterating the row's
            children directly would also visit the whitespace text nodes
            between cells, which carry no ``.text`` attribute.
            """
            cells = trow.findAll(['td', 'th'], recursive=False)
            return [cell.text for cell in cells]