Skip to content

Instantly share code, notes, and snippets.

@edison12a
Forked from bradmontgomery/kill_attrs.py
Created April 3, 2018 13:15
Show Gist options
  • Save edison12a/e974468ea384c615fba1c24c936fd19c to your computer and use it in GitHub Desktop.
Save edison12a/e974468ea384c615fba1c24c936fd19c to your computer and use it in GitHub Desktop.

Revisions

  1. @bradmontgomery bradmontgomery revised this gist Apr 8, 2011. 1 changed file with 7 additions and 4 deletions.
    11 changes: 7 additions & 4 deletions kill_attrs.py
    Original file line number Diff line number Diff line change
    @@ -1,11 +1,14 @@
    from BeautifulSoup import BeautifulSoup

    def _remove_attrs(soup):
    for tag in soup.findAll(True):
    for tag in soup.findAll(True):
    tag.attrs = None
    return soup


    def example(): doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>' print 'Before:\n%s' % doc soup = BeautifulSoup(doc)

    def example():
        """Print a sample HTML document before and after stripping attributes."""
        doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>'
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('Before:\n%s' % doc)
        soup = BeautifulSoup(doc)
        clean_soup = _remove_attrs(soup)
        # Report the cleaned document once.
        print('\nAfter:\n%s' % clean_soup)
  2. @bradmontgomery bradmontgomery revised this gist Apr 8, 2011. 1 changed file with 5 additions and 11 deletions.
    16 changes: 5 additions & 11 deletions kill_attrs.py
    Original file line number Diff line number Diff line change
    @@ -1,17 +1,11 @@
    from BeautifulSoup import BeautifulSoup

    def _remove_attrs(soup):
    tag_list = soup.findAll(lambda tag: len(tag.attrs) > 0)
    for t in tag_list:
    for attr, val in t.attrs:
    del t[attr]
    for tag in soup.findAll(True):
    tag.attrs = None
    return soup



    def example():
    doc = '<html><head><title>test</title></head><body id="foo"><p class="wahtever">junk</p><div style="background: yellow;">blah</div></body></html>'
    print 'Before:\n%s' % doc
    soup = BeautifulSoup(doc)
    def example(): doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>' print 'Before:\n%s' % doc soup = BeautifulSoup(doc)
    clean_soup = _remove_attrs(soup)
    print 'After:\n%s' % clean_soup

    print '\nAfter:\n%s' % clean_soup
  3. @bradmontgomery bradmontgomery created this gist Nov 11, 2010.
    17 changes: 17 additions & 0 deletions kill_attrs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,17 @@
    from BeautifulSoup import BeautifulSoup

    def _remove_attrs(soup):
    tag_list = soup.findAll(lambda tag: len(tag.attrs) > 0)
    for t in tag_list:
    for attr, val in t.attrs:
    del t[attr]
    return soup


    def example():
        """Print a sample HTML document before and after stripping attributes."""
        doc = '<html><head><title>test</title></head><body id="foo"><p class="wahtever">junk</p><div style="background: yellow;">blah</div></body></html>'
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('Before:\n%s' % doc)
        soup = BeautifulSoup(doc)
        clean_soup = _remove_attrs(soup)
        print('After:\n%s' % clean_soup)