- 
      
- 
        Save edison12a/e974468ea384c615fba1c24c936fd19c to your computer and use it in GitHub Desktop. 
Revisions
- 
        bradmontgomery revised this gist Apr 8, 2011. 1 changed file with 7 additions and 4 deletions. There are no files selected for viewing. This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters. Original file line number | Diff line number | Diff line change @@ -1,11 +1,14 @@ from BeautifulSoup import BeautifulSoup def _remove_attrs(soup): for tag in soup.findAll(True): tag.attrs = None return soup def example(): doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>' print 'Before:\n%s' % doc soup = BeautifulSoup(doc) clean_soup = _remove_attrs(soup) print '\nAfter:\n%s' % clean_soup 
- 
        bradmontgomery revised this gist Apr 8, 2011. 1 changed file with 5 additions and 11 deletions. There are no files selected for viewing. This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters. Original file line number | Diff line number | Diff line change @@ -1,17 +1,11 @@ from BeautifulSoup import BeautifulSoup def _remove_attrs(soup): for tag in soup.findAll(True): tag.attrs = None return soup def example(): doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>' print 'Before:\n%s' % doc soup = BeautifulSoup(doc) clean_soup = _remove_attrs(soup) print '\nAfter:\n%s' % clean_soup 
- 
        bradmontgomery created this gist Nov 11, 2010. There are no files selected for viewing. This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters. Original file line number | Diff line number | Diff line change @@ -0,0 +1,17 @@ from BeautifulSoup import BeautifulSoup def _remove_attrs(soup): tag_list = soup.findAll(lambda tag: len(tag.attrs) > 0) for t in tag_list: for attr, val in t.attrs: del t[attr] return soup def example(): doc = '<html><head><title>test</title></head><body id="foo"><p class="wahtever">junk</p><div style="background: yellow;">blah</div></body></html>' print 'Before:\n%s' % doc soup = BeautifulSoup(doc) clean_soup = _remove_attrs(soup) print 'After:\n%s' % clean_soup