Skip to content

Instantly share code, notes, and snippets.

@Andrew-Chen-Wang
Forked from revotu/remove_attrs.py
Created January 27, 2024 06:48
Show Gist options
  • Select an option

  • Save Andrew-Chen-Wang/2bcc3b4511ab8dda77ea1cffb8444b98 to your computer and use it in GitHub Desktop.

Select an option

Save Andrew-Chen-Wang/2bcc3b4511ab8dda77ea1cffb8444b98 to your computer and use it in GitHub Desktop.

Revisions

  1. @revotu revotu revised this gist Jul 14, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion remove_attrs.py
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    from BeautifulSoup import BeautifulSoup
    from bs4 import BeautifulSoup

    # remove all attributes
    def _remove_all_attrs(soup):
  2. @revotu revotu created this gist Jul 14, 2017.
    28 changes: 28 additions & 0 deletions remove_attrs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,28 @@
    from BeautifulSoup import BeautifulSoup

    # remove all attributes
    def _remove_all_attrs(soup):
        """Strip every attribute from every tag found in *soup*, in place.

        Args:
            soup: Object exposing ``find_all``; each tag returned by
                ``find_all(True)`` gets its ``attrs`` replaced.

        Returns:
            The same ``soup`` object that was passed in.
        """
        for tag in soup.find_all(True):
            # Rebind to a fresh dict (rather than clearing) so a dict that
            # was previously read from ``tag.attrs`` is left untouched.
            tag.attrs = {}
        return soup

    # remove all attributes except on whitelisted tags
    def _remove_all_attrs_except(soup, *, whitelist=('a', 'img')):
        """Strip attributes from every tag whose name is not whitelisted.

        Tags whose ``name`` is in *whitelist* keep all of their attributes;
        every other tag returned by ``soup.find_all(True)`` has its ``attrs``
        replaced with an empty dict. The soup is modified in place.

        Args:
            soup: Object exposing ``find_all``.
            whitelist: Iterable of tag names whose attributes are left
                untouched. Defaults to ``('a', 'img')``.

        Returns:
            The same ``soup`` object that was passed in.
        """
        # Build the lookup set once instead of scanning a list per tag.
        allowed_tags = frozenset(whitelist)
        for tag in soup.find_all(True):
            if tag.name not in allowed_tags:
                tag.attrs = {}
        return soup

    # remove all attributes; whitelisted tags keep only their ['href','src'] attrs
    def _remove_all_attrs_except_saving(
        soup,
        *,
        whitelist=('a', 'img'),
        keep_attrs=('src', 'href'),
    ):
        """Strip attributes, keeping only selected ones on whitelisted tags.

        For each tag returned by ``soup.find_all(True)``:

        * if the tag's ``name`` is not in *whitelist*, its ``attrs`` is
          replaced with an empty dict;
        * otherwise every attribute whose key is not in *keep_attrs* is
          deleted from the tag's existing ``attrs`` dict.

        The soup is modified in place.

        Args:
            soup: Object exposing ``find_all``.
            whitelist: Iterable of tag names allowed to keep attributes.
                Defaults to ``('a', 'img')``.
            keep_attrs: Iterable of attribute names preserved on whitelisted
                tags. Defaults to ``('src', 'href')``.

        Returns:
            The same ``soup`` object that was passed in.
        """
        allowed_tags = frozenset(whitelist)
        kept = frozenset(keep_attrs)
        for tag in soup.find_all(True):
            if tag.name not in allowed_tags:
                tag.attrs = {}
                continue
            # Iterate over a snapshot of the keys: deleting from a dict while
            # iterating over it directly raises RuntimeError.
            for attr in list(tag.attrs):
                if attr not in kept:
                    del tag.attrs[attr]
        return soup