Skip to content

Instantly share code, notes, and snippets.

@rmusser01
Forked from lanmaster53/whey-cewler.py
Created December 21, 2022 22:53
Show Gist options
  • Save rmusser01/10c3236f1a021e30431f9a7522876c5a to your computer and use it in GitHub Desktop.
Save rmusser01/10c3236f1a021e30431f9a7522876c5a to your computer and use it in GitHub Desktop.

Revisions

  1. @lanmaster53 lanmaster53 revised this gist Jun 14, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion whey-cewler.py
    Original file line number Diff line number Diff line change
    @@ -98,7 +98,7 @@ def menu_action(self, event):
    self.roots.add(root)
    # get all sitemap entries associated with the selected messages and scrape them for words
    for http_message in self._callbacks.getSiteMap(None):
    url = str(http_message.getUrl())
    url = http_message.getUrl().toString()
    for root in self.roots:
    # will scrape the same URL multiple times if the site map has stored multiple instances
    # the site map stores multiple instances if it detects differences, so this is desirable
  2. @lanmaster53 lanmaster53 revised this gist May 11, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion whey-cewler.py
    Original file line number Diff line number Diff line change
    @@ -17,7 +17,7 @@
    5. Select the Extensions > Create wordlist context menu item.
    The wordlist is created to wherever you have the extension configured for output.
    ```
    '''

    from burp import IBurpExtender
    from burp import IContextMenuFactory
  3. @lanmaster53 lanmaster53 revised this gist May 11, 2021. 1 changed file with 21 additions and 0 deletions.
    21 changes: 21 additions & 0 deletions whey-cewler.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,24 @@
    '''
    Based on the initial work of Digininja at https://github.com/digininja/CeWL. While CeWL is a script written
    in Ruby that requires an independent crawl of a website in order to build a custom wordlist, Whey CeWLer
    runs within Portswigger's Burp Suite and parses an already crawled sitemap to build a custom wordlist. It
    does not have the meta data parsing capabilities that CeWL does, but it more than makes up for it in
    convenience.
    The name gets its origins from the CeWLer portion of the CO2 Burp extension by Jason Gillam, which is written
    in Java and does something similar, but Whey CeWLer is a completely reimagined extension written in Python,
    making it "way cooler".
    Usage:
    1. Point Burp Suite to Jython in the Extender > Options tab.
    2. Install this extension manually in the Extender > Extensions tab.
    3. Select an option for extension output (File, Console or UI).
    4. Right-click on any element in the Target tab's hierarchical sitemap.
    5. Select the Extensions > Create wordlist context menu item.
    The wordlist is created to wherever you have the extension configured for output.
    ```
    from burp import IBurpExtender
    from burp import IContextMenuFactory
    from javax.swing import JMenuItem
  4. @lanmaster53 lanmaster53 revised this gist May 11, 2021. No changes.
  5. @lanmaster53 lanmaster53 revised this gist May 7, 2021. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion whey-cewler.py
    Original file line number Diff line number Diff line change
    @@ -8,7 +8,6 @@

    COMMON_PASSWORDS = ['password']
    TEXT_CONTENT_TYPES = ['text/html', 'application/xml', 'application/json', 'text/plain']
    DEBUG = False

    # helpful resource
    # https://github.com/laconicwolf/burp-extensions/blob/master/GenerateForcedBrowseWordlist.py
  6. @lanmaster53 lanmaster53 created this gist May 7, 2021.
    126 changes: 126 additions & 0 deletions whey-cewler.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,126 @@
    from burp import IBurpExtender
    from burp import IContextMenuFactory
    from javax.swing import JMenuItem
    from java.util import ArrayList, List
    from HTMLParser import HTMLParser
    from datetime import datetime
    import re

    # words that seed the wordlist: BurpExtender.word_list starts out as a set of these
    COMMON_PASSWORDS = ['password']
    # content types that get_words scrapes; a response declaring any other type is skipped
    TEXT_CONTENT_TYPES = ['text/html', 'application/xml', 'application/json', 'text/plain']
    # NOTE(review): not referenced by any code in this file as shown - presumably a leftover debug switch
    DEBUG = False

    # helpful resource
    # https://github.com/laconicwolf/burp-extensions/blob/master/GenerateForcedBrowseWordlist.py

    class TagStripper(HTMLParser):
        '''
        Strips the markup from an HTML page received in an HTTP response. The text between tags
        and the bodies of comments are collected in document order and joined with " " so the
        result can be handed to a regex for word extraction.
        '''

        def __init__(self):
            HTMLParser.__init__(self)
            # text and comment fragments collected by the handlers for the current page
            self.page_text = []

        def handle_data(self, data):
            '''
            Parser callback: collects a run of text found outside of tags.
            '''
            self.page_text.append(data)

        def handle_comment(self, data):
            '''
            Parser callback: collects the body of an HTML comment.
            '''
            self.page_text.append(data)

        def strip(self, html_page):
            '''
            Parses html_page and returns its text and comment content joined with " ".

            Every call starts from a clean parser and an empty fragment list, so an instance
            can be reused for several pages without text from an earlier page leaking into
            the result of a later one.
            '''
            # drop unparsed input left over from a previous page, then the collected fragments
            self.reset()
            self.page_text = []
            self.feed(html_page)
            return " ".join(self.page_text)


    class BurpExtender(IBurpExtender, IContextMenuFactory):
        '''
        Burp extension that adds a "Create Wordlist" item to the context menu of the site map
        tree. Selecting it scrapes the stored responses underneath the selected nodes for words
        and prints the resulting wordlist.
        '''

        # word characters (letters, digits, underscore) and apostrophes, at least 3 in a row
        WORD_PATTERN = re.compile(r"[\w']{3,}")
        # minimum length a word must still have after its apostrophes are stripped
        MIN_WORD_LENGTH = 3

        def registerExtenderCallbacks(self, callbacks):
            '''
            Registers the extension and initializes the root URLs and word list sets.
            '''
            self._callbacks = callbacks
            self._helpers = callbacks.getHelpers()
            # most recent context menu invocation; set by createMenuItems, read by menu_action
            self.context = None
            # URL prefixes of the nodes selected for the current wordlist
            self.roots = set()
            # unique words collected for the current wordlist
            self.word_list = set(COMMON_PASSWORDS)
            callbacks.setExtensionName("Whey CeWLer")
            callbacks.registerContextMenuFactory(self)
            return

        def createMenuItems(self, context):
            '''
            Returns a list holding the "Create Wordlist" menu item when the menu is invoked from
            the site map tree, and None (no custom items) for every other invocation context.
            '''
            # remember the invocation so menu_action can ask it for the selected messages
            self.context = context
            if context.getInvocationContext() != context.CONTEXT_TARGET_SITE_MAP_TREE:
                return None
            menu_list = ArrayList()
            menu_list.add(JMenuItem("Create Wordlist", actionPerformed=self.menu_action))
            return menu_list

        def menu_action(self, event):
            '''
            Obtains the selected messages from the interface, filters the sitemap for all
            messages whose URL lies within the selected messages' hierarchy, scrapes every
            matching message that has a response, and prints the resulting wordlist.

            event is the Swing action event delivered to the menu item; it is not used.
            '''
            # start clean so roots and words from an earlier invocation do not leak into this one
            self.roots = set()
            self.word_list = set(COMMON_PASSWORDS)
            # the Burp API documents that getSelectedMessages() may return null
            for http_message in self.context.getSelectedMessages() or []:
                # NOTE(review): toString() instead of str() - presumably str() on the Java URL
                # is fragile for non-ASCII URLs under Jython; confirm
                self.roots.add(http_message.getUrl().toString())
            # startswith() accepts a tuple of prefixes, so each message is tested (and scraped)
            # once even when several selected roots match its URL
            roots = tuple(self.roots)
            for http_message in self._callbacks.getSiteMap(None):
                # will scrape the same URL multiple times if the site map has stored multiple instances
                # the site map stores multiple instances if it detects differences, so this is desirable
                url = http_message.getUrl().toString()
                if not url.startswith(roots):
                    continue
                # only scrape if there is a response to scrape
                http_response = http_message.getResponse()
                if http_response:
                    self.get_words(url, http_response)
            self.display_words()
            return

        def get_words(self, url, http_response):
            '''
            Checks the headers for a text-based content type. If a Content-Type header names a
            type outside TEXT_CONTENT_TYPES the response is skipped; a response without a
            Content-Type header is scraped. The body is run through the TagStripper class and
            the words matching WORD_PATTERN are added to the word_list set with their
            apostrophes removed.

            url is the URL the response belongs to; it is currently not used.
            http_response is the raw response (status line, headers and body) as bytes.
            '''
            response = self._helpers.analyzeResponse(http_response)
            # the first entry is the status line; the rest are "Name: value" headers
            for header in response.getHeaders()[1:]:
                # partition() never raises, so a malformed line without ':' is simply ignored
                name, _, value = header.partition(':')
                if name.strip().lower() != 'content-type':
                    continue
                content_type = value.split(';')[0].strip().lower()
                if content_type not in TEXT_CONTENT_TYPES:
                    return
            # decode the body only after the content type check so skipped responses cost nothing
            body = self._helpers.bytesToString(http_response[response.getBodyOffset():])
            page_text = TagStripper().strip(body)
            for candidate in self.WORD_PATTERN.findall(page_text):
                # strip apostrophes
                word = candidate.replace("'", "")
                # a match made up mostly of apostrophes can end up empty or too short
                if len(word) >= self.MIN_WORD_LENGTH:
                    self.word_list.add(word)
            return

        def display_words(self):
            '''
            Displays the word list, sorted, one word per line, to whatever Burp is configured
            for stdout.
            '''
            for word in sorted(self.word_list):
                # parenthesised form prints the same under Python 2 / Jython and Python 3
                print(word)
            return