import xml.etree.ElementTree as ET
from urllib.parse import unquote
import base64
import math
import sys
import re

# usage: open Burp, navigate to the proxy history, ctrl-a to select all
# records, right-click and "Save Items" as an .xml file (keep the base64
# encoding option checked), then run:
#   python burplist.py burprequests.xml
# output is saved to wordlist.txt


def entropy(string):
    """Calculate the Shannon entropy of a string, in bits per character."""
    # probability of each distinct character in the string
    prob = [string.count(c) / len(string) for c in set(string)]
    return -sum(p * math.log2(p) for p in prob)


def avgEntropyByChar(en, length):
    # calculate the "average" entropy per character (currently unused helper)
    return en / length


tree = ET.parse(sys.argv[1])
root = tree.getroot()
wordlist = []

# each <item> in Burp's export holds, among other children: index 1 = <url>,
# index 8 = <request>, index 12 = <response> (request/response are base64-encoded)
for i in root:
    url = i[1].text
    # coarse split: preserve subdomains and file/dir names containing . - _
    wordlist += re.split(r'\/|\?|&|=', url)
    # fine split: break subdomain and file names into individual words
    wordlist += re.split(r'\/|\?|&|=|_|-|\.|\+', url)
    # request: harvest words from cookies, headers, and POST bodies
    if i[8].text is not None:
        request = unquote(base64.b64decode(i[8].text).decode('utf-8', errors='ignore'))
        wordlist += re.split(r'\/|\?|&|=|_|-|\.|\+|\:| |\n|\r|"|\'|<|>|{|}|\[|\]|`|~|\!|@|#|\$|;|,|\(|\)|\*|\|', request)
    # response
    if i[12].text is not None:
        response = unquote(base64.b64decode(i[12].text).decode('utf-8', errors='ignore'))
        wordlist += re.split(r'\/|\?|&|=|_|-|\.|\+|\:| |\n|\r|\t|"|\'|<|>|{|}|\[|\]|`|~|\!|@|#|\$|;|,|\(|\)|\*|\^|\\|\|', response)

# dedupe, then keep only word-like tokens with low enough entropy
auxiliaryList = list(set(wordlist))
final = []

for word in auxiliaryList:
    if word.isalnum() or '-' in word or '.' in word or '_' in word:
        # drop "random strings" (session tokens, hashes, etc.) that have high entropy
        if entropy(word) < 4.4:
            final.append(word)

final.sort()
with open('wordlist.txt', 'w') as f:
    for item in final:
        f.write("%s\n" % item)
print("wordlist saved to wordlist.txt")
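
# A quick sanity check of the 4.4-bit entropy cutoff (a sketch for
# illustration only; the token below is made up and values are rounded).
# Human-chosen words score well under the cutoff and are kept, while long
# tokens drawn from a wide character set score above it and get dropped:
#
#   entropy('password')                      # ~2.75 (7 distinct chars, 's' repeated)
#   entropy('xKj9Qm2ZpLw7Rt4VbN8sYcHdAeFg')  # ~4.81 = log2(28), all 28 chars distinct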