Skip to content

Instantly share code, notes, and snippets.

@baktun95827
Created June 20, 2021 10:05
Show Gist options
  • Select an option

  • Save baktun95827/8b4a22e6c4b6ca43f22c8b32c92f3e80 to your computer and use it in GitHub Desktop.

Select an option

Save baktun95827/8b4a22e6c4b6ca43f22c8b32c92f3e80 to your computer and use it in GitHub Desktop.

Revisions

  1. baktun95827 created this gist Jun 20, 2021.
    40 changes: 40 additions & 0 deletions snapshot2path.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    import re
    import sys
    import os

    # Substrings whose presence makes mightBeGarbage() reject a line.
    rubbish_list = [
        "access-control-allow-credentials",
        "{",
        "}",
        "chrome",
        "data:image/png;base64",
        "<a",
        "zendesk",
    ]

    def isUrl(line):
        """Return True if *line* looks like it contains a URL path.

        A line qualifies when it contains two '/' characters separated by
        zero or more lowercase letters, digits, underscores or hyphens
        (so "/foo/" and "//" both qualify, while "a/b" does not).

        Always returns a bool; the result is never None.
        """
        # Cheap pre-check: without any slash the pattern cannot match.
        if '/' not in line:
            return False
        return re.search(r'/[a-z0-9_-]*/', line) is not None

    def mightBeGarbage(line):
        """Return True if *line* should be discarded as noise.

        A line is treated as garbage when either:
          * it contains a '.' immediately followed by one of the listed
            asset/extension names (png, jpg, css, js, ...), or
          * it contains any of the substrings in ``rubbish_list``.

        NOTE: the extension match is not anchored, so ".js" also matches
        inside ".json" and similar longer suffixes.
        """
        # Raw string: "\." in a plain string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        if re.search(
            r"\.(png|jpg|jpeg|gif|svg|bmp|ttf|avif|wav|mp4|aac|ajax|css|all|woff|js)",
            line,
        ):
            return True
        return any(word in line for word in rubbish_list)

    if __name__ == '__main__':
        # Command-line entry point:
        #   argv[1] - snapshot file to read, one candidate per line
        #   argv[2] - name of the output file; a relative name is placed in
        #             the same directory as the snapshot file
        # Every line that isUrl() accepts and mightBeGarbage() does not
        # reject is written once to the output file.
        if len(sys.argv) < 3:
            sys.exit(
                "usage: {} SNAPSHOT_INFILE PATHLIST_OUTFILE".format(sys.argv[0])
            )

        snapshot_infile = sys.argv[1]
        # os.path.join keeps an absolute output path intact instead of gluing
        # it onto the snapshot directory.
        working_dir = os.path.dirname(os.path.realpath(snapshot_infile))
        pathlist_outfile = os.path.join(working_dir, sys.argv[2])

        # A dict is used as an insertion-ordered set (Python 3.7+), so entries
        # are de-duplicated but written in the order they were first seen.
        results = {}
        with open(snapshot_infile, 'r') as r:
            for line in r:
                # Strip the line ending so the last line of the file (which may
                # lack one) de-duplicates and is written like every other entry.
                entry = line.rstrip('\n')
                if isUrl(entry) and not mightBeGarbage(entry):
                    results[entry] = None

        with open(pathlist_outfile, 'w') as w:
            w.writelines(entry + '\n' for entry in results)