Last active
July 1, 2025 22:05
-
-
Save Guilhem7/bedc05a48a99c16c67e3479a1ccf028a to your computer and use it in GitHub Desktop.
Revisions
-
Guilhem7 revised this gist
Jul 1, 2025 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -67,8 +67,8 @@ def get_servlets(self, root): name = servlet.findtext(self.tag("servlet-name"), default=self.__class__.DEFAULT_UNKNOWN_TAG).strip() clazz = servlet.findtext(self.tag("servlet-class"), default="").strip() jsp_file = servlet.findtext(self.tag("jsp-file"), default="").strip() servlets[name] = {"class": clazz, "jsp_file": jsp_file, @@ -84,7 +84,7 @@ def get_filters(self, root): name = servlet_filter.findtext(self.tag("filter-name"), default=self.__class__.DEFAULT_UNKNOWN_TAG).strip() clazz = servlet_filter.findtext(self.tag("filter-class"), default="").strip() filters[name] = {"class": clazz, "url_mapping": []} return filters -
Guilhem7 revised this gist
Jul 1, 2025 . No changes.There are no files selected for viewing
-
Guilhem7 revised this gist
Jul 1, 2025 . 2 changed files with 210 additions and 218 deletions.There are no files selected for viewing
#!/usr/bin/env python3
"""Parse Tomcat ``web.xml`` deployment descriptors.

For every servlet declared in the descriptor, report its class (or JSP file),
its URL patterns and the filters whose mappings apply to it.  The result can
be printed as JSON or as a coloured terminal report.
"""
import re
import json
import logging
import argparse
import xml.etree.ElementTree as ET

try:
    from rich import print as rprint
    from rich.logging import RichHandler
except ImportError:
    # rich only provides colours; without it the script still works (notably
    # in --json mode) and the pretty report is printed without style tags.
    RichHandler = None
    _STYLE_TAG = re.compile(r"\[/?(?:b|green|blue|red|light_sea_green)\]")

    def rprint(*objects, **kwargs):
        """Plain-text stand-in for ``rich.print``: drop the style tags used below."""
        print(*(_STYLE_TAG.sub("", str(obj)) for obj in objects), **kwargs)

# Set logger
if RichHandler is not None:
    handler = RichHandler(rich_tracebacks=True, markup=True)
else:
    handler = logging.StreamHandler()
logger = logging.getLogger("WebXmlParser")
logger.addHandler(handler)


class WebXmlParser:
    """
    This class is the one used to parse a web.xml file
    """
    # Placeholder used when a name (or the display name) is missing
    DEFAULT_UNKNOWN_TAG = "(Unknown)"

    def __init__(self, xml):
        """
        :param xml: Path (or file object) of the web.xml file, as accepted by
                    ``xml.etree.ElementTree.parse``
        """
        self.xml = xml
        self.ns = None
        self.display_name = None
        self.json_parsed = None

    @staticmethod
    def url_patterns_match(filter_pattern, servlet_pattern):
        """
        Tell whether a filter url-pattern covers a servlet url-pattern

        :param filter_pattern: url-pattern taken from a filter-mapping
        :param servlet_pattern: url-pattern taken from a servlet-mapping
        :rtype: bool
        :return: True when the patterns are equal, when the filter pattern is
                 a catch-all ("/" or "/*"), when the servlet pattern lives
                 under the filter path prefix ("/foo/*"), or when it ends with
                 the filter extension ("*.jsp")
        """
        # Normalize patterns
        f = filter_pattern.strip()
        s = servlet_pattern.strip()

        # Exact match or catch-all pattern
        if f == s or f in ("/", "/*"):
            return True

        # Filter path prefix match (e.g. /foo/*): the servlet pattern must be
        # the prefix itself or continue with a "/" so "/foo/*" skips "/foobar"
        if f.endswith("/*"):
            prefix = f[:-2]
            if s == prefix or s.startswith(prefix + "/"):
                return True

        # Filter extension match (*.jsp)
        if f.startswith("*."):
            return s.endswith(f[1:])

        return False

    def tag(self, name):
        """
        Return the tag associated to a name and a namespace if any
        """
        return f"{{{self.ns}}}{name}" if self.ns else name

    def get_servlets(self, root):
        """
        Recover the servlets

        :param root: Root element of the parsed web.xml
        :rtype: Dict[str, Dict[str, str]]
        :return: A dictionary of all servlets indexed by their name; missing
                 ``servlet-class`` / ``jsp-file`` values are empty strings
        """
        servlets = {}
        for servlet in root.findall(self.tag("servlet")):
            name = servlet.findtext(self.tag("servlet-name"),
                                    default=self.DEFAULT_UNKNOWN_TAG).strip()
            # default="" keeps .strip() safe when the element is absent
            clazz = servlet.findtext(self.tag("servlet-class"), default="").strip()
            jsp_file = servlet.findtext(self.tag("jsp-file"), default="").strip()
            servlets[name] = {"class": clazz,
                              "jsp_file": jsp_file,
                              "url_mapping": []}
        return servlets

    def get_filters(self, root):
        """
        Recover the filters in use on the application

        :param root: Root element of the parsed web.xml
        :return: A dictionary of all filters indexed by their name
        """
        filters = {}
        for servlet_filter in root.findall(self.tag("filter")):
            name = servlet_filter.findtext(self.tag("filter-name"),
                                           default=self.DEFAULT_UNKNOWN_TAG).strip()
            clazz = servlet_filter.findtext(self.tag("filter-class"), default="").strip()
            filters[name] = {"class": clazz, "url_mapping": []}
        return filters

    def get_mapping(self, root, entities, entity_name):
        """
        Recover the mapping of each entity (servlet or filter)

        The url-patterns found are appended in place to
        ``entities[name]["url_mapping"]``.  For filters, ``servlet-name``
        targets are collected under ``"servlet_mapping"`` as well.

        :param root: Root element of the parsed web.xml
        :param entities: Dictionary returned by get_servlets / get_filters
        :param entity_name: Either "servlet" or "filter"
        """
        for mapping in root.findall(self.tag(f"{entity_name}-mapping")):
            # Strip like the declarations do, otherwise names never line up
            name = mapping.findtext(self.tag(f"{entity_name}-name"),
                                    default=self.DEFAULT_UNKNOWN_TAG).strip()

            entity = entities.get(name)
            if entity is None:
                # A mapping may reference something declared elsewhere (e.g.
                # a container-level servlet): keep it visible, do not crash
                logger.warning("%s-mapping references undeclared %s '%s'",
                               entity_name, entity_name, name)
                entity = {"class": self.DEFAULT_UNKNOWN_TAG, "url_mapping": []}
                entities[name] = entity

            for pattern in mapping.findall(self.tag("url-pattern")):
                text = (pattern.text or "").strip()
                if text:
                    entity["url_mapping"].append(text)

            # A filter-mapping can target servlets by name instead of by url.
            # (For servlets, "servlet-name" is the entity's own name tag.)
            if entity_name == "filter":
                for target in mapping.findall(self.tag("servlet-name")):
                    text = (target.text or "").strip()
                    if text:
                        entity.setdefault("servlet_mapping", []).append(text)

    def mix_filter_to_servlet(self, servlets, filters):
        """
        Add filters to the servlets they apply to

        A filter applies when one of its url-patterns covers one of the
        servlet url-patterns, or when it is mapped to the servlet by name
        ("*" meaning every servlet).

        :param servlets: Dictionary of servlets (with their url_mapping)
        :param filters: Dictionary of filters (with their url_mapping)
        :return: A new dictionary of servlets, each gaining a "filters" key
                 when at least one filter applies
        """
        result = {}
        for servlet_name, servlet_attrs in servlets.items():
            # Shallow copy so the caller's servlet entries are left untouched
            entry = dict(servlet_attrs)
            servlet_patterns = entry["url_mapping"]

            for filter_name, filter_attrs in filters.items():
                by_name = any(target in ("*", servlet_name)
                              for target in filter_attrs.get("servlet_mapping", ()))
                by_url = any(self.url_patterns_match(f_pattern, s_pattern)
                             for f_pattern in filter_attrs["url_mapping"]
                             for s_pattern in servlet_patterns)
                if by_name or by_url:
                    entry.setdefault("filters", {})[filter_name] = filter_attrs

            result[servlet_name] = entry
        return result

    def parse(self):
        """
        Parses self.xml and stores the result in self.json_parsed

        On a read or XML syntax error the problem is logged and
        self.json_parsed is left untouched.
        """
        try:
            logger.info("Parsing '%s'", self.xml)
            root = ET.parse(self.xml).getroot()
        except (ET.ParseError, OSError) as e:
            logger.critical("Error while parsing %s: %s", self.xml, e)
            return

        # A namespaced root looks like "{uri}web-app": remember the uri so
        # that tag() can qualify every lookup
        if root.tag.startswith("{"):
            self.ns = root.tag[1:root.tag.find("}")]

        display_name = root.find(self.tag("display-name"))
        if display_name is not None:
            self.display_name = display_name.text
            logger.info("Application name '%s'", self.display_name)
        else:
            self.display_name = self.DEFAULT_UNKNOWN_TAG

        servlets = self.get_servlets(root)
        self.get_mapping(root, servlets, "servlet")
        logger.info("Recovered %d servlets", len(servlets))

        filters = self.get_filters(root)
        self.get_mapping(root, filters, "filter")
        logger.info("Recovered %d filters", len(filters))

        self.json_parsed = self.mix_filter_to_servlet(servlets, filters)

    def display(self, json_format=False):
        """
        Display in a specific format which can be json
        or a pretty display for terminal

        Nothing is printed when parsing failed or found no servlet.

        :param json_format: Print JSON instead of the terminal report
        """
        if not self.json_parsed:
            return

        if json_format:
            print(json.dumps(self.json_parsed, indent=4))
            return

        indent = " "*2
        for servlet_name, attrs in self.json_parsed.items():
            rprint(f"[b]Servlet:[/b] [green]{servlet_name}[/green]")
            if attrs.get("jsp_file"):
                rprint(f"{indent}[b]JSP file:[/b] [blue]{attrs.get('jsp_file')}[/blue]")
            else:
                rprint(f"{indent}[b]Class:[/b] [blue]{attrs.get('class')}[/blue]")

            rprint(f"{indent}[b]Urls:[/b]")
            for url_pattern in attrs["url_mapping"]:
                rprint(f"{indent} - [red]{url_pattern}[/red]")

            if attrs.get("filters"):
                rprint(f"{indent}[b]Filters:[/b]")
                for filter_name, filter_attrs in attrs["filters"].items():
                    rprint(f"{indent} - [light_sea_green]{filter_name}[/light_sea_green] ({filter_attrs.get('class')})")
            else:
                rprint(f"{indent}[b]No Filters[/b]")
            rprint()


def options():
    """
    Parse cli options
    """
    parser = argparse.ArgumentParser(description="Parse a tomcat web.xml file")
    parser.add_argument("-x", "--xml", nargs="+", required=False,
                        help="Path to web.xml file (default: %(default)s)",
                        default=["web.xml"])
    parser.add_argument("-v", "--verbose", help="Increase verbosity", action="store_true")
    parser.add_argument("-j", "--json", help="Display output in json", action="store_true")
    return parser.parse_args()


if __name__ == "__main__":
    args = options()
    logger.setLevel(logging.DEBUG if args.verbose else logging.WARNING)

    for web_xml in args.xml:
        xml_parser = WebXmlParser(web_xml)
        xml_parser.parse()
        xml_parser.display(json_format=args.json)
Guilhem7 revised this gist
Jun 30, 2025 . 1 changed file with 16 additions and 13 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -64,6 +64,9 @@ show_progress() { if [ "$1" -eq "$2" ]; then echo "" # move to next line when complete fi # Show progress arg 1 is count and arg 2 is the total # echo -en "\r${BLUE}[-]${N} Progress ${DIM}$percent${N}/${BOLD}100%${N}\e[K" # echo -en "\r[] $1/$2\e[K" } if ! command -v xq 2&>/dev/null;then @@ -131,7 +134,7 @@ xq_output=$(xq -c ' ' $WEB_XML) count=1 total_lines=$(printf "%s\n" "$xq_output" |wc -l) while IFS= read -r entry; do if [ "$VERBOSE" -eq 1 ];then show_progress "$count" "$total_lines" @@ -142,7 +145,7 @@ while IFS= read -r entry; do urls=$(jq -r '.urls[]' <<< "$entry" | paste -sd "|" -) servlet_class_map["$name"]="$class" servlet_urls_map["$name"]="$urls" done < <(printf "%s\n" "$xq_output") if [ "$VERBOSE" -eq 1 ];then info "Recovering ${GREEN}filters${N}" @@ -166,7 +169,7 @@ xq_output=$(xq -c ' ' $WEB_XML) count=1 total_lines=$(printf "%s\n" "$xq_output" |wc -l) while IFS= read -r entry; do if [ "$VERBOSE" -eq 1 ];then show_progress "$count" "$total_lines" @@ -177,7 +180,7 @@ while IFS= read -r entry; do patterns=$(jq -r '.patterns[]' <<< "$entry" | paste -sd "|" -) filter_class_map["$name"]="$class" filter_patterns_map["$name"]="$patterns" done < <(printf "%s\n" "$xq_output") # Process each servlet for servlet in "${!servlet_class_map[@]}"; do @@ -203,13 +206,13 @@ for servlet in "${!servlet_class_map[@]}"; do echo -e " Class : ${BOLD}${class}${N}" echo -e " URLs :" for u in "${urls[@]}"; do echo " - $u"; done echo " Filters:" if [[ ${#filter_set[@]} -eq 0 ]]; then echo " (none)" else for f in "${!filter_set[@]}"; do echo " - $f" done fi echo done -
Guilhem7 revised this gist
Jun 26, 2025 . 1 changed file with 0 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -64,9 +64,6 @@ show_progress() { if [ "$1" -eq "$2" ]; then echo "" # move to next line when complete fi } if ! command -v xq 2&>/dev/null;then -
Guilhem7 created this gist
Jun 26, 2025 .There are no files selected for viewing
#!/bin/bash
# List every servlet declared in a web.xml file together with its class, its
# URL patterns and the filters whose url-patterns match those URLs.
# Requires xq (shipped with the python "yq" package) and jq.
set -e

## Global var
SCRIPT_NAME="${0##*/}"

## Style
RED='\e[31m'
GREEN='\e[32m'
BLUE='\e[34m'
YELLOW='\e[93m'
BOLD='\e[1m'
DIM='\e[2m'
ITALIC='\e[3m'
N='\e[0m'

## Util function
# Print an error (with the calling line when known) and exit with status 1
die(){
    if [ -n "$BASH_LINENO" ];then
        err "${SCRIPT_NAME}: ${BOLD}Error:Line:${BASH_LINENO[0]}${N} $*"
    else
        err "${SCRIPT_NAME}: ${BOLD}Error${N} $*"
    fi
    exit 1
}

msg(){
    echo -e "${GREEN}[+]${N} $*"
}

info(){
    echo -e "${BLUE}[-]${N} $*"
}

err(){
    echo -e "${RED}[x]${N} $*" >&2
}

show_help() {
    echo "Usage: $SCRIPT_NAME [--xml XML] [-h] [-v]"
    echo
    echo "Options:"
    echo " --xml XML Specify the path to web.xml (default web.xml)"
    echo " -v, --verbose Increase verbosity"
    echo " -h, --help Display this help message"
}

# Draw a progress bar: $1 is the current count and $2 is the total
show_progress() {
    # Nothing to draw (and no division by zero) when there is no work
    [ "$2" -gt 0 ] || return 0

    local width=50
    local percent=$(( 100 * $1 / $2 ))
    local filled=$(( width * $1 / $2 ))
    local empty=$(( width - filled ))
    # A run of spaces; slices of it are printed as the bar cells
    local fill_char
    fill_char=$(printf " %.0s" {1..101})

    printf "\r["
    printf "\e[47m"
    printf "%.*s" "$filled" "$fill_char"
    printf "\e[0m"
    printf "%.*s" "$empty" "$fill_char"
    printf "] %3d%% (%d/%d)" "$percent" "$1" "$2"

    if [ "$1" -eq "$2" ]; then
        echo "" # move to next line when complete
    fi
}

# Dependencies: stop right away instead of failing later inside the pipeline
if ! command -v xq >/dev/null 2>&1;then
    err "Command xq not found, please run: ${BOLD}pip3 install yq${N}"
    exit 1
fi

if ! command -v jq >/dev/null 2>&1;then
    err "Command jq not found, please run: ${BOLD}sudo apt install -y jq${N}"
    exit 1
fi

WEB_XML="web.xml"
VERBOSE=0

while [[ "$#" -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_help
            exit 0
            ;;
        --xml)
            if [[ -n "$2" && "$2" != -* ]]; then
                WEB_XML="$2"
                shift
            else
                echo "Error: --xml requires valid xml"
                exit 1
            fi
            ;;
        -v|--verbose)
            VERBOSE=1
            ;;
        *)
            echo "Unknown option: $1"
            show_help
            exit 1
            ;;
    esac
    shift
done

if [ ! -f "$WEB_XML" ];then
    err "File not found: $WEB_XML"
    exit 1
fi

if [ "$VERBOSE" -eq 1 ];then
    info "Processing $WEB_XML"
    info "Recovering ${GREEN}servlets${N} ${ITALIC}${DIM}(can take some time)${N}"
fi

# Parse all servlet mappings into a lookup: servlet-name -> list of url-patterns
declare -A servlet_class_map
declare -A servlet_urls_map

# "arr" normalises a node to an array: the XML-to-JSON conversion yields an
# object for a single child element, an array for several, null for none.
xq_output=$(xq -c '
  def arr: if type == "array" then . elif . == null then [] else [.] end;
  (.["web-app"]["servlet-mapping"] | arr) as $mappings
  | .["web-app"]["servlet"] | arr | .[] as $s
  | {
      name: $s["servlet-name"],
      class: $s["servlet-class"],
      urls: (
        $mappings
        | map(select(.["servlet-name"] == $s["servlet-name"])["url-pattern"])
        | flatten
        | map(select(. != null))
      )
    }
' "$WEB_XML")

count=1
total_lines=$(printf "%s\n" "$xq_output" | wc -l)

# Quoting "$xq_output" keeps one JSON document per line even with spaces
while IFS= read -r entry; do
    # No servlet at all gives a single empty line: skip it
    [ -n "$entry" ] || continue

    if [ "$VERBOSE" -eq 1 ];then
        show_progress "$count" "$total_lines"
        count=$((count + 1))
    fi

    name=$(jq -r '.name' <<< "$entry")
    class=$(jq -r '.class' <<< "$entry")
    urls=$(jq -r '.urls[]' <<< "$entry" | paste -sd "|" -)

    servlet_class_map["$name"]="$class"
    servlet_urls_map["$name"]="$urls"
done < <(printf "%s\n" "$xq_output")

if [ "$VERBOSE" -eq 1 ];then
    info "Recovering ${GREEN}filters${N}"
fi

# Parse all filters and map filter-name to list of patterns
declare -A filter_class_map
declare -A filter_patterns_map

xq_output=$(xq -c '
  def arr: if type == "array" then . elif . == null then [] else [.] end;
  (.["web-app"]["filter-mapping"] | arr) as $mappings
  | .["web-app"]["filter"] | arr | .[] as $f
  | {
      name: $f["filter-name"],
      class: $f["filter-class"],
      patterns: (
        $mappings
        | map(select(.["filter-name"] == $f["filter-name"])["url-pattern"])
        | flatten
        | map(select(. != null))
      )
    }
' "$WEB_XML")

count=1
total_lines=$(printf "%s\n" "$xq_output" | wc -l)

while IFS= read -r entry; do
    [ -n "$entry" ] || continue

    if [ "$VERBOSE" -eq 1 ];then
        show_progress "$count" "$total_lines"
        count=$((count + 1))
    fi

    name=$(jq -r '.name' <<< "$entry")
    class=$(jq -r '.class' <<< "$entry")
    patterns=$(jq -r '.patterns[]' <<< "$entry" | paste -sd "|" -)

    filter_class_map["$name"]="$class"
    filter_patterns_map["$name"]="$patterns"
done < <(printf "%s\n" "$xq_output")

# Process each servlet
for servlet in "${!servlet_class_map[@]}"; do
    class="${servlet_class_map[$servlet]}"
    IFS='|' read -ra urls <<< "${servlet_urls_map[$servlet]}"

    declare -A filter_set=()

    # Match servlet URLs with all filter patterns
    for filter in "${!filter_patterns_map[@]}"; do
        IFS='|' read -ra patterns <<< "${filter_patterns_map[$filter]}"
        for u in "${urls[@]}"; do
            for p in "${patterns[@]}"; do
                # Turn the url-pattern into a regex: dots are literal and
                # "*" stands for any run of characters
                regex="${p//./\\.}"
                regex="^${regex//\*/.*}$"
                if [[ "$u" =~ $regex ]]; then
                    filter_set["$filter"]=1
                fi
            done
        done
    done

    # Output result
    echo -e "Servlet : ${GREEN}${servlet}${N}"
    echo -e " Class : ${BOLD}${class}${N}"
    echo -e " URLs :"
    for u in "${urls[@]}"; do echo " - $u"; done
    echo " Filters:"
    if [[ ${#filter_set[@]} -eq 0 ]]; then
        echo " (none)"
    else
        for f in "${!filter_set[@]}"; do
            echo " - $f"
        done
    fi
    echo
done