import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Scanner; import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; class Scratch { private static final String TAG_REGEX = "<[^>]*>"; private static final Pattern TAG_PATTERN = Pattern.compile(TAG_REGEX); private static final String QUERY_REGEX = "(\\w*)(\\[(\\w*)=(\\w*)\\])?"; private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX); public static void main(String[] args) throws IOException, TagValidationException { List lines = Arrays.asList( "", "
", "asdf", "
", "" ); DomNode root = parseFileLines(lines); Scanner scanner = new Scanner(System.in); System.out.println("Query: "); while (true) { String query = scanner.next(); if ("q".equals(query)) { System.exit(0); } long start = System.nanoTime(); boolean found = query(query, root); long end = System.nanoTime(); System.out.println(found ? "found" : "not found"); System.out.println("Query took: " + (end - start) + " ns"); } } /** * Performs a query on the DOM tree * It currently supports queries such as * html.body.div.div[id=123] * * @param queryString the query string * @param root the root of the dom tree * @return true if the node was found, false otherwise */ private static boolean query(String queryString, DomNode root) { String[] queryElements = queryString.split("\\."); if (root.getTag().getTagType().equals(queryElements[0])) { DomNode currentNode = root; for (int i = 1; i < queryElements.length; i++) { Matcher queryMatcher = QUERY_PATTERN.matcher(queryElements[i]); String tagType; Map attributesToMatch = null; if (queryMatcher.matches()) { tagType = queryMatcher.group(1); if (queryMatcher.group(3) != null && queryMatcher.group(4) != null) { attributesToMatch = new HashMap<>(); attributesToMatch.put(queryMatcher.group(3), queryMatcher.group(4)); } } else { return false; } DomNode matchedNode = currentNode.getChildren().stream() .filter(child -> child.getTag().getTagType().equals(tagType)) .findAny() .orElse(null); if (matchedNode == null) { return false; } if (attributesToMatch != null) { for (Map.Entry entry : attributesToMatch.entrySet()) { String actualValue = matchedNode.getTag().getAttributeMap().get(entry.getKey()); if (actualValue == null || !actualValue.equals(entry.getValue())) { return false; } } } currentNode = matchedNode; } return true; } return false; } /** * Creates a DOM tree from the lines of an html file * @param lines the file lines * @return a DOM tree representation of the html file * @throws TagValidationException if there was an invalid tag found */ private static DomNode parseFileLines(List lines) throws TagValidationException { DomNode firstNode = null; Stack tagStack = new Stack<>(); long lineCounter = 0; for (String line : lines) { Matcher matcher = TAG_PATTERN.matcher(line); while(matcher.find()) { String tagString = matcher.group(); HtmlTag htmlTag = null; try { htmlTag = HtmlTag.fromTagString(tagString); } catch (TagValidationException e) { System.out.println(e.getMessage()); System.out.println("Raw tag: " + tagString); System.out.println("Line number: " + lineCounter); throw e; } if (htmlTag.isClosingTag()) { HtmlTag lastTag = tagStack.peek().getTag(); if (!HtmlTag.isOpenClosePair(lastTag, htmlTag)) { throw new IllegalArgumentException("Found unmatched tag on line: " + lineCounter); } else { tagStack.pop(); } } else { DomNode newNode = new DomNode(htmlTag); if (firstNode == null) { firstNode = newNode; } else { DomNode parent = tagStack.peek(); parent.getChildren().add(newNode); } tagStack.push(newNode); } } lineCounter++; } return firstNode; } /** * A class to represent an HtmlTag * Currently stores the type of tag, it's attribute map and whether it was opening or closing */ private static final class HtmlTag { private final String tagType; private final boolean isOpeningTag; private final Map attributeMap; private HtmlTag(String tagType, boolean isOpeningTag, Map attributeMap) { this.tagType = tagType; this.isOpeningTag = isOpeningTag; this.attributeMap = attributeMap; } public String getTagType() { return tagType; } public boolean isOpeningTag() { return isOpeningTag; } public boolean isClosingTag() { return !isOpeningTag; } public Map getAttributeMap() { return attributeMap; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; HtmlTag htmlTag = (HtmlTag) o; return isOpeningTag == htmlTag.isOpeningTag && Objects.equals(tagType, htmlTag.tagType) && Objects .equals(attributeMap, htmlTag.attributeMap); } @Override public int hashCode() { return Objects.hash(tagType, isOpeningTag, attributeMap); } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append('<'); if (!isOpeningTag) { sb.append('/'); } sb.append(tagType); if (!attributeMap.isEmpty()) { sb.append(' '); for (Map.Entry entry : attributeMap.entrySet()) { sb.append(entry.getKey()); sb.append('='); sb.append(entry.getValue()); sb.append(' '); } } sb.append('>'); return sb.toString(); } public static HtmlTag fromTagString(String tagString) throws TagValidationException { String trimmedTagString = tagString.substring(1, tagString.length() - 1); String[] words = trimmedTagString.split("\\s+"); if (words.length < 1) { throw new TagValidationException("Bad tag string"); } // Plain tag case if (words.length == 1) { String rawTag = words[0]; if (rawTag.length() < 2) { throw new TagValidationException("Invalid tag"); } boolean isOpeningTag = rawTag.charAt(1) != '\\'; String tagType = isOpeningTag ? rawTag : rawTag.substring(1); return new HtmlTag(tagType, isOpeningTag, Collections.emptyMap()); } else { Map attributesMap = new HashMap<>(); String tagType = words[0]; for (int i = 1; i < words.length; i++) { String attribute = words[i]; String[] attrKeyValue = attribute.split("="); if (attrKeyValue.length != 2) { throw new TagValidationException("Invalid attribute"); } attributesMap.put(attrKeyValue[0], attrKeyValue[1]); } return new HtmlTag(tagType, true, Collections.unmodifiableMap(attributesMap)); } } public static boolean isOpenClosePair(HtmlTag tag1, HtmlTag tag2) { return tag1.isOpeningTag ^ tag2.isOpeningTag && tag1.tagType.equals(tag2.tagType); } } /** * A class to represent DomNodes * It contains a tag as well as it's links to any child tags */ private static final class DomNode { private final HtmlTag tag; private final List children = new ArrayList<>(); public DomNode(HtmlTag tag) { this.tag = tag; } public HtmlTag getTag() { return tag; } public List getChildren() { return children; } } /** * An Exception thrown if the tag could not be parsed from the string */ private static final class TagValidationException extends Exception { public TagValidationException(String msg) { super(msg); } } }