Created
November 9, 2018 20:12
-
-
Save mrhampson/c77e7e657e0d84d663c53157848c33c3 to your computer and use it in GitHub Desktop.
Programming problem solution. Trying to parse a simple HTML file and then allow the user to query to check if certain nodes exist.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.File; | |
| import java.io.IOException; | |
| import java.nio.charset.StandardCharsets; | |
| import java.nio.file.Files; | |
| import java.nio.file.Path; | |
| import java.nio.file.Paths; | |
| import java.util.ArrayList; | |
| import java.util.Arrays; | |
| import java.util.Collections; | |
| import java.util.HashMap; | |
| import java.util.List; | |
| import java.util.Map; | |
| import java.util.Objects; | |
| import java.util.Scanner; | |
| import java.util.Stack; | |
| import java.util.regex.Matcher; | |
| import java.util.regex.Pattern; | |
| class Scratch { | |
| private static final String TAG_REGEX = "<[^>]*>"; | |
| private static final Pattern TAG_PATTERN = Pattern.compile(TAG_REGEX); | |
| private static final String QUERY_REGEX = "(\\w*)(\\[(\\w*)=(\\w*)\\])?"; | |
| private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX); | |
| public static void main(String[] args) throws IOException, TagValidationException { | |
| List<String> lines = Arrays.asList( | |
| "<html><body>", | |
| "<div><div><div id=123>", | |
| "asdf", | |
| "</div></div></div>", | |
| "</body></html>" | |
| ); | |
| DomNode root = parseFileLines(lines); | |
| Scanner scanner = new Scanner(System.in); | |
| System.out.println("Query: "); | |
| while (true) { | |
| String query = scanner.next(); | |
| if ("q".equals(query)) { | |
| System.exit(0); | |
| } | |
| long start = System.nanoTime(); | |
| boolean found = query(query, root); | |
| long end = System.nanoTime(); | |
| System.out.println(found ? "found" : "not found"); | |
| System.out.println("Query took: " + (end - start) + " ns"); | |
| } | |
| } | |
| /** | |
| * Performs a query on the DOM tree | |
| * It currently supports queries such as | |
| * html.body.div.div[id=123] | |
| * | |
| * @param queryString the query string | |
| * @param root the root of the dom tree | |
| * @return true if the node was found, false otherwise | |
| */ | |
| private static boolean query(String queryString, DomNode root) { | |
| String[] queryElements = queryString.split("\\."); | |
| if (root.getTag().getTagType().equals(queryElements[0])) { | |
| DomNode currentNode = root; | |
| for (int i = 1; i < queryElements.length; i++) { | |
| Matcher queryMatcher = QUERY_PATTERN.matcher(queryElements[i]); | |
| String tagType; | |
| Map<String, String> attributesToMatch = null; | |
| if (queryMatcher.matches()) { | |
| tagType = queryMatcher.group(1); | |
| if (queryMatcher.group(3) != null && queryMatcher.group(4) != null) { | |
| attributesToMatch = new HashMap<>(); | |
| attributesToMatch.put(queryMatcher.group(3), queryMatcher.group(4)); | |
| } | |
| } | |
| else { | |
| return false; | |
| } | |
| DomNode matchedNode = currentNode.getChildren().stream() | |
| .filter(child -> child.getTag().getTagType().equals(tagType)) | |
| .findAny() | |
| .orElse(null); | |
| if (matchedNode == null) { | |
| return false; | |
| } | |
| if (attributesToMatch != null) { | |
| for (Map.Entry<String, String> entry : attributesToMatch.entrySet()) { | |
| String actualValue = matchedNode.getTag().getAttributeMap().get(entry.getKey()); | |
| if (actualValue == null || !actualValue.equals(entry.getValue())) { | |
| return false; | |
| } | |
| } | |
| } | |
| currentNode = matchedNode; | |
| } | |
| return true; | |
| } | |
| return false; | |
| } | |
| /** | |
| * Creates a DOM tree from the lines of an html file | |
| * @param lines the file lines | |
| * @return a DOM tree representation of the html file | |
| * @throws TagValidationException if there was an invalid tag found | |
| */ | |
| private static DomNode parseFileLines(List<String> lines) throws TagValidationException { | |
| DomNode firstNode = null; | |
| Stack<DomNode> tagStack = new Stack<>(); | |
| long lineCounter = 0; | |
| for (String line : lines) { | |
| Matcher matcher = TAG_PATTERN.matcher(line); | |
| while(matcher.find()) { | |
| String tagString = matcher.group(); | |
| HtmlTag htmlTag = null; | |
| try { | |
| htmlTag = HtmlTag.fromTagString(tagString); | |
| } | |
| catch (TagValidationException e) { | |
| System.out.println(e.getMessage()); | |
| System.out.println("Raw tag: " + tagString); | |
| System.out.println("Line number: " + lineCounter); | |
| throw e; | |
| } | |
| if (htmlTag.isClosingTag()) { | |
| HtmlTag lastTag = tagStack.peek().getTag(); | |
| if (!HtmlTag.isOpenClosePair(lastTag, htmlTag)) { | |
| throw new IllegalArgumentException("Found unmatched tag on line: " + lineCounter); | |
| } | |
| else { | |
| tagStack.pop(); | |
| } | |
| } | |
| else { | |
| DomNode newNode = new DomNode(htmlTag); | |
| if (firstNode == null) { | |
| firstNode = newNode; | |
| } | |
| else { | |
| DomNode parent = tagStack.peek(); | |
| parent.getChildren().add(newNode); | |
| } | |
| tagStack.push(newNode); | |
| } | |
| } | |
| lineCounter++; | |
| } | |
| return firstNode; | |
| } | |
| /** | |
| * A class to represent an HtmlTag | |
| * Currently stores the type of tag, it's attribute map and whether it was opening or closing | |
| */ | |
| private static final class HtmlTag { | |
| private final String tagType; | |
| private final boolean isOpeningTag; | |
| private final Map<String, String> attributeMap; | |
| private HtmlTag(String tagType, boolean isOpeningTag, Map<String, String> attributeMap) { | |
| this.tagType = tagType; | |
| this.isOpeningTag = isOpeningTag; | |
| this.attributeMap = attributeMap; | |
| } | |
| public String getTagType() { | |
| return tagType; | |
| } | |
| public boolean isOpeningTag() { | |
| return isOpeningTag; | |
| } | |
| public boolean isClosingTag() { | |
| return !isOpeningTag; | |
| } | |
| public Map<String, String> getAttributeMap() { | |
| return attributeMap; | |
| } | |
| @Override | |
| public boolean equals(Object o) { | |
| if (this == o) | |
| return true; | |
| if (o == null || getClass() != o.getClass()) | |
| return false; | |
| HtmlTag htmlTag = (HtmlTag) o; | |
| return isOpeningTag == htmlTag.isOpeningTag && Objects.equals(tagType, htmlTag.tagType) && Objects | |
| .equals(attributeMap, htmlTag.attributeMap); | |
| } | |
| @Override | |
| public int hashCode() { | |
| return Objects.hash(tagType, isOpeningTag, attributeMap); | |
| } | |
| @Override | |
| public String toString() { | |
| StringBuilder sb = new StringBuilder(); | |
| sb.append('<'); | |
| if (!isOpeningTag) { | |
| sb.append('/'); | |
| } | |
| sb.append(tagType); | |
| if (!attributeMap.isEmpty()) { | |
| sb.append(' '); | |
| for (Map.Entry<String, String> entry : attributeMap.entrySet()) { | |
| sb.append(entry.getKey()); | |
| sb.append('='); | |
| sb.append(entry.getValue()); | |
| sb.append(' '); | |
| } | |
| } | |
| sb.append('>'); | |
| return sb.toString(); | |
| } | |
| public static HtmlTag fromTagString(String tagString) throws TagValidationException { | |
| String trimmedTagString = tagString.substring(1, tagString.length() - 1); | |
| String[] words = trimmedTagString.split("\\s+"); | |
| if (words.length < 1) { | |
| throw new TagValidationException("Bad tag string"); | |
| } | |
| // Plain tag case | |
| if (words.length == 1) { | |
| String rawTag = words[0]; | |
| if (rawTag.length() < 2) { | |
| throw new TagValidationException("Invalid tag"); | |
| } | |
| boolean isOpeningTag = rawTag.charAt(1) != '\\'; | |
| String tagType = isOpeningTag ? rawTag : rawTag.substring(1); | |
| return new HtmlTag(tagType, isOpeningTag, Collections.emptyMap()); | |
| } | |
| else { | |
| Map<String, String> attributesMap = new HashMap<>(); | |
| String tagType = words[0]; | |
| for (int i = 1; i < words.length; i++) { | |
| String attribute = words[i]; | |
| String[] attrKeyValue = attribute.split("="); | |
| if (attrKeyValue.length != 2) { | |
| throw new TagValidationException("Invalid attribute"); | |
| } | |
| attributesMap.put(attrKeyValue[0], attrKeyValue[1]); | |
| } | |
| return new HtmlTag(tagType, true, Collections.unmodifiableMap(attributesMap)); | |
| } | |
| } | |
| public static boolean isOpenClosePair(HtmlTag tag1, HtmlTag tag2) { | |
| return tag1.isOpeningTag ^ tag2.isOpeningTag && | |
| tag1.tagType.equals(tag2.tagType); | |
| } | |
| } | |
| /** | |
| * A class to represent DomNodes | |
| * It contains a tag as well as it's links to any child tags | |
| */ | |
| private static final class DomNode { | |
| private final HtmlTag tag; | |
| private final List<DomNode> children = new ArrayList<>(); | |
| public DomNode(HtmlTag tag) { | |
| this.tag = tag; | |
| } | |
| public HtmlTag getTag() { | |
| return tag; | |
| } | |
| public List<DomNode> getChildren() { | |
| return children; | |
| } | |
| } | |
| /** | |
| * An Exception thrown if the tag could not be parsed from the string | |
| */ | |
| private static final class TagValidationException extends Exception { | |
| public TagValidationException(String msg) { | |
| super(msg); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment