import {JSDOM} from 'jsdom'; // npm i jsdom

/**
 * Parse an HTML string with jsdom and convert its <body> into a tree of
 * plain objects (see elementToObj for the node shapes).
 *
 * @param {string} htmlString - HTML markup to convert.
 * @returns {object} The root node, `{ type: 'root', children: [...] }`.
 */
function htmlToObj(htmlString) {
  // NOTE(review): the original search string was lost when this file's
  // whitespace was mangled; it is presumed to have been a `<br>` tag that is
  // turned into a newline before parsing -- confirm against the real source.
  // A global regex is used because String.prototype.replace with a string
  // pattern only replaces the FIRST occurrence.
  const newString = htmlString.replace(/<br\s*\/?>/gi, '\n');

  const dom = new JSDOM(newString);
  return elementToObj(dom.window.document.body);
}

/**
 * Recursively convert a DOM element into a plain-object node.
 *
 * Mapping:
 *   - body            -> { type: 'root', children }
 *   - h1..h6          -> { type: 'heading', level, children }
 *   - p               -> { type: 'paragraph', children }
 *   - a               -> { type: 'link', url?, title?, children }
 *   - ol / ul         -> { type: 'list', listType, children }
 *   - li              -> { type: 'list-item', children }
 *   - b / strong      -> { type: 'text', bold: true, value }   (leaf)
 *   - em / i          -> { type: 'text', italic: true, value } (leaf)
 *   - style / script  -> null (dropped)
 *   - anything else   -> { children } (no `type`)
 *
 * Text nodes become `{ type: 'text', value }` with surrounding whitespace
 * trimmed; whitespace-only text nodes are omitted.
 *
 * @param {Element} element - DOM element to convert.
 * @returns {object|null} The converted node, or null if the element is dropped.
 */
function elementToObj(element) {
  const tagName = element.tagName.toLowerCase();

  // Drop non-content elements together with their whole subtree.
  if (tagName === 'style' || tagName === 'script') {
    return null;
  }

  // Inline emphasis is flattened to a single text leaf; its children are not
  // visited because textContent already covers the whole subtree.
  if (tagName === 'b' || tagName === 'strong') {
    return { type: 'text', bold: true, value: element.textContent };
  }
  if (tagName === 'em' || tagName === 'i') {
    return { type: 'text', italic: true, value: element.textContent };
  }

  let obj = {};
  const headingMatch = /^h([1-6])$/.exec(tagName);
  if (tagName === 'body') {
    obj = { type: 'root' };
  } else if (headingMatch) {
    obj = { type: 'heading', level: Number(headingMatch[1]) };
  } else {
    switch (tagName) {
      case 'p':
        obj = { type: 'paragraph' };
        break;
      case 'a':
        obj = { type: 'link' };
        break;
      case 'ol':
        obj = { type: 'list', listType: 'ordered' };
        break;
      case 'ul':
        obj = { type: 'list', listType: 'unordered' };
        break;
      case 'li':
        obj = { type: 'list-item' };
        break;
      default:
        // Unknown tags keep their children but carry no `type`.
        break;
    }
  }

  // Only hyperlinks get attributes.
  if (element.attributes && obj.type === 'link') {
    for (const attribute of element.attributes) {
      if (attribute.name === 'href') {
        obj.url = attribute.value;
      }
      if (attribute.name === 'title') {
        obj.title = attribute.value;
      }
    }
  }

  obj.children = [];
  for (const child of element.childNodes) {
    if (child.nodeType === child.TEXT_NODE) {
      const trimmedText = child.textContent.trim();
      if (trimmedText) {
        obj.children.push({ type: 'text', value: trimmedText });
      }
    } else if (child.nodeType === child.ELEMENT_NODE) {
      const converted = elementToObj(child);
      // Dropped elements (style/script) return null; keep them out of the tree.
      if (converted !== null) {
        obj.children.push(converted);
      }
    }
  }

  return obj;
}