const jsdom = require('jsdom') const { JSDOM } = jsdom function toJSDOM(responseBody) { return new JSDOM(responseBody) } /** * Pull structured data from the HTML of an Instagram page (e.g. www.instagram.com/theshaderoom) */ module.exports = function parse(body) { const rawDom = toJSDOM(body) const dom = rawDom.window.document.body function formatNumber(text, labelText) { let num = text.replace(labelText, '').replace(/,/g, '') // '17.4m' if (text.includes('m')) { // 17.4 // expand out millions condensedNum = Number(condensedNum.replace('m', '')) * 1000000 } return Number(num.trim()) } const blocks = { posts: null, followers: null, following: null } // Map description row chunks (mostly lacking good semantic CSS) into structured data Array.from(dom.querySelectorAll('ul li')) .map(node => node.textContent) .forEach(textNode => { // '128,123 posts' if (textNode.includes(' posts')) { blocks.posts = formatNumber(textNode, ' posts') // '27.4m followers' } else if (textNode.includes(' followers')) { blocks.followers = formatNumber(textNode, ' followers', '') // '128,123 following' } else if (textNode.includes(' following')) { blocks.following = formatNumber(textNode, ' following', '') } }) return { name: dom.querySelector('h1').textContent, isVerified: Boolean(dom.querySelector('.coreSpriteVerifiedBadge')), ...blocks } }