Created
March 23, 2020 07:22
-
-
Save pnhuyduy/c507e94e9309511264713dc91b8dfee5 to your computer and use it in GitHub Desktop.
Revisions
-
pnhuyduy renamed this gist
Mar 23, 2020 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
pnhuyduy created this gist
Mar 23, 2020 .There are no files selected for viewing
// Source: https://intoli.com/blog/scrape-infinite-scroll/
const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Collects the `src` of every image matched by the post-grid selector.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page and must not reference anything from the Node.js scope.
 *
 * @returns {string[]} Image source URLs currently present in the DOM.
 */
function extractItems() {
  const images = document.querySelectorAll(
    'main > div > div:nth-child(4) > article > div > div > div img',
  );
  const items = Array.from(images, (image) => image.src);
  // Logged to the *browser* console, which is visible when running headful.
  console.log(items);
  return items;
}

/**
 * Repeatedly scrolls to the bottom of an infinite-scroll page, extracting
 * items after each scroll, until at least `itemTargetCount` items are found
 * or the page stops growing.
 *
 * @param {object} page - Puppeteer page to drive.
 * @param {Function} extractItems - Function evaluated in the page context
 *   that returns the array of items currently in the DOM.
 * @param {number} itemTargetCount - Stop once this many items are collected.
 * @param {number} [scrollDelay=1000] - Milliseconds to wait after each scroll
 *   so newly loaded content can render.
 * @returns {Promise<Array>} The items from the last successful extraction
 *   (may be fewer or more than `itemTargetCount`).
 */
async function scrapeInfiniteScrollItems(
  page,
  extractItems,
  itemTargetCount,
  scrollDelay = 1000,
) {
  let items = [];
  try {
    while (items.length < itemTargetCount) {
      items = await page.evaluate(extractItems);
      if (items.length >= itemTargetCount) {
        // Enough items: skip the extra scroll, which could otherwise block
        // until waitForFunction times out at the end of the page.
        break;
      }

      const previousHeight = await page.evaluate('document.body.scrollHeight');
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      // Rejects (default timeout) when the page no longer grows.
      await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
      // Plain timer instead of the deprecated page.waitFor(ms).
      await new Promise((resolve) => setTimeout(resolve, scrollDelay));
    }
  } catch (err) {
    // Best-effort: keep what was collected so far, but do not hide the reason.
    console.warn(
      `Stopped scrolling after ${items.length} item(s): ${err && err.message ? err.message : err}`,
    );
  }
  return items;
}

(async () => {
  // Set up browser and page.
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 926 });

    // Navigate to the demo page.
    await page.goto('https://www.instagram.com/diq.ng/');

    // Scroll and extract items from the page.
    const items = await scrapeInfiniteScrollItems(page, extractItems, 100);

    // Save extracted items to a file.
    fs.writeFileSync('./items.txt', `${items.join('\n')}\n`);
  } finally {
    // Always close the browser, even if navigation or scraping failed.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});