// Code structure (each section below lives in its own file):
//   app.js
//   router.js
//   controller.js
//   service.js
//   implementation (the Puppeteer task callback)
//
// This is hosted on AWS Fargate. Rough worst-case calculation of resource usage:
//   - 8 GB RAM per container
//   - 25 (max) concurrent requests per container
//   - 300 MB RAM consumption per request (a rough worst case from googling; we
//     are using a combination of the cluster and new Chrome instances)
//
//   total_requests_each_container_can_handle = (8 GB * 1000 MB/GB) / 300 MB
//   total_requests_each_container_can_handle = ~26 requests

// ---- app.js ----------------------------------------------------------------
// Initialize the Puppeteer cluster once, on server start. `app`, `routes` and
// `port` are expected to be defined by the surrounding Express setup.
const { Cluster } = require('puppeteer-cluster');

(async () => {
  const cluster = await Cluster.launch({
    // NOTE(review): CONCURRENCY_PAGE presumably shares one browser between all
    // jobs — confirm this is the isolation level wanted for arbitrary URLs.
    concurrency: Cluster.CONCURRENCY_PAGE,
    // Adjustable. Note this is well below the 25 concurrent requests estimated
    // above; jobs beyond this limit presumably wait in the cluster queue.
    maxConcurrency: 3,
    puppeteerOptions: {
      headless: true,
    },
  });

  app.use(
    '/api',
    (req, res, next) => {
      // Expose the shared cluster to every request going through "/api".
      req.cluster = cluster;
      next();
    },
    routes,
  );

  // Start the Express server; keep the handle so it can be closed on shutdown.
  const server = app.listen(port, () => {
    console.log(`Server is running on http://localhost:${port}`);
  });

  // Graceful shutdown: stop accepting connections, let queued jobs finish,
  // then close the browser(s).
  let isShuttingDown = false;
  const shutDown = async (signal) => {
    if (isShuttingDown) {
      return; // A second signal must not start a second shutdown.
    }
    isShuttingDown = true;
    console.log(`Received ${signal}, shutting down`);

    server.close();
    try {
      await cluster.idle();
      await cluster.close();
      process.exit(0);
    } catch (err) {
      console.error('Error during shutdown:', err);
      process.exit(1);
    }
  };

  // SIGINT covers Ctrl+C locally; SIGTERM is what the container runtime sends
  // when the task is stopped.
  process.on('SIGINT', () => shutDown('SIGINT'));
  process.on('SIGTERM', () => shutDown('SIGTERM'));
})().catch((err) => {
  // Without this handler a failed Cluster.launch is an unhandled rejection.
  console.error('Failed to start server:', err);
  process.exit(1);
});

// ---- router.js -------------------------------------------------------------
// Mounted under "/api" by app.js, so this handles POST /api/.
Router.post("/", scraperController.initTask);

// ---- controller.js ---------------------------------------------------------
/**
 * Tells whether a request-supplied value is an absolute http(s) URL.
 *
 * This only rejects non-web schemes (file:, chrome:, javascript:, ...); it
 * does NOT stop requests to internal hosts.
 *
 * @param {unknown} value - Candidate URL taken from the request body.
 * @returns {boolean} True when `value` is a string that parses as http/https.
 */
function isHttpUrl(value) {
  if (typeof value !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch (err) {
    // `new URL` throws on unparsable input; that simply means "not a URL".
    return false;
  }
}

class ScraperController {
  /**
   * @param {{ initTask: Function }} scraperService - Service that runs the scrape.
   */
  constructor(scraperService) {
    this.scraperService = scraperService;
  }

  /**
   * POST handler: scrapes `req.body.url` on the shared cluster.
   *
   * Declared as an arrow-function field so `this` survives being passed to
   * the router as a bare callback.
   *
   * @param {object} req - Express request; reads `req.cluster` and `req.body.url`.
   * @param {object} res - Express response; receives 200, 400 or 500 JSON.
   * @returns {Promise<void>}
   */
  initTask = async (req, res) => {
    const { cluster } = req;
    const url = req.body?.url;

    if (!isHttpUrl(url)) {
      res.status(400).json({ message: 'A valid http(s) "url" is required' });
      return;
    }

    try {
      // Pass the cluster to the service.
      const result = await this.scraperService.initTask({ cluster, url });
      res.json({ message: "Success", data: result });
    } catch (err) {
      // A rejected async handler would otherwise leave the request hanging.
      console.error('Scrape failed:', err);
      res.status(500).json({ message: 'Scrape failed' });
    }
  };
}

// ---- service.js ------------------------------------------------------------
class ScraperService {
  /**
   * Runs the Puppeteer task for `url` on the cluster and returns its result.
   *
   * `cluster.execute()` provides a "page" (a new tab) to the callback,
   * `myPuppeteerTask`, which is defined in the implementation section.
   *
   * @param {{ cluster: object, url: string }} params
   * @returns {Promise<*>} Whatever the task callback returns.
   */
  async initTask({ cluster, url }) {
    return cluster.execute({ url }, myPuppeteerTask);
  }
}

// ---- implementation --------------------------------------------------------
// Implementation for the callback above. We get a "page" to work with.
/**
 * Task callback handed to `cluster.execute()`; runs once per job.
 *
 * @param {object} job
 * @param {object} job.page - Page (tab) supplied for this job; only
 *   `page.goto` is used here.
 * @param {{ url: string }} job.data - Job payload; `url` is the address to open.
 * @returns {Promise<string>} The task result ("Task Done" for now).
 */
async function myPuppeteerTask({ page, data }) {
  const { url } = data;

  // Navigation errors are left to propagate to the caller.
  await page.goto(url);

  // Do puppeteer things

  return "Task Done"; // Or return what you want.
}