Skip to content

Instantly share code, notes, and snippets.

@kuroski
Last active May 31, 2022 14:51
Show Gist options
  • Select an option

  • Save kuroski/b3129f6319c27e83f1a602362d11ba9f to your computer and use it in GitHub Desktop.

Select an option

Save kuroski/b3129f6319c27e83f1a602362d11ba9f to your computer and use it in GitHub Desktop.

Revisions

  1. kuroski revised this gist May 31, 2022. 1 changed file with 26 additions and 13 deletions.
    39 changes: 26 additions & 13 deletions checkSitemap.js
    Original file line number Diff line number Diff line change
    @@ -1,26 +1,39 @@
    /* eslint-disable */
    const fs = require('fs')
    const urlExists = require('url-exists');
    const parser = require('xml2js')
    const urlExists = require('url-exists')
    const parser = require('xml2js')
    const { PromisePool } = require('@supercharge/promise-pool')

    /**
     * NOTE(review): this span is a rendered diff of checkSitemap.js with the +/- markers
     * stripped, so lines from before and after the revision appear interleaved (two
     * `const urls` declarations, one `PromisePool` chain with `withConcurrency(5)` and one
     * with `withConcurrency(10)`). It is not runnable exactly as shown.
     *
     * What the visible code does: reads ./public/sitemap.xml, parses it with
     * `parser.parseString`, collects the `loc` value of every `urlset.url` entry, checks
     * each URL with `urlExists` through a `PromisePool`, groups the URLs into
     * `exists` / `notExists`, logs the two counts and writes the grouping to ./result.json.
     *
     * The `err` argument of the parse callback is never inspected.
     */
    async function init() {
    const sitemapXml = fs.readFileSync('./public/sitemap.xml', 'utf8')
    parser.parseString(sitemapXml, async (err, result) => {
    // The next two lines differ only in brace spacing; presumably the first is the removed
    // line and the second its replacement.
    const urls = result.urlset.url.map(({loc}) => loc).flat()
    const urls = result.urlset.url.map(({ loc }) => loc).flat()

    // Presumably the pre-revision chain (concurrency 5); it is not closed before the next
    // chain starts.
    const { results, errors } = await PromisePool
    .withConcurrency(5)
    .for(urls)
    .onTaskStarted((item, pool) => {
    console.log(`Processing ${item}`)
    })
    .process(async (url, index, pool) => {
    return new Promise((resolve) => {
    urlExists(url, (_, exists) => resolve({ url, exists}))
    // Presumably the post-revision chain (concurrency 10).
    const { results, errors } = await PromisePool.withConcurrency(10)
    .for(urls)
    .onTaskStarted((item, pool) => {
    console.log(`Processing ${item}`)
    })
    .process(async (url, index, pool) => {
    // Wrap the callback-style urlExists in a promise that resolves with { url, exists };
    // the callback's first argument is ignored.
    return new Promise((resolve) => {
    urlExists(url, (_, exists) => resolve({ url, exists }))
    })
    })

    // Split the per-URL outcomes into two lists of URLs keyed by the `exists` flag.
    const p = results.reduce(
    (acc, curr) => ({
    exists: [...acc.exists, ...(curr.exists ? [curr.url] : [])],
    notExists: [...acc.notExists, ...(!curr.exists ? [curr.url] : [])],
    }),
    { exists: [], notExists: [] }
    )

    console.log({
    exists: p.exists.length,
    notExists: p.notExists.length,
    })

    console.log({results, errors})
    // Persist the grouped URLs (not the raw results) as JSON.
    fs.writeFileSync('./result.json', JSON.stringify(p))
    })
    }

  2. kuroski created this gist May 31, 2022.
    27 changes: 27 additions & 0 deletions checkSitemap.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,27 @@
    const fs = require('fs')
    const urlExists = require('url-exists');
    const parser = require('xml2js')
    const { PromisePool } = require('@supercharge/promise-pool')

    /**
     * Checks every URL listed in ./public/sitemap.xml and logs the outcome.
     *
     * Each `loc` value found under `urlset.url` is passed to `urlExists`, at most five
     * at a time, and the collected `{ url, exists }` results are logged together with
     * any errors reported by the pool.
     *
     * @returns {Promise<void>} Resolves once every URL has been checked and the outcome
     *   has been logged. Rejects if the sitemap cannot be read or parsed.
     */
    async function init() {
      const sitemapXml = fs.readFileSync('./public/sitemap.xml', 'utf8')

      // Adapt the callback API to a promise so that parse errors reject instead of being
      // ignored, and so that init() only settles after the checks have finished.
      const result = await new Promise((resolve, reject) => {
        parser.parseString(sitemapXml, (err, parsed) => {
          if (err) {
            reject(err)
            return
          }
          resolve(parsed)
        })
      })

      // A sitemap without any <url> entry has no `url` array; treat it as an empty list.
      const entries = (result && result.urlset && result.urlset.url) || []
      const urls = entries.map(({ loc }) => loc).flat()

      const { results, errors } = await PromisePool
        .withConcurrency(5)
        .for(urls)
        .onTaskStarted((item) => {
          console.log(`Processing ${item}`)
        })
        .process((url) => {
          // urlExists is callback-based; its first callback argument is ignored here,
          // as before, and only the `exists` flag is kept.
          return new Promise((resolve) => {
            urlExists(url, (_, exists) => resolve({ url, exists }))
          })
        })

      console.log({results, errors})
    }

    // init() returns a promise: report a failure and exit non-zero instead of leaving
    // the rejection unhandled.
    init().catch((err) => {
      console.error(err)
      process.exitCode = 1
    })