//Generated using Claude Sonnet 3.5
import axios from "axios";
import { XMLParser } from "fast-xml-parser";
import { writeFile } from "fs/promises";
import { parse as parseURL } from "url";

/**
 * Downloads an XML sitemap, requests every URL listed in it, and records the
 * ones that answer with a "problem" status (by default 301, 302 or 404) or
 * whose request fails outright.
 */
class SitemapChecker {
  #batchSize;
  #problemStatuses;

  /**
   * @param {string} sitemapUrl - URL of the sitemap (`<urlset>`) document.
   * @param {object} [options]
   * @param {number} [options.batchSize=5] - How many URLs are requested concurrently.
   * @param {Iterable<number>} [options.problemStatuses=[301, 302, 404]] - HTTP
   *   status codes that cause a URL to be reported.
   * @throws {RangeError} If `batchSize` is not a positive integer.
   */
  constructor(
    sitemapUrl,
    { batchSize = 5, problemStatuses = [301, 302, 404] } = {}
  ) {
    // A zero or negative step would make the batching loop in check() never end.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      throw new RangeError("batchSize must be a positive integer");
    }

    this.sitemapUrl = sitemapUrl;
    this.parser = new XMLParser();
    this.results = [];
    this.#batchSize = batchSize;
    this.#problemStatuses = new Set(problemStatuses);
  }

  /**
   * Quotes a value for use as a CSV field, doubling any embedded double quotes
   * so that the field cannot break out of its column.
   *
   * @param {unknown} value
   * @returns {string}
   */
  static #csvField(value) {
    return `"${String(value).replace(/"/g, '""')}"`;
  }

  /**
   * Fetches the sitemap and extracts the `<loc>` value of every `<url>` entry.
   *
   * @returns {Promise<string[]>} The URLs listed in the sitemap (possibly empty).
   * @throws If the download fails or the document has no `<urlset>` root.
   */
  async #parseSitemap() {
    try {
      const { data } = await axios.get(this.sitemapUrl);
      const parsed = this.parser.parse(data);

      const urlset = parsed?.urlset;
      if (urlset == null) {
        throw new Error("Sitemap does not contain a <urlset> element");
      }

      const entries = urlset.url;
      if (entries == null) {
        // An empty <urlset> is valid; there is simply nothing to check.
        return [];
      }

      // The parser yields a single object (not a one-element array) when a
      // tag occurs only once, so normalize before mapping.
      const entryList = Array.isArray(entries) ? entries : [entries];
      return entryList
        .map((entry) => entry?.loc)
        .filter((loc) => typeof loc === "string" && loc.length > 0);
    } catch (error) {
      console.error("Error parsing sitemap:", error.message);
      throw error;
    }
  }

  /**
   * Requests one URL without following redirects.
   *
   * @param {string} url
   * @returns {Promise<{url: string, status: (number|string), redirectUrl: string, timestamp: string} | null>}
   *   A record when the status is one of the configured problem statuses or the
   *   request itself failed (status `"Error"`); `null` otherwise.
   */
  async #checkUrl(url) {
    try {
      const response = await axios.get(url, {
        maxRedirects: 0,
        validateStatus: () => true, // Don't throw on any status
      });

      if (this.#problemStatuses.has(response.status)) {
        return {
          url,
          status: response.status,
          redirectUrl: response.headers.location ?? "",
          timestamp: new Date().toISOString(),
        };
      }
    } catch (error) {
      // Because every status is accepted above, this is reached for failures
      // that produced no usable response (DNS, connection refused, ...).
      return {
        url,
        status: error.response?.status ?? "Error",
        redirectUrl: error.response?.headers?.location ?? "",
        timestamp: new Date().toISOString(),
      };
    }

    return null;
  }

  /**
   * Checks every URL of the sitemap in batches and stores the problematic ones
   * in `this.results`.
   *
   * @returns {Promise<object[]>} The problematic-URL records.
   * @throws If the sitemap cannot be fetched or parsed.
   */
  async check() {
    try {
      console.log("Fetching sitemap...");
      const urls = await this.#parseSitemap();
      console.log(`Found ${urls.length} URLs to check`);

      const results = [];
      const batchSize = this.#batchSize;

      for (let i = 0; i < urls.length; i += batchSize) {
        const batch = urls.slice(i, i + batchSize);
        const batchResults = await Promise.all(
          batch.map((url) => this.#checkUrl(url))
        );
        results.push(...batchResults.filter((result) => result !== null));
        console.log(`Processed ${i + batch.length}/${urls.length} URLs`);
      }

      this.results = results;
      return results;
    } catch (error) {
      console.error("Error checking URLs:", error.message);
      throw error;
    }
  }

  /**
   * Writes `this.results` to a CSV file. Does nothing except log a message
   * when there are no results.
   *
   * @param {string} [filename="problematic-urls.csv"] - Output path.
   * @returns {Promise<void>}
   */
  async saveToCSV(filename = "problematic-urls.csv") {
    if (this.results.length === 0) {
      console.log("No problematic URLs found");
      return;
    }

    const csvContent = [
      "URL,Status Code,Redirect URL,Timestamp",
      ...this.results.map(({ url, status, redirectUrl, timestamp }) =>
        [url, status, redirectUrl, timestamp]
          .map((value) => SitemapChecker.#csvField(value))
          .join(",")
      ),
    ].join("\n");

    await writeFile(filename, csvContent);
    console.log(`Results saved to ${filename}`);
  }
}

// Example usage
const main = async () => {
  try {
    const sitemapUrl = process.argv[2];
    if (!sitemapUrl) {
      console.error("Please provide a sitemap URL as an argument");
      process.exit(1);
    }

    const checker = new SitemapChecker(sitemapUrl);
    await checker.check();
    await checker.saveToCSV();
  } catch (error) {
    console.error("Error:", error.message);
    process.exit(1);
  }
};

// Run the script
main();