Created
August 6, 2025 01:42
-
-
Save wlib/1ec790c2ee8367ad97cab4f499ad5eab to your computer and use it in GitHub Desktop.
Revisions
-
wlib created this gist
Aug 6, 2025 .There are no files selected for viewing
/**
 * Amazon order-history scraper.
 *
 * Flow:
 *   1. Load the orders landing page and read the per-year filter options.
 *   2. Walk every page of every year and collect order IDs from the links
 *      pointing at the printable invoice page.
 *   3. Fetch each printable invoice and extract the order date and items,
 *      using either the "modern" (data-component based) or the "legacy"
 *      (table based) layout.
 *   4. Log the resulting `orderIdToInfo` object.
 *
 * NOTE(review): presumably meant to be pasted into the devtools console of a
 * signed-in https://www.amazon.com tab (it reads `iframe.contentDocument`,
 * which requires a same-origin page) — confirm before relying on it elsewhere.
 */

const ordersUrl = "https://www.amazon.com/your-orders/orders"
const invoiceUrlPrefix = "https://www.amazon.com/gp/css/summary/print.html"
// Step between consecutive `startIndex` values in the order-list pagination.
const ordersPerPage = 10

/**
 * Resolves after `ms` milliseconds; used to pace requests.
 * @param {number} [ms=1000]
 * @returns {Promise<void>}
 */
const wait = (ms = 1_000) => new Promise(resolve => {
  setTimeout(() => {
    resolve()
  }, ms)
})

/**
 * Fetches `url` and parses the response body as an HTML document.
 * @param {string | URL} url
 * @returns {Promise<Document | undefined>} `undefined` when the response is
 *   not OK or the body is empty. Network errors reject, as with `fetch`.
 */
const fetchDocument = async url => {
  const response = await fetch(url)
  if (!response.ok) return undefined
  const html = await response.text()
  if (!html) return undefined
  return Document.parseHTMLUnsafe(html)
}

/**
 * Loads `url` in a temporary hidden iframe and returns a detached copy of the
 * loaded document (re-parsed from its serialized HTML).
 *
 * The iframe has to be inserted into the page: an iframe that is not connected
 * to a document never navigates, so its `load` event would never fire.
 * It is removed again as soon as the content has been captured.
 *
 * @param {string} url
 * @returns {Promise<Document>} Rejects when the loaded document cannot be read
 *   (e.g. `contentDocument` is null because the frame ended up cross-origin).
 */
const fetchDocumentViaIframe = url => new Promise((resolve, reject) => {
  const iframe = document.createElement("iframe")
  iframe.hidden = true
  iframe.addEventListener("load", () => {
    try {
      const html = iframe.contentDocument.documentElement.outerHTML
      resolve(Document.parseHTMLUnsafe(html))
    } catch (error) {
      reject(new Error(`Could not read the iframe document for ${url}`, { cause: error }))
    } finally {
      iframe.remove()
    }
  })
  // Set the source before inserting so only a single navigation happens.
  iframe.src = url
  document.body.append(iframe)
})

/**
 * Resolves `value` against `baseUrl`.
 * @param {string | null | undefined} value
 * @param {string | URL} baseUrl
 * @returns {string | undefined} The absolute URL, or `undefined` when `value`
 *   is missing/empty or cannot be parsed. (Passing `undefined` straight to
 *   `new URL` would be stringified to the relative path "undefined".)
 */
const resolveUrl = (value, baseUrl) => {
  if (value == null || value === "") return undefined
  try {
    return new URL(value, baseUrl).href
  } catch {
    return undefined
  }
}

/**
 * Parses a base-10 integer, returning `fallback` when `text` has none.
 * @param {string | null | undefined} text
 * @param {number | undefined} fallback
 * @returns {number | undefined}
 */
const parseQuantity = (text, fallback) => {
  const quantity = Number.parseInt(text, 10)
  return Number.isNaN(quantity) ? fallback : quantity
}

/** Every order ID discovered so far (filled by `extractOrderIds`). */
const orderIds = new Set()

/**
 * Adds to `orderIds` the `orderID` query parameter of every link in `doc`
 * that points at the printable invoice page.
 * @param {Document} doc
 * @param {string | URL} baseUrl Base used to resolve relative links.
 */
const extractOrderIds = (doc, baseUrl) => {
  for (const a of doc.querySelectorAll("a")) {
    let url
    try {
      url = new URL(a.href, baseUrl)
    } catch {
      // Unparseable link: cannot be an invoice link, skip it.
      continue
    }
    if (!url.href.startsWith(invoiceUrlPrefix)) continue
    const orderId = url.searchParams.get("orderID")
    if (!orderId) continue
    orderIds.add(orderId)
  }
}

/**
 * Finds the largest `startIndex` among the pagination links of an order-list
 * page.
 * @param {Document} doc
 * @param {string | URL} pageUrl Base used to resolve relative links.
 * @returns {number} The largest start index, or 0 when there is no usable
 *   pagination link.
 */
const findMaxStartIndex = (doc, pageUrl) => {
  let maxStartIndex = 0
  for (const a of doc.querySelectorAll(".a-pagination a")) {
    let url
    try {
      url = new URL(a.href, pageUrl)
    } catch {
      continue
    }
    if (!url.href.startsWith(ordersUrl)) continue
    const startIndex = Number.parseInt(url.searchParams.get("startIndex"), 10)
    if (Number.isNaN(startIndex)) continue
    maxStartIndex = Math.max(maxStartIndex, startIndex)
  }
  return maxStartIndex
}

/**
 * Builds the URLs of every page after the first one, i.e. the pages with
 * `startIndex` = 10, 20, … up to `maxStartIndex`.
 * @param {string} firstPageUrl
 * @param {number} maxStartIndex
 * @returns {string[]}
 */
const buildOtherPageUrls = (firstPageUrl, maxStartIndex) => {
  const pageCount = Math.max(0, Math.floor(maxStartIndex / ordersPerPage))
  return Array.from({ length: pageCount }, (_, i) => {
    const url = new URL(firstPageUrl)
    url.searchParams.set("startIndex", String((i + 1) * ordersPerPage))
    return url.href
  })
}

/**
 * Extracts order information from an invoice page that uses the
 * `data-component` based layout.
 * @param {Document} invoicePage
 * @param {string | URL} baseUrl Base used to resolve relative URLs.
 * @returns {{ orderDate: string | undefined, items: Array<{
 *   image: string | undefined, title: string | undefined,
 *   url: string | undefined, unitPrice: string | undefined,
 *   quantity: number | undefined }> }}
 */
const extractModernInvoiceInfo = (invoicePage, baseUrl) => {
  const orderDate = invoicePage.querySelector("[data-component = orderDate]")?.textContent.trim()

  const items = [...invoicePage.querySelectorAll("[data-component = purchasedItems] .a-fixed-left-grid")]
    .map(purchasedItem => {
      const imageContainer = purchasedItem.querySelector("[data-component = itemImage]")
      const image = resolveUrl(imageContainer?.querySelector("img")?.getAttribute("src"), baseUrl)
      const quantity = parseQuantity(
        imageContainer?.querySelector(".od-item-view-qty")?.textContent.trim(),
        undefined,
      )

      const titleLink = purchasedItem.querySelector("[data-component = itemTitle] a")
      const title = titleLink?.textContent.trim()

      let url
      if (titleLink) {
        try {
          const productUrl = new URL(titleLink.href, baseUrl)
          // Drop the query string from the product link.
          productUrl.search = ""
          url = productUrl.href
        } catch {
          // Best effort: an unparseable product link leaves `url` undefined.
        }
      }

      const unitPrice = purchasedItem.querySelector("[data-component = unitPrice] .a-offscreen")?.textContent.trim()

      return { image, title, url, unitPrice, quantity }
    })

  return { orderDate, items }
}

/**
 * Merges items that share both title and unit price by summing their
 * quantities. Input objects are not modified.
 * @param {Array<{ title: string, unitPrice: string, quantity: number }>} rawItems
 * @returns {Array<{ title: string, unitPrice: string, quantity: number }>}
 */
const mergeDuplicateItems = rawItems => {
  const itemsByKey = new Map()
  for (const item of rawItems) {
    const key = `${item.title}\0${item.unitPrice}`
    const existing = itemsByKey.get(key)
    if (existing) existing.quantity += item.quantity
    else itemsByKey.set(key, { ...item })
  }
  return [...itemsByKey.values()]
}

/**
 * Extracts order information from an invoice page that uses the table based
 * layout: an item row is a `<tr>` containing an `<i>` (the title) and a
 * right-aligned cell whose text contains "$" (the price).
 * @param {Document} invoicePage
 * @param {string | URL} baseUrl Unused; kept for signature parity with
 *   `extractModernInvoiceInfo`.
 * @returns {{ orderDate: string | undefined, items: Array<{
 *   title: string, unitPrice: string, quantity: number }> }}
 */
const extractLegacyInvoiceInfo = (invoicePage, baseUrl) => {
  // Only innermost cells, so a wrapping layout cell is not matched.
  const orderDateCell = [...invoicePage.querySelectorAll("td:not(:has(td))")]
    .find(td => td.textContent.includes("Order Placed:"))
  const orderDate = orderDateCell?.textContent.replace("Order Placed:", "").trim()

  const rawItems = [...invoicePage.querySelectorAll("tr")]
    .flatMap(row => {
      const italicText = row.querySelector("i")
      const priceCell = row.querySelector("td[align = right]")
      if (!(italicText && priceCell && priceCell.textContent.includes("$"))) return []

      // The node before the title reads like "2 of:"; default to 1 if absent.
      const quantity = parseQuantity(
        italicText.previousSibling?.textContent.match(/(\d+)\s+of:/)?.[1],
        1,
      )
      const title = italicText.textContent.trim()
      const unitPrice = priceCell.textContent.trim()

      return [{ title, unitPrice, quantity }]
    })

  return { orderDate, items: mergeDuplicateItems(rawItems) }
}

/** Extracted invoice information keyed by order ID. */
const orderIdToInfo = {}

/**
 * Walks every page of every year of the order history and fills `orderIds`.
 * Throws when the landing page or its year filter cannot be read, since no
 * complete result can be produced without it.
 * @returns {Promise<void>}
 */
const collectOrderIds = async () => {
  const ordersPage = await fetchDocument(ordersUrl)
  const timeFilter = ordersPage?.querySelector("select[name = timeFilter]")
  if (!timeFilter) {
    throw new Error(`Could not find the order time filter on ${ordersUrl} (not signed in?)`)
  }

  const ordersUrlsByYear = [...timeFilter.options].flatMap(option => {
    if (!option.value.startsWith("year-")) return []
    const url = new URL(ordersUrl)
    url.searchParams.set("timeFilter", option.value)
    return [url.href]
  })

  for (const ordersPageUrl of ordersUrlsByYear) {
    const firstPage = await fetchDocumentViaIframe(ordersPageUrl)
    await wait()
    extractOrderIds(firstPage, ordersPageUrl)

    const maxStartIndex = findMaxStartIndex(firstPage, ordersPageUrl)
    for (const otherPageUrl of buildOtherPageUrls(ordersPageUrl, maxStartIndex)) {
      const otherPage = await fetchDocumentViaIframe(otherPageUrl)
      await wait()
      extractOrderIds(otherPage, otherPageUrl)
    }
  }
}

/**
 * Fetches the printable invoice of every collected order and fills
 * `orderIdToInfo`. An invoice that cannot be fetched is reported and skipped
 * so that one failure does not discard everything scraped so far.
 * @returns {Promise<void>}
 */
const collectInvoiceInfo = async () => {
  for (const orderId of orderIds) {
    const invoiceUrl = new URL(invoiceUrlPrefix)
    invoiceUrl.searchParams.set("orderID", orderId)

    let invoicePage
    let failure
    try {
      invoicePage = await fetchDocument(invoiceUrl)
    } catch (error) {
      failure = error
    }
    await wait()

    if (!invoicePage) {
      console.warn(
        `Skipping order ${orderId}: invoice could not be fetched`,
        failure ?? "(non-OK or empty response)",
      )
      continue
    }

    orderIdToInfo[orderId] = invoicePage.querySelector("[data-component = orderDate]")
      ? extractModernInvoiceInfo(invoicePage, invoiceUrl)
      : extractLegacyInvoiceInfo(invoicePage, invoiceUrl)
  }
}

// Only scrape where a DOM is available (the browser); elsewhere the helpers
// above are merely defined, without any network side effects.
if (typeof document !== "undefined") {
  await collectOrderIds()
  await collectInvoiceInfo()
  console.log(orderIdToInfo)
}