ChatGPT (Business) backup tool
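Runs entirely in the browser: paste the script below into the DevTools console while signed in on chatgpt.com. It enumerates all owned Projects (their files, instructions, and chats) plus every non-project conversation, writes each chat as Markdown and raw JSON alongside any first-party attachments, and hands you one ZIP named ChatGPT-Export-<timestamp>.zip.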
// === ChatGPT Project + Chats Single-ZIP Exporter ========================
// - Exports ALL owned Projects (gizmos/snorlax) + their files + all project chats
// - Exports all non-project chats
// - First-party asset downloads only (chatgpt.com / *.openai.com)
// - ZIP writer (local headers → data → central dir → EOCD)
// =======================================================================
(async () => {
  // ---------------- Config ----------------
  const PAGE_LIMIT = 100;
  const INCLUDE_ARCHIVED = true;
  const INCLUDE_STARRED = true;
  const DETAIL_CONCURRENCY = 5;
  const ASSET_CONCURRENCY = 4;
  const PROJECT_FILE_CONCURRENCY = 3;
  const RETRIES = 3;
  const BASE_SLEEP_MS = 120;
  // ----------------------------------------
  const sleep = (ms) => new Promise(r => setTimeout(r, ms));
  const base = location.origin;
  const tsNow = () => new Date().toISOString().slice(0,19).replace(/[:T]/g,"-");
  const ALLOW_ASSET_ORIGIN = (url) => {
    try {
      const u = new URL(url);
      if (u.origin === base) return true;
      return /(^|\.)openai\.com$/i.test(u.hostname);
    } catch { return false; }
  };
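  // Anything whose origin is not the current page (chatgpt.com) or a *.openai.com host
  // is still listed in the Markdown output, but is never downloaded.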
  // ---------------- Auth ----------------
  async function getAccessToken() {
    const res = await fetch("/api/auth/session", { credentials: "include", cache: "no-store" });
    if (!res.ok) throw new Error(`Session fetch failed: ${res.status}`);
    const j = await res.json();
    const t = j?.accessToken;
    if (!t || !t.split(".")[2]) throw new Error("No valid accessToken in session.");
    return t;
  }
  const authHeaders = (t) => ({ "accept": "*/*", "authorization": `Bearer ${t}` });
  // --------------- Retry ----------------
  async function withRetry(fn, label) {
    let delay = 500;
    for (let i = 0; i < RETRIES; i++) {
      try { return await fn(); }
      catch (e) {
        if (i === RETRIES - 1) throw new Error(`${label} failed after ${RETRIES}: ${e?.message || e}`);
        await sleep(delay); delay = Math.min(delay * 2, 5000);
      }
    }
  }
  // --------------- Conversations API -----------------
  async function fetchPage(token, { offset = 0, is_archived = false, is_starred = false }) {
    const url = new URL(`${base}/backend-api/conversations`);
    url.searchParams.set("limit", String(PAGE_LIMIT));
    url.searchParams.set("order", "updated");
    url.searchParams.set("offset", String(offset));
    url.searchParams.set("is_archived", String(is_archived));
    url.searchParams.set("is_starred", String(is_starred));
    const res = await withRetry(() => fetch(url, {
      method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
    }), `fetchPage offset=${offset}`);
    if (!res.ok) throw new Error(`List fetch ${res.status}`);
    return res.json();
  }
  async function fetchConversation(token, id) {
    const res = await withRetry(() => fetch(`${base}/backend-api/conversation/${id}`, {
      method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
    }), `conversation ${id}`);
    if (!res.ok) throw new Error(`Conversation ${id} -> ${res.status}`);
    return res.json();
  }
  async function collectListing(token, flags) {
    const all = [];
    let offset = 0, pageNum = 1;
    while (true) {
      const page = await fetchPage(token, { offset, ...flags });
      const items = page.items || [];
      all.push(...items);
      console.log(`Chats page ${pageNum}: +${items.length} (acc=${all.length}/${page.total ?? "?"})`);
      if (items.length < PAGE_LIMIT || (page.total && all.length >= page.total)) break;
      offset += PAGE_LIMIT; pageNum++; await sleep(BASE_SLEEP_MS);
    }
    return all;
  }
  // --------------- Projects API -----------------
  async function fetchProjectsSidebar(token) {
    const url = new URL(`${base}/backend-api/gizmos/snorlax/sidebar`);
    url.searchParams.set("conversations_per_gizmo", "5"); // sidebar preview; we’ll fully page separately
    url.searchParams.set("owned_only", "true");
    const res = await withRetry(() => fetch(url, {
      method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
    }), "projects sidebar");
    if (!res.ok) throw new Error(`Projects sidebar ${res.status}`);
    const j = await res.json();
    // Shape: { items: [ { gizmo: { gizmo: {...}, files: [...] }, conversations: { items: [...] } }, ... ], cursor: null }
    return j?.items?.map(x => x?.gizmo?.gizmo).filter(Boolean) || [];
  }
  async function fetchProjectConversations(token, gizmoId) {
    const out = [];
    let cursor = 0;
    while (true) {
      const url = new URL(`${base}/backend-api/gizmos/${encodeURIComponent(gizmoId)}/conversations`);
      if (cursor != null) url.searchParams.set("cursor", String(cursor));
      const res = await withRetry(() => fetch(url, {
        method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
      }), `project convs ${gizmoId} cursor=${cursor}`);
      if (!res.ok) throw new Error(`Project conv list ${res.status}`);
      const j = await res.json();
      const items = j?.items || [];
      out.push(...items);
      console.log(`Project ${gizmoId}: +${items.length} convs`);
      if (!j?.cursor && items.length === 0) break;
      if (!j?.cursor) break;
      cursor = j.cursor; await sleep(BASE_SLEEP_MS);
    }
    return out;
  }
  async function fetchProjectFilesFromSidebar(token, fullProjectObj) {
    // Intentional stub: the sidebar payload already carries each project's files,
    // so main() fetches the sidebar once, indexes files by gizmo id (projectIndex),
    // and reads them from that cache rather than issuing one extra request per project (avoids N+1).
    return [];
  }
  // -------- Reconstruction --------
  function extractText(content) {
    if (!content) return "";
    try {
      const parts = content.parts;
      if (Array.isArray(parts)) {
        return parts.map(p => typeof p === "string" ? p : JSON.stringify(p)).join("\n").trim();
      }
    } catch {}
    if (typeof content === "string") return content;
    if (Array.isArray(content)) return content.map(String).join("\n");
    return JSON.stringify(content);
  }
  function discoverContext(metadata = {}) {
    const u = metadata?.user_context_message_data || {};
    return { about_user: u.about_user_message || null, about_model: u.about_model_message || null };
  }
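  // Walk the conversation tree: start at current_node (or a root if it is missing) and follow
  // parent pointers upward, then reverse so messages come out in chronological order.
  // Only the currently selected branch is exported; abandoned edit branches are skipped.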
  function flattenPath(convo) {
    const mapping = convo?.mapping || {};
    let nodeId = convo?.current_node;
    if (!nodeId) {
      const roots = Object.entries(mapping).filter(([,v]) => v?.parent == null).map(([k]) => k);
      nodeId = roots[0];
    }
    const path = []; const seen = new Set();
    while (nodeId && !seen.has(nodeId) && mapping[nodeId]) {
      seen.add(nodeId); path.push(nodeId); nodeId = mapping[nodeId]?.parent;
    }
    path.reverse();
    const messages = [];
    let about_user = null, about_model = null;
    for (const id of path) {
      const msg = mapping[id]?.message;
      if (!msg) continue;
      const role = msg?.author?.role || "unknown";
      const text = extractText(msg?.content);
      const ct = msg?.create_time ?? null;
      const meta = msg?.metadata || {};
      if (!about_user || !about_model) {
        const ctx = discoverContext(meta);
        if (ctx.about_user && !about_user) about_user = ctx.about_user;
        if (ctx.about_model && !about_model) about_model = ctx.about_model;
      }
      messages.push({ id: msg?.id || id, role, create_time: ct, text, meta });
    }
    return { messages, context: { about_user, about_model } };
  }
  // -------- Attachment discovery (metadata only) --------
  function harvestAttachmentDescriptors(meta) {
    const out = [];
    const tryPush = (obj) => {
      if (!obj) return;
      const url = obj.url || obj.download_url || obj.signed_url || obj.href;
      if (!url) return;
      const name = obj.name || obj.filename || obj.file_name || obj.display_name || null;
      const mime = obj.mime_type || obj.content_type || null;
      out.push({ url, name, mime });
    };
    try {
      if (Array.isArray(meta.attachments)) meta.attachments.forEach(tryPush);
      if (Array.isArray(meta.files)) meta.files.forEach(tryPush);
      if (meta.file) tryPush(meta.file);
      if (meta.url) tryPush({ url: meta.url, name: meta.name, mime_type: meta.mime_type });
      if (meta.artifacts && Array.isArray(meta.artifacts)) meta.artifacts.forEach(tryPush);
      for (const [k,v] of Object.entries(meta)) {
        if (!v) continue;
        if (Array.isArray(v) && v.length && typeof v[0] === "object" &&
            (k.includes("file") || k.includes("attach") || k.includes("artifact"))) v.forEach(tryPush);
      }
    } catch {}
    return out;
  }
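  // Every descriptor has the shape { url, name, mime }; name and mime may be null when the
  // metadata does not carry them, and url is the only field required for a download attempt.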
  // -------- Utilities --------
  const sanitizeFs = (name) =>
    (name || "Untitled").normalize("NFKC").replace(/[^\w\s\-\.\(\)\[\]&]+/g, "_").replace(/\s+/g, " ").trim() || "Untitled";
  const normalizeTitle = (s) => sanitizeFs((s || "Untitled").replace(/\s+/g, " ").trim());
  const isoFromTs = (ts) => (ts == null ? "—" : (typeof ts === "number" ? new Date(ts * 1000).toISOString() : String(ts)));
  // -------- Markdown (inline refs) --------
  function buildMarkdown(title, convoMeta, messages, context, perMessageRefs) {
    const lines = [];
    lines.push(`# ${title}\n`);
    if (context?.about_user || context?.about_model) {
      lines.push(`## Context\n`);
      if (context.about_user) lines.push(`**About User**\n> ${context.about_user.split("\n").join("\n> ")}\n`);
      if (context.about_model) lines.push(`**About Model**\n> ${context.about_model.split("\n").join("\n> ")}\n`);
    }
    lines.push(`## Metadata\n`);
    lines.push(`- **Conversation ID**: \`${convoMeta.id}\``);
    lines.push(`- **Created**: ${isoFromTs(convoMeta.create_time)}`);
    lines.push(`- **Updated**: ${isoFromTs(convoMeta.update_time)}\n`);
    lines.push(`## Transcript\n`);
    for (const m of messages) {
      const who = m.role === "user" ? "User" : (m.role === "assistant" ? "Assistant" : m.role);
      lines.push(`### ${who} — ${isoFromTs(m.create_time)}\n`);
      lines.push(m.text && m.text.trim() ? m.text : "_(no content)_");
      const refs = perMessageRefs.get(m.id) || { links: [], attachments: [] };
      if (refs.attachments.length) {
        lines.push(`\n**Attachments**`);
        for (const a of refs.attachments) {
          lines.push(`- ${a.saved ? `[\`${a.localPath}\`](${encodeURI(a.localPath)})` : `\`${a.name || a.url}\` (not downloaded)`}`);
        }
      }
      if (refs.links.length) {
        lines.push(`\n**Links mentioned**`);
        for (const u of refs.links) lines.push(`- ${u}`);
      }
      lines.push("");
    }
    return lines.join("\n");
  }
  // ================= ZIP Builder (robust) =================
  const CRC_TABLE = (() => {
    const t = new Uint32Array(256);
    for (let n = 0; n < 256; n++) {
      let c = n;
      for (let k = 0; k < 8; k++) c = (c & 1) ? (0xEDB88320 ^ (c >>> 1)) : (c >>> 1);
      t[n] = c >>> 0;
    }
    return t;
  })();
  function crc32(buf) {
    let c = 0xFFFFFFFF;
    for (let i = 0; i < buf.length; i++) c = CRC_TABLE[(c ^ buf[i]) & 0xFF] ^ (c >>> 8);
    return (c ^ 0xFFFFFFFF) >>> 0;
  }
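  // DOS timestamp packing per the ZIP spec: time = hours<<11 | minutes<<5 | seconds/2,
  // date = (year-1980)<<9 | month<<5 | day. Two-second resolution is inherent to the format.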
  function toDOS(date = new Date()) {
    const d = new Date(date);
    const time = ((d.getHours() & 31) << 11) | ((d.getMinutes() & 63) << 5) | ((Math.floor(d.getSeconds()/2)) & 31);
    const day = (((d.getFullYear()-1980) & 127) << 9) | (((d.getMonth()+1) & 15) << 5) | (d.getDate() & 31);
    return { time, day };
  }
  const enc = new TextEncoder();
  function u8(x){ return x instanceof Uint8Array ? x : enc.encode(x); }
  function u16le(v){ const a = new Uint8Array(2); new DataView(a.buffer).setUint16(0, v & 0xFFFF, true); return a; }
  function u32le(v){ const a = new Uint8Array(4); new DataView(a.buffer).setUint32(0, v >>> 0, true); return a; }
  function concat(chunks){
    const len = chunks.reduce((n,c)=>n+c.length,0);
    const out = new Uint8Array(len);
    let off = 0;
    for (const c of chunks){ out.set(c, off); off += c.length; }
    return out;
  }
  class ZipWriter {
    constructor() { this.files = []; this.chunks = []; this.offset = 0; }
    addLocalHeader(pathBytes, crc, size, dosTime, dosDate) {
      const sig = u32le(0x04034b50);
      const verNeeded = u16le(20);
      const gpFlag = u16le(0x0800); // UTF-8
      const comp = u16le(0); // store
      const t = u16le(dosTime);
      const d = u16le(dosDate);
      const crcLE = u32le(crc);
      const szLE = u32le(size);
      const nameLen = u16le(pathBytes.length);
      const extraLen = u16le(0);
      const header = concat([sig, verNeeded, gpFlag, comp, t, d, crcLE, szLE, szLE, nameLen, extraLen, pathBytes]);
      this.chunks.push(header);
      const localOffset = this.offset;
      this.offset += header.length;
      return localOffset;
    }
    addFile(name, dataBytes) {
      const pathBytes = u8(name);
      const { time, day } = toDOS(new Date());
      const crc = crc32(dataBytes);
      const localOffset = this.addLocalHeader(pathBytes, crc, dataBytes.length, time, day);
      this.chunks.push(dataBytes);
      this.offset += dataBytes.length;
      this.files.push({ nameBytes: pathBytes, crc, size: dataBytes.length, time, day, localOffset });
    }
    addCentralDirectory() {
      const entries = [];
      let centralSize = 0;
      for (const f of this.files) {
        const sig = u32le(0x02014b50);
        const verMade = u16le(20);
        const verNeeded = u16le(20);
        const gpFlag = u16le(0x0800);
        const comp = u16le(0);
        const t = u16le(f.time);
        const d = u16le(f.day);
        const crcLE = u32le(f.crc);
        const szLE = u32le(f.size);
        const nameLen = u16le(f.nameBytes.length);
        const extraLen = u16le(0);
        const commentLen = u16le(0);
        const diskStart = u16le(0);
        const intAttr = u16le(0);
        const extAttr = u32le(0);
        const relOff = u32le(f.localOffset);
        const hdr = concat([
          sig, verMade, verNeeded, gpFlag, comp, t, d, crcLE, szLE, szLE,
          nameLen, extraLen, commentLen, diskStart, intAttr, extAttr, relOff, f.nameBytes
        ]);
        entries.push(hdr);
        centralSize += hdr.length;
      }
      const startOfCD = this.offset;
      this.chunks.push(concat(entries));
      this.offset += centralSize;
      const eocdSig = u32le(0x06054b50);
      const diskNum = u16le(0);
      const diskCD = u16le(0);
      const totalEntriesDisk = u16le(this.files.length);
      const totalEntries = u16le(this.files.length);
      const cdSize = u32le(centralSize);
      const cdOffset = u32le(startOfCD);
      const commentLen = u16le(0);
      const eocd = concat([eocdSig, diskNum, diskCD, totalEntriesDisk, totalEntries, cdSize, cdOffset, commentLen]);
      this.chunks.push(eocd);
      this.offset += eocd.length;
    }
    finalizeBlob() {
      this.addCentralDirectory();
      const bytes = concat(this.chunks);
      return new Blob([bytes], { type: "application/zip" });
    }
  }
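  // Minimal usage sketch (entries are stored uncompressed, so any byte payload works):
  //   const z = new ZipWriter();
  //   z.addFile("hello.txt", u8("hello world"));
  //   const blob = z.finalizeBlob(); // ready for URL.createObjectURL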
  // -------- Asset fetch (messages) --------
  async function guessNameFromResponse(url, res, fallback) {
    const cd = res.headers.get("Content-Disposition") || "";
    const m = /filename\*?=(?:UTF-8''|")?([^";]+)?/i.exec(cd);
    let name = m?.[1] || fallback || url.split("?")[0].split("/").pop() || "asset";
    try { name = decodeURIComponent(name); } catch {}
    name = sanitizeFs(name);
    if (!/\.[A-Za-z0-9]{1,8}$/.test(name)) {
      const ct = (res.headers.get("Content-Type") || "").split(";")[0].trim();
      const ext = ({
        "image/png": ".png", "image/jpeg": ".jpg", "image/gif": ".gif",
        "text/plain": ".txt", "text/html": ".html", "application/pdf": ".pdf",
        "application/json": ".json"
      })[ct] || "";
      name += ext;
    }
    return name;
  }
  async function downloadAsset(token, desc) {
    const { url, name } = desc;
    if (!ALLOW_ASSET_ORIGIN(url)) throw new Error("Blocked by allowlist");
    const res = await withRetry(() => fetch(url, {
      method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
    }), `asset ${url}`);
    if (!res.ok) throw new Error(`Asset ${res.status}`);
    const blob = await res.blob();
    const fname = await guessNameFromResponse(url, res, name);
    const data = new Uint8Array(await blob.arrayBuffer());
    return { name: fname, data };
  }
  // -------- Project files fetch (by file_id) --------
  async function downloadProjectFileById(token, fileId, fallbackName) {
    const candidates = [
      `${base}/backend-api/files/${encodeURIComponent(fileId)}?download=1`,
      `${base}/backend-api/files/${encodeURIComponent(fileId)}`,
      `${base}/backend-api/files/${encodeURIComponent(fileId)}/download`,
    ];
    let lastErr = null;
    for (const url of candidates) {
      try {
        const res = await withRetry(() => fetch(url, {
          method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
        }), `project file ${fileId}`);
        if (!res.ok) { lastErr = new Error(`HTTP ${res.status}`); continue; }
        const blob = await res.blob();
        const name = await guessNameFromResponse(url, res, fallbackName || fileId);
        const data = new Uint8Array(await blob.arrayBuffer());
        return { name: sanitizeFs(name), data };
      } catch (e) {
        lastErr = e;
      }
    }
    throw lastErr || new Error(`Unable to download file ${fileId}`);
  }
  // -------- URL detection in text --------
  const URL_RE = /https?:\/\/[^\s)<>\]]+/ig;
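  // Greedy http(s) matcher; trailing ")]. ,;" punctuation is trimmed where the matches are
  // collected below, so "see (https://example.com)." still yields a clean link.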
  // ================= MAIN =================
  try {
    console.log("🔑 Resolving token…");
    const token = await getAccessToken();
    const stamp = tsNow();
    const top = `chatgpt-export-${stamp}`;
    const zip = new ZipWriter();
    // ===== 1) Discover projects (sidebar) & index files =====
    console.log("📁 Listing projects…");
    const sidebarItemsRes = await withRetry(() => fetch(`${base}/backend-api/gizmos/snorlax/sidebar?conversations_per_gizmo=5&owned_only=true`, {
      method: "GET", credentials: "include", cache: "no-store", headers: authHeaders(token)
    }), "projects sidebar (index)");
    if (!sidebarItemsRes.ok) throw new Error(`Projects sidebar index ${sidebarItemsRes.status}`);
    const sidebarJson = await sidebarItemsRes.json();
    const sidebarItems = sidebarJson?.items || [];
    // Build a quick index by gizmo id → { gizmo, files[] }
    const projectIndex = new Map();
    for (const it of sidebarItems) {
      const gizmo = it?.gizmo?.gizmo;
      if (!gizmo?.id) continue;
      const files = (it?.gizmo?.files || []).map(f => ({
        id: f?.file_id || f?.id, name: f?.name, type: f?.type, size: f?.size
      })).filter(x => x.id);
      projectIndex.set(gizmo.id, { gizmo, files });
    }
    // Also keep the list of owned gizmos from the index
    const projects = [...projectIndex.values()].map(v => v.gizmo);
    console.log(`✅ Projects found: ${projects.length}`);
    // Track project conversation IDs so the same chats are not duplicated in the global (non-project) export later
    const projectConversationIds = new Set();
    // ===== 2) Export projects with files and chats =====
    let pCounter = 0;
    for (const p of projects) {
      pCounter++;
      const pTitle = sanitizeFs(p?.display?.name || p?.short_url || p?.id || "Project");
      const pFolder = `${top}/projects/${String(pCounter).padStart(4,"0")} - ${pTitle}`;
      console.log(`\n📦 Project [${pCounter}/${projects.length}] ${pTitle}`);
      // Persist project.json
      zip.addFile(`${pFolder}/project.json`, u8(JSON.stringify(p, null, 2)));
      // Persist instructions.md (raw)
      const instr = p?.instructions || "";
      const instrMd = `# ${pTitle} — Instructions\n\n\`\`\`\n${instr}\n\`\`\`\n`;
      zip.addFile(`${pFolder}/instructions.md`, u8(instrMd));
      // Download project files (first-party via file_id endpoints)
      const filesMeta = projectIndex.get(p.id)?.files || [];
      if (filesMeta.length) {
        const fQ = filesMeta.slice();
        async function fWorker() {
          while (fQ.length) {
            const f = fQ.shift();
            try {
              const { name, data } = await downloadProjectFileById(token, f.id, f.name);
              zip.addFile(`${pFolder}/files/${name}`, data);
              console.log(` • file: ${name}`);
            } catch (e) {
              console.warn(` • file skipped: ${f.name || f.id}`, e?.message || e);
            }
            await sleep(60);
          }
        }
        await Promise.all([...Array(Math.min(PROJECT_FILE_CONCURRENCY, fQ.length || 1))].map(fWorker));
      }
      // Project conversations (paginate fully)
      const projConvs = await fetchProjectConversations(token, p.id);
      projConvs.forEach(c => projectConversationIds.add(c.id));
      // Export each project chat using the same pipeline as regular chats
      const convFolderBase = `${pFolder}/chats`;
      const titleCounts = new Map();
      let i = 0;
      for (const meta of projConvs) {
        i++;
        let data;
        try {
          data = await fetchConversation(token, meta.id);
        } catch (e) {
          console.warn(" • conversation fetch failed:", meta.id, e?.message || e);
          continue;
        }
        const rawTitle = data?.title ?? meta?.title ?? "Untitled";
        const safeTitle = normalizeTitle(rawTitle);
        const n = (titleCounts.get(safeTitle) || 0) + 1;
        titleCounts.set(safeTitle, n);
        const finalTitle = n === 1 ? safeTitle : `${safeTitle} (${n})`;
        const idx = String(i).padStart(4,"0");
        const folder = `${convFolderBase}/${idx} - ${finalTitle}`;
        // Reconstruct messages
        const { messages, context } = flattenPath(data);
        // Per-message references (links displayed only; attachments downloaded if first-party)
        const perMessageRefs = new Map();
        for (const m of messages) perMessageRefs.set(m.id, { links: [], attachments: [] });
        // Links (display only)
        for (const m of messages) {
          const links = [];
          (m.text || "").replace(URL_RE, (match) => {
            const cleaned = match.replace(/[)\].,;]+$/, "");
            links.push(cleaned); return match;
          });
          if (links.length) perMessageRefs.get(m.id).links = [...new Set(links)];
        }
        // Attachments from metadata
        const attQueue = [];
        for (const m of messages) {
          const descs = harvestAttachmentDescriptors(m.meta);
          const uniq = []; const seen = new Set();
          for (const d of descs) {
            if (!d?.url) continue;
            const key = d.url + "::" + (d.name || "");
            if (!seen.has(key)) { seen.add(key); uniq.push(d); }
          }
          for (const d of uniq) {
            const entry = { name: d.name || null, url: d.url, mime: d.mime || null, localPath: null, saved: false };
            perMessageRefs.get(m.id).attachments.push(entry);
            if (ALLOW_ASSET_ORIGIN(d.url)) attQueue.push({ msgId: m.id, entry });
          }
        }
        // Download first-party attachments and add to ZIP
        if (attQueue.length) {
          const aQ = attQueue.slice();
          async function aWorker() {
            while (aQ.length) {
              const job = aQ.shift();
              try {
                const { name, data: bin } = await downloadAsset(token, job.entry);
                const rel = `assets/${name}`;
                zip.addFile(`${folder}/${rel}`, bin);
                job.entry.localPath = rel;
                job.entry.saved = true;
              } catch (e) {
                console.warn(" • asset skipped:", job.entry.url, e);
              }
              await sleep(60);
            }
          }
          await Promise.all([...Array(Math.min(ASSET_CONCURRENCY, aQ.length || 1))].map(aWorker));
        }
        // Build MD and JSON and add to ZIP
        const convoMeta = {
          id: meta?.id ?? null,
          create_time: data?.create_time ?? meta?.create_time ?? null,
          update_time: data?.update_time ?? meta?.update_time ?? null
        };
        const md = buildMarkdown(rawTitle, convoMeta, messages, context, perMessageRefs);
        zip.addFile(`${folder}/conversation.md`, u8(md));
        zip.addFile(`${folder}/conversation.json`, u8(JSON.stringify(data, null, 2)));
        console.log(` • chat: ${rawTitle}`);
        await sleep(BASE_SLEEP_MS);
      }
    }
    // ===== 3) Export remaining (non-project) chats =====
    console.log("\n🗂️ Listing non-project chats…");
    const catalogs = [];
    catalogs.push(await collectListing(token, { is_archived: false, is_starred: false }));
    if (INCLUDE_ARCHIVED) catalogs.push(await collectListing(token, { is_archived: true, is_starred: false }));
    if (INCLUDE_STARRED) catalogs.push(await collectListing(token, { is_archived: false, is_starred: true }));
    const byId = new Map();
    [...catalogs.flat()]
      .sort((a,b) => new Date(b.update_time||0) - new Date(a.update_time||0))
      .forEach(it => { if (!projectConversationIds.has(it.id)) byId.set(it.id, it); });
    const listing = [...byId.values()];
    console.log(`✅ Non-project conversations to export: ${listing.length}`);
    const totalNonProject = listing.length;
    const chatsFolderBase = `${top}/chats`;
    const titleCountsGlobal = new Map();
    const queue = listing.slice();
    let idxChat = 0;
    async function exportChat(meta, i, total) {
      let data;
      try {
        data = await fetchConversation(token, meta.id);
      } catch (e) {
        console.warn("Detail failed:", meta.id, e);
        return;
      }
      const rawTitle = data?.title ?? meta?.title ?? "Untitled";
      const safeTitle = normalizeTitle(rawTitle);
      const n = (titleCountsGlobal.get(safeTitle) || 0) + 1;
      titleCountsGlobal.set(safeTitle, n);
      const finalTitle = n === 1 ? safeTitle : `${safeTitle} (${n})`;
      const idx = String(i).padStart(4,"0");
      const folder = `${chatsFolderBase}/${idx} - ${finalTitle}`;
      const { messages, context } = flattenPath(data);
      const perMessageRefs = new Map();
      for (const m of messages) perMessageRefs.set(m.id, { links: [], attachments: [] });
      // Links view only
      for (const m of messages) {
        const links = [];
        (m.text || "").replace(URL_RE, (match) => {
          const cleaned = match.replace(/[)\].,;]+$/, "");
          links.push(cleaned); return match;
        });
        if (links.length) perMessageRefs.get(m.id).links = [...new Set(links)];
      }
      // Attachments
      const attQueue = [];
      for (const m of messages) {
        const descs = harvestAttachmentDescriptors(m.meta);
        const uniq = []; const seen = new Set();
        for (const d of descs) {
          if (!d?.url) continue;
          const key = d.url + "::" + (d.name || "");
          if (!seen.has(key)) { seen.add(key); uniq.push(d); }
        }
        for (const d of uniq) {
          const entry = { name: d.name || null, url: d.url, mime: d.mime || null, localPath: null, saved: false };
          perMessageRefs.get(m.id).attachments.push(entry);
          if (ALLOW_ASSET_ORIGIN(d.url)) attQueue.push({ msgId: m.id, entry });
        }
      }
      if (attQueue.length) {
        const aQ = attQueue.slice();
        async function aWorker() {
          while (aQ.length) {
            const job = aQ.shift();
            try {
              const { name, data: bin } = await downloadAsset(token, job.entry);
              const rel = `assets/${name}`;
              zip.addFile(`${folder}/${rel}`, bin);
              job.entry.localPath = rel;
              job.entry.saved = true;
            } catch (e) {
              console.warn("Asset skipped:", job.entry.url, e);
            }
            await sleep(60);
          }
        }
        await Promise.all([...Array(Math.min(ASSET_CONCURRENCY, aQ.length || 1))].map(aWorker));
      }
      const convoMeta = {
        id: meta?.id ?? null,
        create_time: data?.create_time ?? meta?.create_time ?? null,
        update_time: data?.update_time ?? meta?.update_time ?? null
      };
      const md = buildMarkdown(rawTitle, convoMeta, messages, context, perMessageRefs);
      zip.addFile(`${folder}/conversation.md`, u8(md));
      zip.addFile(`${folder}/conversation.json`, u8(JSON.stringify(data, null, 2)));
      console.log(`📄 [non-project ${String(i).padStart(4,"0")}/${total}] ${rawTitle}`);
    }
    async function worker() {
      while (queue.length) {
        const item = queue.shift();
        idxChat++;
        try { await exportChat(item, idxChat, totalNonProject); }
        catch (e) { console.warn("Export non-project chat failed:", item?.id, e?.message || e); }
        await sleep(BASE_SLEEP_MS);
      }
    }
    await Promise.all([...Array(DETAIL_CONCURRENCY)].map(worker));
    // ===== 4) Finalize ZIP and download =====
    const zipBlob = zip.finalizeBlob();
    const name = `ChatGPT-Export-${stamp}.zip`;
    const a = document.createElement("a");
    a.href = URL.createObjectURL(zipBlob);
    a.download = name;
    document.body.appendChild(a);
    a.click();
    setTimeout(() => { URL.revokeObjectURL(a.href); a.remove(); }, 1000);
    console.log(`\n🎉 Export complete → ${name}`);
  } catch (err) {
    console.error("❌ Export aborted:", err);
    alert(`Export failed: ${err?.message || err}`);
  }
})();
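Resulting ZIP layout, per the paths built above (numbering and titles are illustrative):

  chatgpt-export-<timestamp>/
    projects/0001 - <Project>/
      project.json
      instructions.md
      files/…
      chats/0001 - <Title>/  (conversation.md, conversation.json, assets/…)
    chats/0001 - <Title>/
      conversation.md
      conversation.json
      assets/…

Each conversation keeps its Markdown transcript, raw JSON, and any downloaded first-party attachments side by side.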