// MAIN FUNCTION
// Scrapes every conversation reachable from the chat links on the page and
// downloads the result as conversationHistory.json.
//
// Intended to be run from the browser console on the chat site: call main().

/**
 * Click through each chat link, scrape the messages shown in #chatlog, and
 * download everything as JSON.
 *
 * @param {number} [min=0] - Index of the first chat link to scrape; earlier
 *   links are skipped (useful for resuming a partial run).
 * @param {number} [loadDelayMs=3500] - How long to wait after clicking a link
 *   before reading #chatlog. This is a fixed delay, not a load detector.
 * @returns {Promise<Array<{title: string, messages: Array<{name: string, html: string}>}>>}
 *   One entry per scraped link, in link order.
 */
async function main(min = 0, loadDelayMs = 3500) {
  // Click the href-less links first so that every chat link is present.
  const anchors = await clickEmptyChakraLinks();
  const conversations = [];

  for (let i = min; i < anchors.length; i++) {
    const anchor = anchors[i];
    anchor.click();
    const title = anchor.innerText;

    // Give the page time to swap in the clicked conversation.
    await wait(loadDelayMs);

    const container = document.querySelector('#chatlog');
    if (!container) {
      // Keep going: one conversation that failed to load should not throw
      // away everything scraped so far.
      console.warn("No #chatlog found for conversation", title);
    }
    const messages = container ? scrapeMessages(container) : [];

    // push (not conversations[i] = ...) so that starting at min > 0 does not
    // leave holes that serialize as null.
    conversations.push({ title, messages });
    console.log("PUSHED CONVERSATION", title);
  }

  // download the conversation history
  downloadStringAsFile(JSON.stringify(conversations), 'conversationHistory.json');
  return conversations;
}

/**
 * Extract {name, html} for each message element inside a chatlog container.
 *
 * The last child is skipped (per the original author: "last message isn't
 * real"). Children missing the avatar label or the message wrapper are logged
 * and skipped.
 *
 * @param {Element} container - The #chatlog element.
 * @returns {Array<{name: string, html: string}>}
 */
function scrapeMessages(container) {
  const messages = [];
  const lastRealIndex = container.children.length - 1; // exclusive bound

  for (let j = 0; j < lastRealIndex; j++) {
    const child = container.children[j];
    console.log("querying ", j);

    const avatar = child.querySelector('.chakra-avatar div');
    const wrapper = child.querySelector('.chat-message-wrapper');
    if (!avatar || !wrapper) {
      console.warn("Failed to get name or html", j);
      continue;
    }

    messages.push({
      name: avatar.getAttribute('aria-label'),
      html: wrapper.innerHTML,
    });
  }

  return messages;
}

/**
 * Resolve after the given delay; used to throw some delay into the crawl.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// Function definitions BEGIN

/**
 * Repeatedly click every `a.chakra-link` that has no href until none remain,
 * then return the chat links.
 *
 * Presumably the href-less links are expanders that reveal more chat links
 * when clicked - TODO confirm against the site.
 * NOTE(review): if an href-less link stays on the page after being clicked,
 * this loop never terminates.
 *
 * @returns {Promise<NodeListOf<HTMLAnchorElement>>} All
 *   `a.chakra-link[href^="/chat"]` elements once no href-less links are left.
 */
async function clickEmptyChakraLinks() {
  for (;;) {
    // grab all the chakra links, filter by a tags with no href
    const emptyLinks = Array.from(document.querySelectorAll('a.chakra-link'))
      .filter((a) => !a.href);

    if (emptyLinks.length === 0) {
      return document.querySelectorAll('a.chakra-link[href^="/chat"]');
    }

    // Click all of them, then give the page a moment to update before
    // checking again.
    emptyLinks.forEach((a) => a.click());
    await wait(100);
  }
}

/**
 * Click a history link and resolve once a fresh #chatlog has been loaded.
 *
 * When a history link is clicked, the chatlog is first removed and later
 * re-appended. So this watches `container` for mutations, sets a flag once
 * #chatlog is gone, and resolves the next time #chatlog exists again.
 *
 * NOTE: the promise never settles if #chatlog is not removed and re-added.
 *
 * @param {HTMLElement} element - The link to click.
 * @param {Element} container - Ancestor to observe; #chatlog is looked up
 *   inside it.
 * @returns {Promise<Element>} The re-appended #chatlog element.
 */
function loadHistory(element, container) {
  console.log("LOADING HISTORY", element.innerText);

  return new Promise((resolve) => {
    let chatlogRemoved = false;

    const observer = new MutationObserver(() => {
      const chatlog = container.querySelector('#chatlog');
      if (!chatlog) {
        // the chat history has been removed
        chatlogRemoved = true;
        console.log('!!! chatlog removed');
      } else if (chatlogRemoved) {
        // the chat history is back: stop observing and hand it over
        observer.disconnect();
        console.log('!!! resolving');
        resolve(chatlog);
      }
    });

    // Start observing BEFORE clicking: if the click handler removes #chatlog
    // synchronously, an observer registered afterwards would miss the removal
    // and the promise would never resolve.
    observer.observe(container, { childList: true, subtree: true });
    element.click();
  });
}

/**
 * Trigger a browser download of `str` as a text file.
 *
 * @param {string} str - File contents.
 * @param {string} [filename='download.txt'] - Suggested file name.
 */
function downloadStringAsFile(str, filename = 'download.txt') {
  // Create a Blob object from the input string
  const blob = new Blob([str], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);

  // Create a hidden anchor element with the download attribute
  const link = document.createElement('a');
  link.href = url;
  link.download = filename;
  link.style.display = 'none';

  // Attach it, click it programmatically, then clean up the element
  document.body.appendChild(link);
  link.click();
  link.remove();

  // Release the blob URL so the blob can be garbage collected. Deferred
  // briefly so the browser has started the download before the URL goes away.
  setTimeout(() => URL.revokeObjectURL(url), 100);
}