Skip to content

Instantly share code, notes, and snippets.

@amandiobm
Forked from YuviGold/scrape_whatsapp.js
Created September 13, 2025 17:56
Show Gist options
  • Select an option

  • Save amandiobm/b865a0bf8ae03f572216e5e8ce7df556 to your computer and use it in GitHub Desktop.

Select an option

Save amandiobm/b865a0bf8ae03f572216e5e8ce7df556 to your computer and use it in GitHub Desktop.

Revisions

  1. @YuviGold YuviGold created this gist Jul 10, 2024.
    298 changes: 298 additions & 0 deletions scrape_whatsapp.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,298 @@
    /*
    Inspired by https://gist.github.com/shaneapen/3406477b9f946855d02e3f33ec121975
    The script scrapes the members of a WhatsApp group chat and exports the data to a CSV file.
    It scrolls automatically and extracts each list item in the members list with all the information available.
    Then it joins this information with the IndexedDB data to get the groups the member is in and whether the
    contact is already saved in the phone.
    Steps:
    1. Open WhatsApp Web
    2. Open the group chat you want to scrape ->
    Click on the group name to open the group info ->
    Click on the members list
    3. Open the browser console (F12)
    4. Copy and paste the code below into the console and press Enter
    After the script has finished running, the CSV file containing the scraped data is downloaded automatically.
    */

    // Delay between two automatic scroll steps, in milliseconds (passed to setInterval).
    const SCROLL_INTERVAL = 1000;
    // Amount added to the list's scrollTop on every automatic scroll step.
    const SCROLL_INCREMENT = 450;
    // When true, start() installs a timer that scrolls the members list automatically.
    const AUTO_SCROLL = true;
    // When true, scraped rows are enriched with groups/contacts read from IndexedDB.
    const CHECK_INDEXEDDB = true;

    // Mutable state shared by start(), stop(), autoScroll() and scrapeData().
    var scrollInterval, observer, membersList, header, MEMBERS_QUEUE;

    /**
     * Read-only helper around the "model-storage" IndexedDB database that this
     * script queries for groups, group participants and contacts.
     */
    class WhatsappDB {
        // Promise of the open database. The promise (not the resolved handle) is
        // cached so that concurrent callers share a single indexedDB.open() call.
        #db;
        #dbName = "model-storage";
        #groupsCollection = "group-metadata";
        #contactsCollection = "contact";
        #phoneNumberIndex = "phoneNumber";
        #participantsCollection = "participant";
        #participantsIndex = "participants";

        /**
         * Opens the database on first use and reuses the connection afterwards.
         *
         * @returns {Promise<IDBDatabase>} resolves with the open database;
         *   rejects with the open request's error when opening fails.
         */
        async openConnection() {
            if (!this.#db) {
                const dbName = this.#dbName;
                const pending = new Promise((resolve, reject) => {
                    const request = indexedDB.open(dbName);
                    request.onerror = (event) => {
                        reject(request.error ?? event);
                    };
                    request.onsuccess = () => {
                        resolve(request.result);
                    };
                });
                this.#db = pending;
                // Forget a failed attempt so that a later call can retry.
                pending.catch(() => {
                    if (this.#db === pending) {
                        this.#db = undefined;
                    }
                });
            }
            return this.#db;
        }

        /**
         * Runs getAll() against an object store, or against one of its indexes,
         * inside a read-only transaction.
         *
         * @param {string} collection - object store name.
         * @param {string} [index] - index name; when omitted the store itself is queried.
         * @param {*} [query] - key or key range forwarded to getAll().
         * @param {number} [count] - maximum number of records, forwarded to getAll().
         * @returns {Promise<Array>} the matching records.
         */
        async #promisifyCol(collection, index, query, count) {
            const db = await this.openConnection();
            return new Promise((resolve, reject) => {
                const transaction = db.transaction(collection, "readonly");
                const objectStore = transaction.objectStore(collection);
                const source = index ? objectStore.index(index) : objectStore;
                const request = source.getAll(query, count);

                request.onerror = (event) => {
                    reject(request.error ?? event);
                };
                request.onsuccess = () => {
                    resolve(request.result);
                };
            });
        }

        /**
         * @returns {Promise<Array>} every record of the "group-metadata" store.
         */
        async getGroups() {
            return this.#promisifyCol(this.#groupsCollection);
        }

        /**
         * @param {*} key - value looked up in the "participants" index.
         * @returns {Promise<Array>} records of the "participant" store matching the key.
         */
        async getParticipants(key) {
            return this.#promisifyCol(this.#participantsCollection, this.#participantsIndex, key);
        }

        /**
         * @param {*} [key] - value looked up in the "phoneNumber" index; when
         *   omitted, getAll() is called without a query.
         * @returns {Promise<Array>} records of the "contact" store read through that index.
         */
        async getContacts(key) {
            return this.#promisifyCol(this.#contactsCollection, this.#phoneNumberIndex, key);
        }

        /**
         * Builds the lookup key for a phone number: drops the first "+" and
         * appends "@c.us".
         *
         * @param {string} phone
         * @returns {string}
         */
        phoneToKey(phone) {
            return `${phone.replace('+', '')}@c.us`;
        }
    }

    // Shared database helper. Declared explicitly: an undeclared assignment
    // throws a ReferenceError when the script is evaluated in strict mode.
    var whatsappDB = new WhatsappDB();
    // Filled by start() when CHECK_INDEXEDDB is enabled.
    var groups, contacts;

    // Fall back to the WebKit-prefixed constructor when the standard one is missing.
    window.MutationObserver = window.MutationObserver || window.WebKitMutationObserver;

    /**
     * One automatic scroll step: stops the scrape when the scrollable element
     * (the second sibling after the header) has reached its end, otherwise
     * advances its scrollTop by SCROLL_INCREMENT.
     */
    const autoScroll = function () {
        const scrollable = header.nextSibling.nextSibling;

        if (scrollEndReached(scrollable)) {
            stop();
            return;
        }

        scrollable.scrollTop += SCROLL_INCREMENT;
    };

    /**
     * Starts a scrape run: resets the result map, optionally loads groups and
     * contacts from IndexedDB, watches the members list for DOM changes, scrapes
     * the currently rendered rows once and, when AUTO_SCROLL is enabled, starts
     * the auto-scroll timer.
     *
     * The returned promise settles after this set-up and the first scrape pass;
     * later passes are triggered by the MutationObserver.
     *
     * @returns {Promise<void>}
     * @throws {Error} when the page contains no <header> element.
     */
    async function start() {
        MEMBERS_QUEUE = {};

        if (CHECK_INDEXEDDB) {
            groups = await whatsappDB.getGroups();
            contacts = await whatsappDB.getContacts();
        }

        header = document.getElementsByTagName('header')[0];
        if (!header) {
            throw new Error("No <header> element found; open the group's members list before running the script.");
        }
        membersList = header.parentNode;

        // Re-scrape whenever the list changes; report failures instead of
        // leaving an unhandled promise rejection on every mutation.
        observer = new MutationObserver(() => {
            scrapeData().catch((err) => {
                console.error("Scraping after a DOM mutation failed:", err);
            });
        });

        // the div to watch for mutations
        observer.observe(membersList, {
            childList: true,
            subtree: true
        });

        // scroll to top before beginning
        header.nextSibling.nextSibling.scrollTop = 0;
        await scrapeData();
        if (AUTO_SCROLL) {
            scrollInterval = setInterval(autoScroll, SCROLL_INTERVAL);
        }
    }


    /**
     * Ends the current scrape run: cancels the auto-scroll timer, disconnects
     * the mutation observer, prints the collected members to the console and
     * hands the CSV export to createDownloadLink().
     */
    const stop = () => {
        window.clearInterval(scrollInterval);
        observer.disconnect();

        console.table(MEMBERS_QUEUE);
        const scrapedCount = Object.keys(MEMBERS_QUEUE).length;
        console.log(`Scrape stopped. ${scrapedCount} members scraped.`);

        const csv = convertToCSV(Object.values(MEMBERS_QUEUE));
        createDownloadLink(csv, "whatsapp_members.csv");
    };


    /**
     * Scrapes every member row currently present under membersList and stores
     * the result in MEMBERS_QUEUE, keyed by phone number when there is one and
     * by name otherwise. The row whose name is "You" is skipped.
     *
     * @returns {Promise<void>}
     */
    async function scrapeData() {
        const rows = membersList.querySelectorAll('[role=listitem] > [role=button]');

        for (const row of rows) {
            const info = await handleMember(row);

            if (info.name === "You") {
                continue;
            }

            const key = info.phone ? info.phone : info.name;
            MEMBERS_QUEUE[key] = info;
        }
    }


    /**
     * Extracts everything known about one member row.
     *
     * @param {Element} member - the row element of a member.
     * @returns {Promise<{phone: ?string, name: ?string, status: ?string,
     *   image: ?string, groups: string, isSaved: string}>} `groups` and
     *   `isSaved` are JSON-encoded strings.
     */
    async function handleMember(member) {
        const title = getTitle(member);
        const phoneCaption = getPhone(member);
        const status = getStatus(member);
        const image = getImage(member);
        let memberGroups = [];
        let isSaved = false;

        // If contact unsaved - the phone is the caption or the title.
        // If contact saved - the phone is unavailable.
        // The title may be missing (null), so it is probed null-safely.
        let phone = null;
        if (phoneCaption) {
            phone = phoneCaption;
        } else if (title?.startsWith("+")) {
            phone = title;
        }

        const name = phoneCaption || !phone ? title : null;
        if (name && !phone) {
            // No phone in the row: try to resolve it from the loaded contacts by name.
            const contact = await getContact(name);
            phone = contact?.phoneNumber ? `+${contact.phoneNumber.split('@')[0]}` : null;
            isSaved = !!contact;
        }

        if (phone) {
            // Strip whitespace, dashes and parentheses before using the number as a key.
            phone = phone.replace(/[\s\-()]/g, '');
            memberGroups = await getGroups(phone);
        }

        return {
            phone: phone,
            name: name,
            status: status,
            image: image,
            groups: JSON.stringify(memberGroups),
            isSaved: JSON.stringify(isSaved),
        };
    }

    /**
     * Returns the row's first <img> converted by imageToDataURL(), or null when
     * the row contains no image.
     *
     * @param {Element} member
     */
    function getImage(member) {
        const avatar = member.querySelector('img');
        return avatar ? imageToDataURL(avatar) : null;
    }

    /**
     * Returns the title attribute of the row's first ".copyable-text" element,
     * or null when there is no such element.
     *
     * @param {Element} member
     */
    function getStatus(member) {
        const node = member.querySelector('.copyable-text');
        if (!node) {
            return null;
        }
        return node.title;
    }

    /**
     * Returns the text of the row's caption span (a span with an empty
     * aria-label and no title attribute), or null when there is none.
     *
     * The text is read through textContent rather than innerHTML so that
     * HTML entities and nested markup do not end up in the exported data.
     *
     * @param {Element} member
     * @returns {?string}
     */
    function getPhone(member) {
        const phone = member.querySelector('span[aria-label=""]:not(span[title])');
        return phone ? phone.textContent : null;
    }

    /**
     * Returns the title attribute of the row's first span that has one, or
     * null when the row has no such span.
     *
     * @param {Element} member
     */
    function getTitle(member) {
        const titled = member.querySelector('span[title]');
        if (titled) {
            return titled.title;
        }
        return null;
    }

    /**
     * Resolves the names of the groups whose participant records match the
     * given phone number. Returns an empty list when CHECK_INDEXEDDB is off.
     *
     * @param {string} phone
     * @returns {Promise<Array>} one entry per participant record, as returned
     *   by getGroupName().
     */
    async function getGroups(phone) {
        if (CHECK_INDEXEDDB) {
            const lookupKey = whatsappDB.phoneToKey(phone);
            const records = await whatsappDB.getParticipants(lookupKey);
            return records.map((record) => getGroupName(record.groupId));
        }

        return [];
    }

    /**
     * Looks up a group by id in the loaded `groups` list and returns its
     * subject, or null when no group has that id.
     *
     * @param {*} groupID
     */
    function getGroupName(groupID) {
        for (const candidate of groups) {
            if (candidate.id === groupID) {
                return candidate.subject;
            }
        }
        return null;
    }

    /**
     * Finds the first loaded contact whose name equals the given name.
     * Returns null when CHECK_INDEXEDDB is off and undefined when no contact
     * matches.
     *
     * @param {string} name
     */
    function getContact(name) {
        return CHECK_INDEXEDDB
            ? contacts.find((entry) => entry.name === name)
            : null;
    }

    /**
    * Helper functions
    * @References [1] https://stackoverflow.com/questions/53158796/get-scroll-position-with-reactjs/53158893#53158893
    */
    /**
     * Tells whether an element is scrolled all the way to the bottom.
     *
     * scrollTop can be fractional while scrollHeight and clientHeight are
     * rounded, so an exact comparison with 0 may never succeed; a remaining
     * distance of at most one pixel counts as "at the end".
     *
     * @param {{scrollHeight: number, clientHeight: number, scrollTop: number}} el
     * @returns {boolean}
     */
    function scrollEndReached(el) {
        const remaining = el.scrollHeight - el.clientHeight - el.scrollTop;
        return remaining <= 1;
    }

    /**
     * Draws an image onto an off-screen canvas and returns it as a PNG data URL.
     *
     * @param {HTMLImageElement} img
     * @returns {?string} the data URL, or null when the browser refuses to
     *   export the canvas (SecurityError), so that one unreadable picture does
     *   not abort the whole scrape.
     */
    function imageToDataURL(img) {
        // NOTE(review): set before drawing, as in the original; whether this
        // takes effect for an image that is already loaded should be confirmed.
        img.crossOrigin = "anonymous";

        // Create a canvas element sized like the image
        const canvas = document.createElement('canvas');
        canvas.width = img.naturalWidth || img.width;
        canvas.height = img.naturalHeight || img.height;

        try {
            // Draw the image onto the canvas
            const ctx = canvas.getContext('2d');
            ctx.drawImage(img, 0, 0);

            // Get the Data URI of the image
            return canvas.toDataURL('image/png');
        } catch (err) {
            if (err?.name === 'SecurityError') {
                console.warn("Could not export a member image (canvas is not origin-clean):", err);
                return null;
            }
            throw err;
        }
    }

    /**
     * Triggers a browser download of `data` under the given file name by
     * clicking a temporary, hidden anchor that points at a Blob URL.
     *
     * @param {string} data - file contents.
     * @param {string} fileName - name suggested for the downloaded file.
     * @param {string} [mimeType="text/csv;charset=utf-8"] - MIME type of the Blob.
     */
    function createDownloadLink(data, fileName, mimeType = "text/csv;charset=utf-8") {
        const blob = new Blob([data], {
            type: mimeType
        });
        const url = window.URL.createObjectURL(blob);

        const a = document.createElement('a');
        a.style.display = "none";
        a.setAttribute("href", url);
        a.setAttribute("download", fileName);

        // The anchor only needs to exist in the document for the click.
        document.body.append(a);
        a.click();
        window.URL.revokeObjectURL(url);
        a.remove();
    }

    // https://stackoverflow.com/questions/11257062/converting-json-object-to-csv-format-in-javascript
    /**
     * Converts a list of flat objects to CSV text. The header row is taken from
     * the keys of the first object; every following row lists an object's
     * values in their own key order.
     *
     * Null/undefined values become empty cells. Every other value is converted
     * to a string and wrapped in double quotes, with embedded double quotes
     * replaced by single quotes.
     *
     * @param {Array<Object>} arr - rows to export.
     * @returns {string} the CSV text, or an empty string when there are no rows.
     */
    function convertToCSV(arr) {
        // Nothing scraped: there is no first row to take the header from.
        if (!arr || arr.length === 0) {
            return '';
        }

        const rows = [Object.keys(arr[0]), ...arr];

        return rows
            .map((row) => Object.values(row)
                .map((value) => {
                    if (value == null) return '';
                    // String() keeps numbers and booleans from crashing the export.
                    return `"${String(value).replaceAll('"', "'")}"`;
                })
                .join(','))
            .join('\n');
    }

    // Kick off the scrape. start() resolves once the first pass is done and the
    // observer/auto-scroll are set up; a failure is reported instead of being
    // left as an unhandled promise rejection.
    start()
        .then(() => {
            console.log("Finished scraping.")
        })
        .catch((err) => {
            console.error("Scraping failed:", err);
        });