|
|
@@ -0,0 +1,298 @@ |
|
|
/*
Inspired by https://gist.github.com/shaneapen/3406477b9f946855d02e3f33ec121975

The script scrapes the members of a WhatsApp group chat and exports the data to a CSV file.
It scrolls automatically and extracts each list item in the members list with all the information available.
Then it joins this information with the indexedDB data to get the groups the member is in and whether the
contact's info is already saved in the phone.

Steps:
1. Open WhatsApp Web
2. Open the group chat you want to scrape ->
   Click on the group name to open the group info ->
   Click on the members list
3. Open the browser console (F12)
4. Copy and paste the code below into the console and press Enter

After the script has finished running, the browser downloads a CSV file containing the scraped data.
*/
|
|
|
|
|
// Delay, in milliseconds, passed to setInterval for the auto-scroll timer.
const SCROLL_INTERVAL = 1000;
// Amount added to the list's scrollTop on every auto-scroll step.
const SCROLL_INCREMENT = 450;
// When true, start() installs the auto-scroll timer.
const AUTO_SCROLL = true;
// When true, groups and contacts are read from IndexedDB and joined with the scraped rows.
const CHECK_INDEXEDDB = true;

// Shared mutable state; assigned while the scrape runs.
var scrollInterval;
var observer;
var membersList;
var header;
var MEMBERS_QUEUE;
|
|
|
|
|
/**
 * Promise-based, read-only accessor for an IndexedDB database.
 *
 * The database, object-store and index names are fixed in the private fields
 * below (presumably matching the schema WhatsApp Web uses - verify against the
 * running page if lookups come back empty).
 */
class WhatsappDB {
    // Pending or resolved connection. Caching the promise (not just the
    // resolved handle) makes concurrent callers share a single open request.
    #dbPromise = null;
    #dbName = "model-storage";
    #groupsCollection = "group-metadata";
    #contactsCollection = "contact";
    #phoneNumberIndex = "phoneNumber";
    #participantsCollection = "participant";
    #participantsIndex = "participants";

    /**
     * Opens the database once and reuses the connection afterwards.
     *
     * @returns {Promise<IDBDatabase>} the open database handle
     * @throws rejects with the request's error when the database cannot be
     *         opened; the cached promise is dropped so a later call retries.
     */
    async openConnection() {
        if (!this.#dbPromise) {
            const dbName = this.#dbName;
            this.#dbPromise = new Promise((resolve, reject) => {
                const request = indexedDB.open(dbName);
                request.onerror = () => {
                    reject(request.error ?? new Error(`Failed to open IndexedDB database "${dbName}"`));
                };
                request.onsuccess = () => {
                    resolve(request.result);
                };
            }).catch((error) => {
                // Do not keep a rejected promise around forever.
                this.#dbPromise = null;
                throw error;
            });
        }
        return this.#dbPromise;
    }

    /**
     * Runs getAll() against an object store, or against one of its indexes.
     *
     * @param {string} collection - object store name
     * @param {string} [index] - index name; when omitted the store itself is queried
     * @param {*} [query] - key or key range forwarded to getAll()
     * @param {number} [count] - maximum number of records forwarded to getAll()
     * @returns {Promise<Array>} the matching records
     */
    async #promisifyCol(collection, index, query, count) {
        const db = await this.openConnection();
        return new Promise((resolve, reject) => {
            const objectStore = db.transaction(collection, "readonly").objectStore(collection);
            const source = index ? objectStore.index(index) : objectStore;
            const request = source.getAll(query, count);

            request.onerror = () => {
                reject(request.error ?? new Error(`Failed to read "${collection}" from IndexedDB`));
            };
            request.onsuccess = () => {
                resolve(request.result);
            };
        });
    }

    /** @returns {Promise<Array>} every record of the groups store */
    async getGroups() {
        return this.#promisifyCol(this.#groupsCollection);
    }

    /**
     * @param {*} key - value looked up in the participants index
     * @returns {Promise<Array>} participant records matching the key
     */
    async getParticipants(key) {
        return this.#promisifyCol(this.#participantsCollection, this.#participantsIndex, key);
    }

    /**
     * @param {*} [key] - value looked up in the phone-number index; when
     *        omitted, everything reachable through that index is returned
     * @returns {Promise<Array>} contact records
     */
    async getContacts(key) {
        return this.#promisifyCol(this.#contactsCollection, this.#phoneNumberIndex, key);
    }

    /**
     * Converts a phone number into the key format used for lookups:
     * the first "+" is removed and "@c.us" is appended.
     *
     * @param {string} phone
     * @returns {string}
     */
    phoneToKey(phone) {
        return `${phone.replace('+', '')}@c.us`;
    }
}
|
|
|
|
|
// Shared IndexedDB accessor. Declared explicitly: a bare assignment creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
const whatsappDB = new WhatsappDB();

// Records loaded from IndexedDB; they stay undefined until they are fetched.
var groups, contacts;

// Fall back to the WebKit-prefixed constructor where the standard one is missing.
window.MutationObserver = window.MutationObserver || window.WebKitMutationObserver;
|
|
|
|
|
/**
 * Timer callback for automatic scrolling.
 *
 * Treats the second sibling after the header as the scrollable pane: moves it
 * down by SCROLL_INCREMENT, or calls stop() once the pane's end was reached.
 */
const autoScroll = () => {
    const scrollPane = header.nextSibling.nextSibling;

    if (scrollEndReached(scrollPane)) {
        stop();
        return;
    }

    scrollPane.scrollTop += SCROLL_INCREMENT;
};
|
|
|
|
|
/**
 * Starts a scrape run.
 *
 * Resets the result map, locates the members panel, optionally loads groups
 * and contacts from IndexedDB, watches the panel for DOM changes, scrapes the
 * rows that are currently rendered and, when AUTO_SCROLL is on, installs the
 * auto-scroll timer.
 *
 * @returns {Promise<void>} resolves after the first scrape pass; scraping
 *          continues afterwards through the observer and the scroll timer.
 * @throws {Error} when the page has no header element followed by the
 *         expected scrollable pane.
 */
async function start() {
    MEMBERS_QUEUE = {};

    // Fail early with an actionable message instead of an opaque TypeError.
    header = document.getElementsByTagName('header')[0];
    const scrollPane = header?.nextSibling?.nextSibling;
    if (!header || !scrollPane) {
        throw new Error(
            "Members list not found: expected a <header> element followed by the scrollable list. " +
            "Open the group's members list before running the script."
        );
    }
    membersList = header.parentNode;

    if (CHECK_INDEXEDDB) {
        // The two reads are independent, so run them in parallel.
        [groups, contacts] = await Promise.all([
            whatsappDB.getGroups(),
            whatsappDB.getContacts(),
        ]);
    }

    // Re-scrape whenever rows are added or removed while scrolling. Errors are
    // reported here because nothing awaits the promise.
    observer = new MutationObserver(() => {
        scrapeData().catch((error) => {
            console.error("Failed to scrape members after a list update:", error);
        });
    });

    // the div to watch for mutations
    observer.observe(membersList, {
        childList: true,
        subtree: true
    });

    // scroll to top before beginning
    scrollPane.scrollTop = 0;
    await scrapeData();
    if (AUTO_SCROLL) scrollInterval = setInterval(autoScroll, SCROLL_INTERVAL);
}
|
|
|
|
|
|
|
|
/**
 * Ends the running scrape: cancels the auto-scroll timer, disconnects the
 * mutation observer, prints the collected members to the console and hands
 * them over, converted to CSV, to the download helper.
 */
const stop = () => {
    window.clearInterval(scrollInterval);
    observer.disconnect();

    console.table(MEMBERS_QUEUE);
    const memberCount = Object.keys(MEMBERS_QUEUE).length;
    console.log(`Scrape stopped. ${memberCount} members scraped.`);

    const csv = convertToCSV(Object.values(MEMBERS_QUEUE));
    createDownloadLink(csv, "whatsapp_members.csv");
};
|
|
|
|
|
|
|
|
/**
 * Scrapes every member row currently present in the members list and stores
 * the result in MEMBERS_QUEUE, keyed by phone number or, when no phone is
 * known, by name. Rows seen again simply overwrite their previous entry.
 *
 * The row named "You" is skipped. Rows that yield neither a phone nor a name
 * are skipped as well; otherwise they would all collapse into one bogus entry
 * stored under the literal key "null".
 *
 * @returns {Promise<void>}
 */
async function scrapeData() {
    const members = membersList.querySelectorAll('[role=listitem] > [role=button]');

    for (const member of members) {
        const details = await handleMember(member);

        if (details.name === "You") {
            continue;
        }

        const key = details.phone || details.name;
        if (!key) {
            continue;
        }

        MEMBERS_QUEUE[key] = details;
    }
}
|
|
|
|
|
|
|
|
/**
 * Builds the export record for one member row.
 *
 * @param {Element} member - the row element of a member in the list
 * @returns {Promise<{phone: ?string, name: ?string, status: *, image: *,
 *           groups: string, isSaved: string}>} `groups` and `isSaved` are
 *           JSON-encoded strings so every field can go straight into the CSV.
 */
async function handleMember(member) {
    const title = getTitle(member);
    const phoneCaption = getPhone(member);
    const status = getStatus(member);
    const image = getImage(member);
    let memberGroups = [];
    let isSaved = false;

    // If contact unsaved - the phone is the caption or the title.
    // If contact saved - the phone is unavailable.
    // `title` can be null when the row has no title span, hence the `?.`.
    let phone = phoneCaption || (title?.startsWith("+") ? title : null);

    // The title is a name unless it was used as the phone number above.
    const name = (phoneCaption || !phone) ? title : null;

    if (name && !phone) {
        // Only a name is visible: try to resolve the number from the contacts.
        const contact = await getContact(name);
        const number = contact?.phoneNumber?.split('@')[0];
        phone = number ? `+${number}` : null;
        isSaved = Boolean(contact);
    }

    if (phone) {
        // Strip whitespace, dashes and parentheses so the number can be used as a lookup key.
        phone = phone.replace(/[\s\-()]/g, '');
        memberGroups = await getGroups(phone);
    }

    return {
        phone: phone,
        name: name,
        status: status,
        image: image,
        groups: JSON.stringify(memberGroups),
        isSaved: JSON.stringify(isSaved),
    };
}
|
|
|
|
|
/**
 * Returns the first <img> inside the member row converted by
 * imageToDataURL(), or null when the row contains no image.
 */
function getImage(member) {
    const avatar = member.querySelector('img');
    return avatar ? imageToDataURL(avatar) : null;
}
|
|
|
|
|
/**
 * Reads the `title` of the row's first `.copyable-text` element;
 * yields null when the row has no such element.
 */
function getStatus(member) {
    const statusEl = member.querySelector('.copyable-text');
    if (!statusEl) {
        return null;
    }
    return statusEl.title;
}
|
|
|
|
|
/**
 * Returns the text of the row's secondary caption: the first span with an
 * empty `aria-label` that is not a titled span. Null when there is none.
 *
 * Reads `textContent` rather than `innerHTML` so the exported value is plain
 * text: no HTML entities such as `&amp;` and no nested markup.
 */
function getPhone(member) {
    const phone = member.querySelector('span[aria-label=""]:not(span[title])');
    return phone ? phone.textContent : null;
}
|
|
|
|
|
/**
 * Reads the `title` attribute of the row's first titled span;
 * yields null when the row has no span carrying a title.
 */
function getTitle(member) {
    const titledSpan = member.querySelector('span[title]');
    if (!titledSpan) {
        return null;
    }
    return titledSpan.title;
}
|
|
|
|
|
/**
 * Resolves the names of the groups a phone number participates in.
 *
 * @param {string} phone - phone number, converted to a lookup key via whatsappDB.phoneToKey()
 * @returns {Promise<Array>} one entry per participant record, as returned by
 *          getGroupName(); always empty when CHECK_INDEXEDDB is off.
 */
async function getGroups(phone) {
    if (CHECK_INDEXEDDB) {
        const participantKey = whatsappDB.phoneToKey(phone);
        const memberships = await whatsappDB.getParticipants(participantKey);
        return memberships.map(({ groupId }) => getGroupName(groupId));
    }

    return [];
}
|
|
|
|
|
/**
 * Finds the first loaded group whose `id` equals groupID and returns its
 * `subject`; returns null when no group matches.
 */
function getGroupName(groupID) {
    for (const candidate of groups) {
        if (candidate.id === groupID) {
            return candidate.subject;
        }
    }
    return null;
}
|
|
|
|
|
/**
 * Looks up the first loaded contact whose `name` equals the given name.
 *
 * @returns the contact record, undefined when nothing matches, or null when
 *          CHECK_INDEXEDDB is off.
 */
function getContact(name) {
    return CHECK_INDEXEDDB
        ? contacts.find((candidate) => candidate.name === name)
        : null;
}
|
|
|
|
|
/**
 * Helper functions
 * @References [1] https://stackoverflow.com/questions/53158796/get-scroll-position-with-reactjs/53158893#53158893
 */

/**
 * Tells whether an element is scrolled all the way to the bottom.
 *
 * `scrollTop` can be fractional (page zoom, high-DPI displays) while
 * `scrollHeight` and `clientHeight` are rounded, so the remaining distance is
 * compared with a one-pixel tolerance instead of strict equality to zero.
 *
 * @param {{scrollHeight: number, clientHeight: number, scrollTop: number}} el
 * @returns {boolean}
 */
function scrollEndReached(el) {
    return Math.abs(el.scrollHeight - el.clientHeight - el.scrollTop) <= 1;
}
|
|
|
|
|
/**
 * Renders an <img> element onto an off-screen canvas and returns it as a PNG
 * data URL.
 *
 * @param {HTMLImageElement} img
 * @returns {?string} the data URL, or null when the image has not finished
 *          loading or cannot be drawn/exported (drawImage() throws for broken
 *          images, toDataURL() throws for a tainted canvas). Returning null
 *          keeps one bad avatar from aborting the whole scrape.
 */
function imageToDataURL(img) {
    img.crossOrigin = "anonymous";

    // Nothing to draw yet; drawing now would only export a blank canvas.
    if (img.complete === false) {
        return null;
    }

    try {
        // Create a canvas element
        const canvas = document.createElement('canvas');
        canvas.width = img.naturalWidth || img.width;
        canvas.height = img.naturalHeight || img.height;

        // Draw the image onto the canvas
        const ctx = canvas.getContext('2d');
        ctx.drawImage(img, 0, 0);

        // Get the Data URI of the image
        return canvas.toDataURL('image/png');
    } catch (error) {
        console.warn("Could not export member image:", error);
        return null;
    }
}
|
|
|
|
|
/**
 * Triggers a browser download of `data` under the given file name by clicking
 * a temporary, hidden anchor that points at a Blob object URL.
 *
 * @param {string} data - file contents
 * @param {string} fileName - name suggested to the browser for the download
 * @param {string} [mimeType="text/csv;charset=utf-8"] - MIME type of the Blob
 */
function createDownloadLink(data, fileName, mimeType = "text/csv;charset=utf-8") {
    const blob = new Blob([data], { type: mimeType });
    const url = window.URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.style.display = "none";
    a.setAttribute("href", url);
    a.setAttribute("download", fileName);

    document.body.append(a);
    a.click();
    a.remove();

    // Release the object URL on a later task, not synchronously, so the
    // download started by click() is not cut off by an already-revoked URL.
    setTimeout(() => window.URL.revokeObjectURL(url), 0);
}
|
|
|
|
|
// https://stackoverflow.com/questions/11257062/converting-json-object-to-csv-format-in-javascript
/**
 * Serialises an array of flat objects to CSV text.
 *
 * The keys of the first object form the header row and decide the column
 * order of every following row. Every non-null value is converted to a string
 * and quoted; embedded double quotes are doubled (the usual CSV escaping) so
 * values such as JSON strings survive unchanged. null/undefined become empty
 * cells.
 *
 * @param {Array<Object>} arr - rows to serialise
 * @returns {string} CSV text with "\n" line breaks; '' for an empty or missing array
 */
function convertToCSV(arr) {
    if (!arr || arr.length === 0) {
        return '';
    }

    const columns = Object.keys(arr[0]);
    const toCell = (value) => {
        if (value == null) return '';
        return `"${String(value).replaceAll('"', '""')}"`;
    };

    const headerLine = columns.map(toCell).join(',');
    const dataLines = arr.map((row) => columns.map((column) => toCell(row[column])).join(','));

    return [headerLine, ...dataLines].join('\n');
}
|
|
|
|
|
// Entry point. start() resolves once the first scrape pass is done and the
// observer / scroll timer are installed; a rejection is reported here instead
// of surfacing as an unhandled promise rejection.
start()
    .then(() => {
        console.log("Finished scraping.");
    })
    .catch((error) => {
        console.error("Scraping could not be started:", error);
    });