- Install npm
- Run
  `mkdir nepsealpha-scrap; cd nepsealpha-scrap; npm init`
- Install dependencies
  `npm install csv-writer puppeteer`
- Run Script
  `node script.js`

| const puppeteer = require('puppeteer'); | |
| const createCsvWriter = require('csv-writer').createObjectCsvWriter; | |
/**
 * Scrapes company information for every stock listed on
 * https://nepsealpha.com/traded-stocks and stores it in `company_info.csv`.
 *
 * For each page of the listing table the script collects the `/stocks/` links,
 * opens every link in its own tab, reads the key/value rows of the two
 * company-information tables, and writes the page's records to the CSV file
 * before moving on to the next listing page.
 *
 * Relies on `puppeteer` and `createCsvWriter` being required at the top of
 * the file.
 */
(async () => {
  const fs = require('fs');

  const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36';
  const VIEWPORT = { width: 1440, height: 900, deviceScaleFactor: 1 };
  const CSV_PATH = 'company_info.csv';

  // One writer for the whole run. csv-writer omits the header line whenever
  // `append` is true, so only append when the file already exists; a fresh
  // file gets its header on the first write and later writes through the same
  // writer instance are appended after it.
  const csvWriter = createCsvWriter({
    path: CSV_PATH,
    header: [
      { id: 'name', title: 'Company Name' },
      { id: 'Promoter Holding', title: 'Promoter Holding' },
      { id: 'Public Holding', title: 'Public Holding' },
      { id: 'Government Holding', title: 'Government Holding' },
      { id: 'Foreign Ownership', title: 'Foreign Ownership' },
      { id: 'Address', title: 'Address' },
      { id: 'Phone', title: 'Phone' },
      { id: 'Fax', title: 'Fax' },
      { id: 'Email', title: 'Email' },
      { id: 'Website', title: 'Website' },
      { id: 'Registrar', title: 'Registrar' },
      { id: 'Listed Shares', title: 'Listed Shares' },
      { id: 'Paid Up Capital', title: 'Paid Up Capital' },
      { id: 'Market Capitalization', title: 'Market Capitalization' },
      // Add more fields as necessary
    ],
    append: fs.existsSync(CSV_PATH),
  });

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);
    await page.setViewport(VIEWPORT);

    // Navigate to the traded stocks page
    await page.goto('https://nepsealpha.com/traded-stocks', { waitUntil: 'networkidle2' });

    let hasNextPage = true;
    while (hasNextPage) {
      // Records for the current listing page only; they are flushed to disk
      // at the end of each iteration so a later failure does not lose them.
      const extractedData = [];

      // Wait for the table to load
      await page.waitForSelector('#DataTables_Table_0');

      // Extract stock links and names
      const stockData = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('#DataTables_Table_0 tbody tr'))
          .map((row) => {
            const linkElement = row.querySelector('td a');
            return {
              link: linkElement ? linkElement.href : null,
              name: linkElement ? linkElement.innerText.trim() : null,
            };
          })
          .filter((data) => data.link && data.link.includes('/stocks/'));
      });
      console.log(stockData);

      // Extract info from each stock link
      for (const { link, name } of stockData) {
        const stockPage = await browser.newPage();
        try {
          await stockPage.setUserAgent(USER_AGENT);
          await stockPage.setViewport(VIEWPORT);
          await stockPage.goto(link, { waitUntil: 'networkidle2' });

          // Extract company general information in tabular form.
          // This callback runs inside the browser page, not in Node.
          const companyInfo = await stockPage.evaluate(() => {
            const table = document.querySelector('#home > div > div.box-body > div:nth-child(1) > div.col-md-4 > div');
            if (!table) return null;
            const table2 = document.querySelector('#home > div > div.box-body > div:nth-child(1) > div.col-md-5 > div > table');

            // The second table may be missing; then only the first one is read.
            const rows = [
              ...table.querySelectorAll('tr'),
              ...(table2 ? table2.querySelectorAll('tr') : []),
            ];

            const info = {};
            for (const row of rows) {
              const cols = row.querySelectorAll('td');
              // Only two-cell rows are treated as "label, value" pairs.
              if (cols.length === 2) {
                info[cols[0].innerText.trim()] = cols[1].innerText.trim();
              }
            }
            return info;
          });

          // Spreading null (no info table found) adds nothing, so the record
          // then carries only the company name.
          extractedData.push({ name, ...companyInfo });
        } finally {
          // Close the tab even when navigation or extraction throws.
          await stockPage.close();
        }
      }

      // Write records to CSV file
      await csvWriter.writeRecords(extractedData);
      console.log('CSV file written successfully');

      // Check if there's a next page; if so, click through to it
      hasNextPage = await page.evaluate(() => {
        const nextButton = document.querySelector('#DataTables_Table_0_next');
        if (nextButton && !nextButton.parentElement.classList.contains('disabled')) {
          nextButton.click();
          return true;
        }
        return false;
      });
      console.log({ hasNextPage });
    }
  } finally {
    // Always shut the browser down so no Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
csv file