-
-
Save RutwikPatel13/c068326bcd9fc5364c5d68607d81f696 to your computer and use it in GitHub Desktop.
Data scraper JS script for extracting ISIN numbers from the site `https://www.isin.com/isin-database/`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. Log in to https://www.isin.com/ and then head over to the https://www.isin.com/isin-database/ page.
// 2. Open the Chrome console by pressing F12 and paste the script into the console.
// Output: a JSON file named `all-data.json` containing a list of dictionaries with info about the companies.
// Company names to look up; each entry is submitted to the site's search
// form with the search mode set to "title".
const org = [
  "nobiskrug",
  "swm",
  "vattenfall",
  // and all the other companies you need to extract information about
];
/**
 * Serialize `data` as pretty-printed JSON (2-space indent) and trigger a
 * browser download of it.
 *
 * A temporary object URL and a temporary `<a>` element are created for the
 * download; both are released again before the function returns, even if
 * the simulated click throws.
 *
 * @param {*} data - Any value accepted by `JSON.stringify`.
 * @param {string} [filename='all-data.json'] - File name suggested to the browser.
 * @returns {void}
 */
function downloadJSON(data, filename = 'all-data.json') {
  const dataStr = JSON.stringify(data, null, 2);
  const blob = new Blob([dataStr], { type: 'application/json' });
  const url = URL.createObjectURL(blob);

  const link = document.createElement('a');
  link.href = url;
  link.download = filename;

  // The link is attached to the document before it is clicked and detached
  // again afterwards.
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  }
}
/**
 * Fill in the page's search form with `term`, switch the search mode to
 * "title", and submit the form via `fetch` instead of navigating.
 *
 * @param {string} term - Value written into the `#isin` input.
 * @param {number} timeoutMs - Abort the request after this many milliseconds;
 *   values <= 0 disable the timeout.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} 'Failed to fetch data' when the response is not OK,
 *   'Request timed out after N milliseconds' when the timeout fires, or
 *   whatever `fetch` / the DOM lookups throw.
 */
async function submitSearch(term, timeoutMs) {
  // Change the input value
  document.getElementById('isin').value = term;
  // Change the dropdown to 'By Title'
  const dropdown = document.querySelector('select[name="search_select"]');
  dropdown.value = 'title';
  // Snapshot the form fields, including the two values set above
  const form = dropdown.closest('form');
  const formData = new FormData(form);

  // An AbortController cancels the request itself on timeout and, unlike a
  // bare rejecting timer, never leaves an unhandled rejection behind.
  const controller = new AbortController();
  const timer = timeoutMs > 0 ? setTimeout(() => controller.abort(), timeoutMs) : null;
  try {
    const response = await fetch(form.action, {
      method: form.method,
      body: formData,
      redirect: 'manual', // try to prevent automatic navigation
      signal: controller.signal,
    });
    if (!response.ok) throw new Error('Failed to fetch data');
    return await response.text();
  } catch (error) {
    if (controller.signal.aborted) {
      throw new Error(`Request timed out after ${timeoutMs} milliseconds`);
    }
    throw error;
  } finally {
    if (timer !== null) clearTimeout(timer);
  }
}

/**
 * Parse a search-result HTML page and extract the rows of its report table.
 *
 * @param {string} html - HTML text of the response.
 * @returns {Array<Object<string, string>>|null} One object per matching row,
 *   keyed by the trimmed column header text, or `null` when the page has no
 *   `.data_report_Table` element.
 */
function parseReportTable(html) {
  const doc = new DOMParser().parseFromString(html, 'text/html');
  const table = doc.querySelector('.data_report_Table');
  if (!table) return null;

  const headers = Array.from(
    table.querySelectorAll('th[scope="col"]'),
    (th) => th.textContent.trim(),
  );
  // NOTE(review): this row class name looks site-generated; confirm it is stable.
  const rows = table.querySelectorAll('tr.temRow_dt_intfc524c84e8baaee.report_entry');

  return Array.from(rows, (row) => {
    const rowData = {};
    row.querySelectorAll('td').forEach((cell, index) => {
      // Cells beyond the last header get a positional key instead of all
      // collapsing into a single "undefined" property.
      const key = headers[index] !== undefined ? headers[index] : `column_${index}`;
      rowData[key] = cell.textContent.trim();
    });
    return rowData;
  });
}

/**
 * Look up every entry of `isinList` through the page's search form and
 * collect the result tables.
 *
 * Entries are processed one at a time. A failure for one entry is logged and
 * recorded in its result object; it does not stop the remaining entries.
 * When all entries are done the results are handed to `downloadJSON` and
 * also returned.
 *
 * @param {string[]} isinList - Search terms; each is submitted with the
 *   search mode set to "title".
 * @param {Object} [options]
 * @param {number} [options.timeoutMs=0] - Per-request timeout in
 *   milliseconds. The default of 0 means no timeout.
 * @returns {Promise<Array<{isin: string, data?: Object[], error?: string}>>}
 *   One result per input entry, in input order: `data` holds the parsed rows,
 *   `error` holds the failure message or 'No table found'.
 */
async function fetchTableDataByISIN(isinList, { timeoutMs = 0 } = {}) {
  const results = [];
  console.log('Starting data fetch...');
  const startTime = performance.now(); // Start timing for total processing

  for (let i = 0; i < isinList.length; i++) {
    const isin = isinList[i];
    const individualStartTime = performance.now(); // Start timing for this entry
    console.log(`Processing ${i + 1}/${isinList.length}: ${isin}`);

    try {
      const html = await submitSearch(isin, timeoutMs);
      const tableData = parseReportTable(html);
      if (tableData !== null) {
        results.push({ isin, data: tableData });
      } else {
        results.push({ isin, error: 'No table found' });
      }
    } catch (error) {
      console.error(`Error processing ISIN ${isin}: ${error.message}`);
      results.push({ isin, error: error.message });
    }

    const individualEndTime = performance.now(); // End timing for this entry
    console.log(`Processed ${isin} in ${(individualEndTime - individualStartTime).toFixed(2)} ms`);
  }

  const endTime = performance.now(); // End timing for total processing
  console.log(`Data fetching completed in ${(endTime - startTime).toFixed(2)} ms.`);
  downloadJSON(results);
  return results;
}
// Run the scraper over every entry in `org` and log the collected results.
// To try a smaller batch first, pass a slice instead, e.g.
// `fetchTableDataByISIN(org.slice(0, 10))`.
fetchTableDataByISIN(org)
  .then((data) => console.log(data))
  // Log a rejection explicitly rather than leaving the promise floating.
  .catch((error) => console.error(error));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment