Skip to content

Instantly share code, notes, and snippets.

@VityaSchel
Created October 3, 2024 05:10
Show Gist options
  • Save VityaSchel/81ccf5a160173a5c5e78a520e09f162c to your computer and use it in GitHub Desktop.
Save VityaSchel/81ccf5a160173a5c5e78a520e09f162c to your computer and use it in GitHub Desktop.

Revisions

  1. VityaSchel created this gist Oct 3, 2024.
    56 changes: 56 additions & 0 deletions tapochek-net-scraper.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,56 @@
    // OPEN https://tapochek.net/viewforum.php?f=910&start=0 IN YOUR BROWSER FIRST,
    // THEN RUN THIS SCRIPT IN THAT TAB'S DEVTOOLS CONSOLE:

    /**
     * Scrapes the forum listing that is open in the current tab, page by page,
     * and logs every collected topic row to the console as one array.
     *
     * Crawling begins at `window.location.href` and follows the "След." (next
     * page) link of each fetched page. It stops when a page has no such link,
     * when a request fails (the error is logged and the rows gathered so far
     * are still printed), or when the next link points to a URL that was
     * already fetched (prevents an endless loop and duplicate rows).
     *
     * Each logged entry has the shape `{ title, size, downloads }`; `size` and
     * `downloads` are `undefined` when the row has no matching element.
     *
     * Needs a browser context: uses `window`, `fetch`, `TextDecoder` and
     * `DOMParser`.
     *
     * @returns {Promise<void>} Settles after the result array has been logged.
     */
    async function start() {
      const result = []

      /**
       * Fetches one listing page, appends its topic rows to `result` and
       * reports where the next page is.
       *
       * @param {string} url - Address of the listing page to download.
       * @returns {Promise<string|undefined>} `href` of the "След." link, or
       *   `undefined` when the page has none.
       * @throws {Error} When the response status is not OK.
       */
      async function parsePage(url) {
        const response = await fetch(url)

        if (!response.ok) {
          throw new Error(`HTTP error! status: ${response.status}`)
        }

        // response.text() would decode the body as UTF-8, so the raw bytes are
        // read and decoded explicitly as windows-1251 instead.
        const arrayBuffer = await response.arrayBuffer()
        const decodedString = new TextDecoder('windows-1251').decode(arrayBuffer)

        const doc = new DOMParser().parseFromString(decodedString, 'text/html')

        // Single pass over the rows: look the title link up once, skip rows
        // that have no title link or only whitespace in it.
        for (const row of doc.querySelectorAll('tbody > tr[id^=tr]')) {
          const title = row.querySelector('a.torTopic')?.textContent.trim()
          if (!title) continue

          result.push({
            title,
            size: row.querySelector('a[href^="./download.php"]')?.textContent?.trim(),
            downloads: row.querySelector('p.med > b')?.textContent?.trim()
          })
        }

        // The pager link is recognised by its visible label.
        const nextPageLink = Array.from(doc.querySelectorAll('a[href^=viewforum]'))
          .find(a => a.textContent.trim() === 'След.')?.href

        return nextPageLink
      }

      // URLs that have already been requested; a repeated URL means the pager
      // is cycling and following it would never terminate.
      const visited = new Set()

      let nextPage = window.location.href
      while (nextPage && !visited.has(nextPage)) {
        visited.add(nextPage)
        try {
          nextPage = await parsePage(nextPage)
        } catch (err) {
          // Best effort: report the failure and keep what was collected.
          console.error('Error fetching the page:', err)
          nextPage = null
        }
      }

      if (nextPage) {
        // Loop ended because of the visited-guard, not because pages ran out.
        console.warn('Stopped: next page was already visited:', nextPage)
      }

      console.log(result)
    }

    // Kick off the scrape. The returned promise is intentionally not awaited:
    // start() catches page-fetch errors itself and logs the result when done.
    start()

    // NOW COPY THE LOGGED ARRAY AS A JSON OBJECT FROM THE BROWSER'S CONSOLE