example input:
[helb@kookaburra~/tmp/alienvault] $ cat input.json
{
"indicators": [
{
"indicator": "127.0.0.1",
"description": "",
"created": "2018-03-13T00:53:33",
| const puppeteer = require("puppeteer"); | |
// Sites to process. Each entry carries the page URL plus two CSS selectors
// keyed `search` and `submit` (presumably the search input and the control
// that submits it — the code consuming this list is not visible here).
const websites = [
  {
    url: "https://www.antikvariaty.cz/",
    search: "input[name=fulltxt]",
    submit: "button[type=submit]",
  },
  {
    url: "https://www.trhknih.cz/",
    search: "input#searchbox",
    submit: "button[type=submit]",
  },
  {
    url: "https://antikvariat11.cz/",
    search: "input#searchbox",
    submit: "input[name=Submit]",
  },
  {
    url: "https://www.antikvariat-benes.cz/",
    search: "input.srch_input",
    submit: "a.srch_btn",
  },
  {
    url: "https://www.antikvariat-levneknihy.cz/",
    search: "input[name=hledany_text]",
    submit: "input.submit",
  },
];
| * Trying 2606:4700::6810:f8f9:443... | |
| * TCP_NODELAY set | |
| * Connected to cloudflare-dns.com (2606:4700::6810:f8f9) port 443 (#0) | |
| * ALPN, offering h2 | |
| * ALPN, offering http/1.1 | |
| * successfully set certificate verify locations: | |
| * CAfile: /etc/ssl/certs/ca-certificates.crt | |
| CApath: /etc/ssl/certs | |
| * TLSv1.2 (OUT), TLS handshake, Client hello (1): | |
| * TLSv1.2 (IN), TLS handshake, Server hello (2): |
| * Trying 2a00:1450:4014:800::200e:443... | |
| * TCP_NODELAY set | |
| * Connected to dns.google.com (2a00:1450:4014:800::200e) port 443 (#0) | |
| * ALPN, offering h2 | |
| * ALPN, offering http/1.1 | |
| * successfully set certificate verify locations: | |
| * CAfile: /etc/ssl/certs/ca-certificates.crt | |
| CApath: /etc/ssl/certs | |
| } [5 bytes data] | |
| * TLSv1.2 (OUT), TLS handshake, Client hello (1): |
// Paste into the browser console on https://www.youtube.com/feed/channels (and wait).
//
// Every `paper-button[subscribed]` element gets its own time slot: it is
// clicked at 1000 ms + index * 2500 ms, and 500 ms later the first
// `paper-button.style-blue-text` on the page is clicked (presumably the
// confirmation button of the dialog the first click opens — verify on the page).
{
  const INITIAL_DELAY_MS = 1000;
  const SLOT_LENGTH_MS = 2500;
  const FOLLOW_UP_DELAY_MS = 500;

  let slot = 0;
  for (const subscribedButton of document.querySelectorAll("paper-button[subscribed]")) {
    const startAfterMs = INITIAL_DELAY_MS + slot * SLOT_LENGTH_MS;
    slot += 1;

    setTimeout(function clickSubscribedButton() {
      subscribedButton.click();
      setTimeout(function clickFollowUpButton() {
        document.querySelector("paper-button.style-blue-text").click();
      }, FOLLOW_UP_DELAY_MS);
    }, startAfterMs);
  }
}
example input:
[helb@kookaburra~/tmp/alienvault] $ cat input.json
{
"indicators": [
{
"indicator": "127.0.0.1",
"description": "",
"created": "2018-03-13T00:53:33",
| #!/usr/bin/env python | |
| """Simple HTTP Server With Upload. | |
| This module builds on BaseHTTPServer by implementing the standard GET | |
| and HEAD requests in a fairly straightforward manner. | |
| """ | |
| var str=""; document.querySelectorAll("a[href*='dl.humble.com']").forEach(link => str += (`wget "${link.href}" -O "${link.href.replace('https://dl.humble.com/', '').replace(/\?.*/, '')}"; `)); console.dir(str) |
| #!/usr/bin/env python | |
| from bs4 import BeautifulSoup, element | |
| import requests | |
| import re | |
| from sys import argv | |
| from decimal import Decimal | |
# Remote endpoints, kept as module-level constants. The code that consumes
# them is not visible in this excerpt.

# Exchange-rate request: asks for CZK quoted against a USD base.
fiat_url = "https://api.fixer.io/latest?symbols=CZK&base=USD"

# Base path ending in a slash (presumably a currency identifier is appended
# by the caller -- TODO confirm).
bitgup_url = "https://bitgup.com/view_currency/"

# Product-category listing page on akcniceny.cz.
butter_url = "https://www.akcniceny.cz/zbozi/mlecne-vyrobky/masla/"
https://stackoverflow.com/questions/46528003/how-to-clean-up-the-data-from-this-webscraping-script
for table in tables:
rows = table.find_all("tr")
for row in rows:
cells = row.find_all("td")
if len(cells) == 7: # this filters out rows with 'Term', 'Instructor Name' etc.
for cell in cells:
print(cell.text + "\t", end="") # \t is a Tab character, and end="" prevents a newline between cells