// current design import ScrapePages from '../src' import { normalizeConfig } from '../lib/normalize-config' const config = { scrape: {} } const options = { input: { username: 'bob' }, optionsEach: { gallery: { downloadPriority: 1, logLevel: 'info' } }, logger: { level: 'debug', useFile: 'downloads/record.log' } } const fullConfig = normalizeConfig(config) const siteScraper = new Scraper(config) const emitter = siteScraper.run(options) emitter.on('close', queryFor => queryFor().then(console.log)) emit('stop', () => console.log('stopped.')) // --=== API proposals ===-- // /** * First Design: * least stateful */ import { scraper, querier } from 'scraper' import * as util from 'scraper/util' // utils const scrapers = util.flatten(config) const fullConfig = util.normalize(config) util.verify({ config, options }) // main const { on, emit, query } = scraper(config, options) on('done', () => { const results = query({ scrapers: ['image'] }) }) emit('stop') // instantiate another sqlite db instance const query = querier(config, options) /** * Second Design: * class instance only contains config state, re-run with different options/input */ import * as scraper from 'scraper' // util is available here too import * as util from 'scraper/util' // utils const scrapers = util.flatten(config) const fullConfig = util.normalize(config) util.verifyConfig(config) util.verifyOptions(options) // main const siteScraper = scraper.createScraper(config) const { on, emit, query } = siteScraper.run(options) // push all async tasks into observable (creating folders) on('done', () => { const results = query({ scrapers: ['image'] }) }) emit('stop') // instantiate another sqlite db instance const query = scraper.createQuerier(config, options) /** * Third Design: * most stateful, but allows instantiating two classes pointing at db * one instance for scraping and one for querying (possibly two different worker threads) */ import { Scraper } from 'scraper' // utils Scraper.verify(config) const fullConfig = Scraper.normalize(config) const scrapers = Scraper.flatten(config) Scraper.verify({ config, options }) // main const siteScraper = new Scraper(config, options) await siteScraper.init() // create folders, instantiate sqlite db, sync or async? siteScraper.start() const results = siteScraper.query({ scrapers: ['image'] }) // query can be called w/out calling start(), so long as init() has been called siteScraper.on('done', () => { const results = siteScraper.query({ scrapers: ['image'] }) }) siteScraper.emit('stop')