Skip to content

Instantly share code, notes, and snippets.

@kui
Last active September 9, 2019 04:15
Show Gist options
  • Save kui/6c17e82d733f1d95ffe7 to your computer and use it in GitHub Desktop.
Save kui/6c17e82d733f1d95ffe7 to your computer and use it in GitHub Desktop.

Revisions

  1. kui revised this gist Aug 19, 2014. 1 changed file with 4 additions and 5 deletions.
    9 changes: 4 additions & 5 deletions scrape.dart
    Original file line number Diff line number Diff line change
    @@ -6,7 +6,7 @@ import 'package:html5lib/dom.dart';
    main() {
    final url = 'http://comic-walker.com/';

    getHtml(url, (document) {
    getHtml(url).then((document) {
    // page title
    print(document.querySelector('title').text);

    @@ -17,13 +17,12 @@ main() {
    });
    }

    /// fetch the HTML from [url] then execute [f] with the parsed HTML
    Future getHtml(String url, f(Document docment)) =>
    /// fetch and parse the HTML from [url]
    Future<Document> getHtml(String url) =>
    new HttpClient()
    .getUrl(Uri.parse(url))
    .then((req) => req.close())
    .then((res) => res
    .asyncExpand((bytes) => new Stream.fromIterable(bytes))
    .toList())
    .then((bytes) => parse(bytes, sourceUrl: url))
    .then(f);
    .then((bytes) => parse(bytes, sourceUrl: url));
  2. kui created this gist Aug 19, 2014.
    29 changes: 29 additions & 0 deletions scrape.dart
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    import 'dart:io';
    import 'dart:async';
    import 'package:html5lib/parser.dart';
    import 'package:html5lib/dom.dart';

    /// Entry point: downloads the page at `http://comic-walker.com/`, prints its
    /// `<title>` text, then prints the `.list_bookName` text of every
    /// `#bookList > li` element.
    main() {
      final topPageUrl = 'http://comic-walker.com/';

      getHtml(topPageUrl, (page) {
        // Page title.
        final titleElement = page.querySelector('title');
        print(titleElement.text);

        // Newer comics: one printed line per entry of the book list.
        for (final entry in page.querySelectorAll('#bookList > li')) {
          final bookName = entry.querySelector('.list_bookName');
          print(bookName.text);
        }
      });
    }

    /// Fetches the resource at [url], parses the response body as HTML, and
    /// then calls [f] with the parsed [Document].
    ///
    /// The returned [Future] completes with whatever [f] returns. If any step
    /// of the chain fails, the returned future completes with that error.
    ///
    /// The [HttpClient] created for the request is closed as soon as the
    /// response body has been read (or the request has failed), so its
    /// connection is not left open after the call.
    Future getHtml(String url, f(Document document)) {
      final client = new HttpClient();
      return client
          .getUrl(Uri.parse(url))
          // Closing the request sends it and yields the response.
          .then((req) => req.close())
          // The response is a stream of byte chunks; flatten it into a single
          // list of bytes.
          .then((res) => res.expand((chunk) => chunk).toList())
          // Runs on success and on error: release the client before parsing so
          // a long-running [f] does not hold the connection open.
          .whenComplete(() => client.close())
          .then((bytes) => parse(bytes, sourceUrl: url))
          .then(f);
    }