Skip to content

Instantly share code, notes, and snippets.

@koyachi
Forked from kui/scrape.dart
Last active April 7, 2018 01:09
Show Gist options
  • Save koyachi/d802c607223888651b5719a38fd9e4d8 to your computer and use it in GitHub Desktop.
Save koyachi/d802c607223888651b5719a38fd9e4d8 to your computer and use it in GitHub Desktop.

Revisions

  1. koyachi revised this gist Apr 7, 2018. 1 changed file with 10 additions and 17 deletions.
    27 changes: 10 additions & 17 deletions scrape.dart
    Original file line number Diff line number Diff line change
    @@ -1,28 +1,21 @@
    import 'dart:io';
    import 'dart:async';
    import 'package:html5lib/parser.dart';
    import 'package:html5lib/dom.dart';
    import 'package:html/parser.dart';
    import 'package:html/dom.dart';

    main() {
    final url = 'http://comic-walker.com/';

    getHtml(url).then((document) {
    // page title
    print(document.querySelector('title').text);

    // Newer comics
    document.querySelectorAll('#bookList > li').forEach((e) {
    print(e.querySelector('.list_bookName').text);
    document.querySelectorAll('#mainContent > li h2 span').forEach((e) {
    print(e.text);
    });
    });
    }

    /// Fetches [url] over HTTP and parses the response body as HTML.
    ///
    /// Returns a future that completes with the parsed [Document], or with the
    /// error raised by the request or by the parser.
    Future<Document> getHtml(String url) {
      final client = new HttpClient();
      return client
          .getUrl(Uri.parse(url))
          .then((req) => req.close())
          // Append each chunk to one list instead of re-emitting every single
          // byte as its own stream event.
          .then((res) => res.fold(
              <int>[], (List<int> body, List<int> chunk) => body..addAll(chunk)))
          .then((bytes) => parse(bytes, sourceUrl: url))
          // Close the client on success and on failure; otherwise its idle
          // keep-alive connection keeps the process running after the work is
          // done.
          .whenComplete(() => client.close());
    }
    /// Fetches [url] over HTTP and parses the response body as HTML.
    ///
    /// Returns a future that completes with the parsed [Document], or with the
    /// error raised by the request or by the parser.
    Future<Document> getHtml(String url) {
      final client = new HttpClient();
      return client
          .getUrl(Uri.parse(url))
          .then((req) => req.close())
          // Append each chunk to one list instead of re-emitting every single
          // byte as its own stream event.
          .then((res) => res.fold(
              <int>[], (List<int> body, List<int> chunk) => body..addAll(chunk)))
          .then((bytes) => parse(bytes, sourceUrl: url))
          // Close the client on success and on failure; otherwise its idle
          // keep-alive connection keeps the process running after the work is
          // done.
          .whenComplete(() => client.close());
    }
  2. @kui kui revised this gist Aug 19, 2014. 1 changed file with 4 additions and 5 deletions.
    9 changes: 4 additions & 5 deletions scrape.dart
    Original file line number Diff line number Diff line change
    @@ -6,7 +6,7 @@ import 'package:html5lib/dom.dart';
    main() {
    final url = 'http://comic-walker.com/';

    getHtml(url, (document) {
    getHtml(url).then((document) {
    // page title
    print(document.querySelector('title').text);

    @@ -17,13 +17,12 @@ main() {
    });
    }

    /// fetch the HTML from [url] then execute [f] with the parsed HTML
    Future getHtml(String url, f(Document docment)) =>
    /// fetch and parse the HTML from [url]
    Future<Document> getHtml(String url) =>
    new HttpClient()
    .getUrl(Uri.parse(url))
    .then((req) => req.close())
    .then((res) => res
    .asyncExpand((bytes) => new Stream.fromIterable(bytes))
    .toList())
    .then((bytes) => parse(bytes, sourceUrl: url))
    .then(f);
    .then((bytes) => parse(bytes, sourceUrl: url));
  3. @kui kui created this gist Aug 19, 2014.
    29 changes: 29 additions & 0 deletions scrape.dart
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    import 'dart:io';
    import 'dart:async';
    import 'package:html5lib/parser.dart';
    import 'package:html5lib/dom.dart';

    /// Fetches the comic-walker.com front page and prints its title followed by
    /// the name of every entry in the `#bookList` list.
    ///
    /// Nodes that the selectors do not find are skipped rather than
    /// dereferenced, so a change in the site's markup yields less output
    /// instead of an unhandled error inside the callback.
    main() {
      final url = 'http://comic-walker.com/';

      getHtml(url, (document) {
        // page title
        final title = document.querySelector('title');
        if (title != null) {
          print(title.text);
        }

        // Newer comics
        for (final item in document.querySelectorAll('#bookList > li')) {
          // querySelector returns null when the entry has no name node.
          final name = item.querySelector('.list_bookName');
          if (name != null) {
            print(name.text);
          }
        }
      });
    }

    /// Fetches [url] over HTTP, parses the response body as HTML and calls [f]
    /// with the parsed [Document].
    ///
    /// Returns a future that completes with whatever [f] returns, or with the
    /// error raised by the request, by the parser or by [f].
    Future getHtml(String url, f(Document document)) {
      final client = new HttpClient();
      return client
          .getUrl(Uri.parse(url))
          .then((req) => req.close())
          // Append each chunk to one list instead of re-emitting every single
          // byte as its own stream event.
          .then((res) => res.fold(
              <int>[], (List<int> body, List<int> chunk) => body..addAll(chunk)))
          .then((bytes) => parse(bytes, sourceUrl: url))
          .then(f)
          // Close the client on success and on failure; otherwise its idle
          // keep-alive connection keeps the process running after the work is
          // done.
          .whenComplete(() => client.close());
    }