@mashingan
Last active November 7, 2022 12:27

Revisions

  1. mashingan revised this gist Apr 30, 2019. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions downloadmanga.go
@@ -11,6 +11,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"time"
 )
 
 func toString(name xml.Name) string {
@@ -150,6 +151,7 @@ func main() {
 	var nextlink, imglink string
 	nextlink = mangaurl
 
+	start := time.Now()
 fetchingpage:
 	for {
 		nextlink, imglink, err = pageExtract(nextlink)
@@ -186,5 +188,6 @@ fetchingpage:
 		}
 	}
 	wg.Wait()
+	fmt.Println("ended after:", time.Now().Sub(start))
 
 }
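The three additions above just time the whole run: capture a start timestamp before the fetch loop and print the elapsed duration after wg.Wait(). For reference, time.Since(start) from the standard library is shorthand for the same time.Now().Sub(start) expression; a minimal sketch of the pattern:

	start := time.Now()
	// ... fetch pages and wait for the downloads ...
	fmt.Println("ended after:", time.Since(start))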
  2. mashingan created this gist Apr 30, 2019.
    190 changes: 190 additions & 0 deletions downloadmanga.go
    @@ -0,0 +1,190 @@
package main

import (
	"encoding/xml"
	"flag"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"strings"
	"sync"
)

func toString(name xml.Name) string {
	return name.Space + ":" + name.Local
}

// isPageClass reports whether the start element is a <div> whose first
// attribute (expected to be the class) has the value "page".
func isPageClass(t xml.StartElement) bool {
	return t.Name == xml.Name{Space: "", Local: "div"} &&
		len(t.Attr) > 0 &&
		t.Attr[0].Value == "page"
}

// nextToken pulls the next token from the decoder and prints its dynamic
// type, which helps when tracing how a page is being parsed.
func nextToken(d *xml.Decoder) (token xml.Token, err error) {
	token, err = d.Token()
	if err != nil {
		return
	}
	switch v := token.(type) {
	default:
		fmt.Printf("the type: %T\n", v)
	}
	return
}

// pageExtract fetches one reader page and returns the link to the next page
// and the image URL found on it.
func pageExtract(linkurl string) (nextlink string, imglink string, err error) {
	resp, err := http.Get(linkurl)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()
	decoder := xml.NewDecoder(resp.Body)
	decoder.AutoClose = []string{"link", "meta"}
	decoder.Strict = false
searching:
	for {
		token, err := decoder.Token()
		if err != nil {
			fmt.Println(err)
			if err == io.EOF {
				break searching
			}
		} else {
			switch v := token.(type) {
			case xml.StartElement:
				if isPageClass(v) {
					fmt.Println("found!")
					break searching
				}
			default:
			}
		}
	}
	// Skip one token, then expect the element carrying the next-page link.
	_, err = nextToken(decoder)
	if err != nil {
		fmt.Println(err)
	}
	stelm, err := nextToken(decoder)
	if err != nil {
		return "", "", err
	}
	nstelm, ok := stelm.(xml.StartElement)
	if !ok {
		fmt.Println("cannot convert to start element")
		return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
	}
	fmt.Println("next link:", nstelm.Attr[0].Value)

	img, err := nextToken(decoder)
	if err != nil {
		return "", "", err
	}
	nimg, ok := img.(xml.StartElement)
	if !ok {
		fmt.Println("cannot convert to start element")
		return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
	}
	base, _ := url.Parse(linkurl)
	nexturl, _ := url.Parse(nstelm.Attr[0].Value)
	nextlink = base.ResolveReference(nexturl).String()
	imglink = "https:" + nimg.Attr[1].Value
	err = nil
	return
}
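// Note on pageExtract: it streams tokens until it meets the <div> whose first
// attribute is "page", then assumes the following start elements carry the
// next-page href in their first attribute and the protocol-relative image URL
// in their second attribute. Those attribute positions mirror one site's
// markup and are assumptions, not guarantees.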

// downloadFile fetches imglink and writes the body to a file named after the
// last path segment of the URL.
func downloadFile(imglink string) error {
	parts := strings.Split(imglink, "/")
	fname := parts[len(parts)-1]
	file, err := os.Create(fname)
	if err != nil {
		fmt.Println(err)
		return err
	}
	defer file.Close()
	imgresp, err := http.Get(imglink)
	if err != nil {
		fmt.Println(err)
		return err
	}
	defer imgresp.Body.Close()
	size, err := io.Copy(file, imgresp.Body)
	if err != nil {
		fmt.Println(err)
		return err
	}
	fmt.Printf("downloaded a file %s with size %d\n", fname, size)
	return nil
}
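// Example (hypothetical URL): downloadFile("https://cdn.example.com/img/0001.jpg")
// writes the response body to a local file named "0001.jpg". Each call runs in
// its own goroutine in main, so several pages download concurrently.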

// chapterExtract parses the chapter number out of a link shaped like
// .../r/<title>/<chapter>/..., i.e. the second path segment after "/r/".
// A link without "/r/" yields -1 with a nil error, which main treats as a
// signal to stop fetching.
func chapterExtract(link string) (int, error) {
	rpos := strings.Index(link, "/r/")
	if rpos == -1 {
		return rpos, nil
	}
	nextslash := strings.Index(link[rpos+3:], "/")
	nextslash += rpos + 3
	chapstr := strings.Index(link[nextslash+1:], "/")
	chapstr += nextslash + 1
	intonly := link[nextslash+1 : chapstr]
	result, err := strconv.Atoi(intonly)
	if err != nil {
		return -1, err
	}
	return result, nil
}
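// Worked example (hypothetical URL): for "https://example.com/r/some-title/12/3",
// "/r/" is found, the first segment after it ("some-title") is skipped, and the
// second segment ("12") is converted with strconv.Atoi, so the call returns 12.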

func main() {
	mangaurl := ""
	flag.StringVar(&mangaurl, "url", "", "Please specify the url")
	flag.Parse()
	if mangaurl == "" {
		fmt.Println("url:", mangaurl)
		panic("No url specified")
	}

	var wg sync.WaitGroup
	thisChapter, err := chapterExtract(mangaurl)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("This chapter:", thisChapter)
	var nextlink, imglink string
	nextlink = mangaurl

fetchingpage:
	for {
		nextlink, imglink, err = pageExtract(nextlink)
		if err != nil {
			fmt.Println(err)
			return
		}
		fmt.Println("Next link:", nextlink)
		fmt.Println("img link:", imglink)

		if imglink == "" {
			break fetchingpage
		}

		wg.Add(1)
		go func(w *sync.WaitGroup, link string) {
			defer w.Done()
			if err := downloadFile(link); err != nil {
				fmt.Println(err)
			}
		}(&wg, imglink)

		nextChapter, err := chapterExtract(nextlink)
		if err != nil {
			fmt.Println(err)
			return
		} else if nextChapter == -1 {
			break fetchingpage
		}
		fmt.Printf("old %d and current %d\n", thisChapter, nextChapter)

		if nextChapter != thisChapter {
			break fetchingpage
		}
	}
	wg.Wait()

}
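A minimal way to run the gist, assuming a reader URL whose path looks like /r/<title>/<chapter>/<page> (the address below is a placeholder, not a real source):

	go run downloadmanga.go -url "https://example.com/r/some-title/12/1"

It follows the chain of next-page links, downloads each page image in its own goroutine tracked by the WaitGroup, and stops once the image link is empty or the extracted chapter number changes.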