Last active: November 7, 2022 12:27
Revisions
mashingan revised this gist
Apr 30, 2019. 1 changed file with 3 additions and 0 deletions.

@@ -11,6 +11,7 @@ import (
     "strconv"
     "strings"
     "sync"
+    "time"
 )

 func toString(name xml.Name) string {
@@ -150,6 +151,7 @@ func main() {
     var nextlink, imglink string
     nextlink = mangaurl

+    start := time.Now()
 fetchingpage:
     for {
         nextlink, imglink, err = pageExtract(nextlink)
@@ -186,5 +188,6 @@ fetchingpage:
         }
     }
     wg.Wait()
+    fmt.Println("ended after:", time.Now().Sub(start))
 }
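The three added lines only time the whole download run. As a minimal standalone sketch of that pattern, with a placeholder sleep standing in for the gist's fetching loop (the sleep is not part of the gist):

package main

import (
    "fmt"
    "time"
)

func main() {
    // Record the start time, as the revision does before the fetchingpage loop.
    start := time.Now()

    // Stand-in for the page-fetching and downloading work.
    time.Sleep(150 * time.Millisecond)

    // time.Now().Sub(start) is what the revision prints;
    // time.Since(start) is the equivalent shorthand.
    fmt.Println("ended after:", time.Now().Sub(start))
}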
mashingan created this gist
Apr 30, 2019.
package main

import (
    "encoding/xml"
    "flag"
    "fmt"
    "io"
    "net/http"
    "net/url"
    "os"
    "strconv"
    "strings"
    "sync"
)

// toString joins an xml.Name's namespace and local part (kept for debugging).
func toString(name xml.Name) string {
    return name.Space + ":" + name.Local
}

// isPageClass reports whether the token is a <div> whose first attribute
// value is "page", i.e. the element wrapping the page's links.
func isPageClass(t xml.StartElement) bool {
    return t.Name == xml.Name{Space: "", Local: "div"} &&
        len(t.Attr) > 0 && t.Attr[0].Value == "page"
}

// nextToken reads one token from the decoder and logs its concrete type.
func nextToken(d *xml.Decoder) (token xml.Token, err error) {
    token, err = d.Token()
    if err != nil {
        return
    }
    switch v := token.(type) {
    default:
        fmt.Printf("the type: %T\n", v)
    }
    return
}

// pageExtract fetches linkurl and scans its markup for the "page" div, then
// reads the following tokens: the anchor's first attribute becomes the
// next-page link and the image element's second attribute becomes the image URL.
func pageExtract(linkurl string) (nextlink string, imglink string, err error) {
    resp, err := http.Get(linkurl)
    if err != nil {
        return "", "", err
    }
    decoder := xml.NewDecoder(resp.Body)
    decoder.AutoClose = []string{"link", "meta"}
    decoder.Strict = false

searching:
    for {
        token, err := decoder.Token()
        if err != nil {
            fmt.Println(err)
            if err == io.EOF {
                break searching
            }
        } else {
            switch v := token.(type) {
            case xml.StartElement:
                if isPageClass(v) {
                    fmt.Println("found!")
                    break searching
                }
            default:
            }
        }
    }

    _, err = nextToken(decoder)
    if err != nil {
        fmt.Println(err)
    }
    stelm, err := nextToken(decoder)
    nstelm, ok := stelm.(xml.StartElement)
    if !ok {
        fmt.Println("cannot convert to start element")
        return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
    }
    fmt.Println("next link:", nstelm.Attr[0].Value)
    img, err := nextToken(decoder)
    nimg, ok := img.(xml.StartElement)
    if !ok {
        fmt.Println("cannot convert to start element")
        return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
    }
    // Resolve a possibly relative next-page link against the current page,
    // and turn the protocol-relative image source into an https URL.
    base, _ := url.Parse(linkurl)
    nexturl, _ := url.Parse(nstelm.Attr[0].Value)
    nextlink = base.ResolveReference(nexturl).String()
    imglink = "https:" + nimg.Attr[1].Value
    err = nil
    return
}

// downloadFile fetches imglink and writes it to a file named after the last
// path segment of the URL.
func downloadFile(imglink string) error {
    lenImgName := strings.Split(imglink, "/")
    fname := lenImgName[len(lenImgName)-1]
    file, err := os.Create(fname)
    defer file.Close()
    if err != nil {
        fmt.Println(err)
        return err
    }
    imgresp, err := http.Get(imglink)
    if err != nil {
        fmt.Println(err)
        return err
    }
    defer imgresp.Body.Close()
    size, err := io.Copy(file, imgresp.Body)
    if err != nil {
        fmt.Println(err)
        return err
    }
    fmt.Printf("downloaded a file %s with size %d\n", fname, size)
    return nil
}

// chapterExtract returns the number that sits two path segments after "/r/"
// in link (the chapter number), or -1 with a nil error when the link has no
// "/r/" segment.
func chapterExtract(link string) (int, error) {
    rpos := strings.Index(link, "/r/")
    if rpos == -1 {
        return rpos, nil
    }
    nextslash := strings.Index(link[rpos+3:], "/")
    nextslash += rpos + 3
    chapstr := strings.Index(link[nextslash+1:], "/")
    chapstr += nextslash + 1
    intonly := link[nextslash+1 : chapstr]
    result, err := strconv.Atoi(intonly)
    if err != nil {
        return -1, err
    }
    return result, nil
}

// main walks a chapter page by page: each pageExtract call yields the image
// on the current page and the link to the next one; images download
// concurrently, and the loop stops when the image link is empty or the
// chapter number in the next link changes.
func main() {
    mangaurl := ""
    flag.StringVar(&mangaurl, "url", "", "Please specify the url")
    flag.Parse()
    if mangaurl == "" {
        fmt.Println("url:", mangaurl)
        panic("No url specified")
    }

    var wg sync.WaitGroup
    thisChapter, err := chapterExtract(mangaurl)
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println("This chapter:", thisChapter)
    var nextlink, imglink string
    nextlink = mangaurl

fetchingpage:
    for {
        nextlink, imglink, err = pageExtract(nextlink)
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println("Next link:", nextlink)
        fmt.Println("img link:", imglink)
        if imglink == "" {
            break fetchingpage
        }

        wg.Add(1)
        go func(w *sync.WaitGroup, link string) {
            defer w.Done()
            if err := downloadFile(link); err != nil {
                fmt.Println(err)
            }
        }(&wg, imglink)

        nextChapter, err := chapterExtract(nextlink)
        if err != nil {
            fmt.Println(err)
            return
        } else if nextChapter == -1 {
            break fetchingpage
        }
        fmt.Printf("old %d and current %d\n", thisChapter, nextChapter)
        if nextChapter != thisChapter {
            break fetchingpage
        }
    }
    wg.Wait()
}
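For reference, the chapter check in the loop hinges on chapterExtract reading the number that sits two path segments after "/r/". A small sketch of that slicing on a made-up reader URL (the host, title, and helper name below are hypothetical; only the slicing mirrors the gist):

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// chapterOf mirrors chapterExtract's slicing: skip "/r/" and the title
// segment, then read the next segment as the chapter number. Like the gist,
// it assumes a trailing segment after the chapter.
func chapterOf(link string) (int, error) {
    rpos := strings.Index(link, "/r/")
    if rpos == -1 {
        return -1, nil
    }
    titleEnd := rpos + 3 + strings.Index(link[rpos+3:], "/")
    chapEnd := titleEnd + 1 + strings.Index(link[titleEnd+1:], "/")
    return strconv.Atoi(link[titleEnd+1 : chapEnd])
}

func main() {
    // Hypothetical URL shaped like the reader links the gist expects.
    link := "https://example.com/r/some-title/42/7"
    fmt.Println(chapterOf(link)) // 42 <nil>
}

The gist itself is driven through its -url flag pointed at such a page; it keeps fetching pages and downloading images until the image link is empty or the chapter number in the next link changes or disappears.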