/* inspired by https://eduardolezcano.com/a-tour-of-go-web-crawler/ + https://github.com/fgrehm/go-tour/blob/master/73-web-crawler-golang-team-solution.go another solution with a Crawler struct, with a "safe" map with mutex and a Crawl method with WaitGroup this solution keeps the original signature of the Crawl function and does not change the main function but does not use or store the body of the URL */ package main import ( "fmt" "sync" ) type Crawler struct { urls SafeMap wg sync.WaitGroup fetcher Fetcher } type SafeMap struct { m map[string]error mux sync.Mutex } func (s *SafeMap) find(url string) (found bool) { s.mux.Lock() defer s.mux.Unlock() _, found = s.m[url] return found } func (s *SafeMap) addOrUpdate(url string, err error) { s.mux.Lock() defer s.mux.Unlock() s.m[url] = err } func (c *Crawler) Crawl(url string, depth int) { defer c.wg.Done() if depth <= 0 { return } if c.urls.find(url) { return } c.urls.addOrUpdate(url, nil) _, urls, err := fetcher.Fetch(url) c.urls.addOrUpdate(url, err) if err != nil { return } for _, newUrl := range urls { c.wg.Add(1) go c.Crawl(newUrl, depth-1) } } func Crawl(url string, depth int, fetcher Fetcher) { if depth <= 0 { return } crawler := Crawler{ urls: SafeMap{ m: make(map[string]error), }, fetcher: fetcher, } crawler.wg.Add(1) go crawler.Crawl(url, depth) crawler.wg.Wait() // blocking operation for url, err := range crawler.urls.m { if err != nil { fmt.Printf("%v failed: %v\n", url, err) } else { fmt.Printf("%v was fetched\n", url) } } } func main() { Crawl("https://golang.org/", 4, fetcher) } type Fetcher interface { // Fetch returns the body of URL and // a slice of URLs found on that page. Fetch(url string) (body string, urls []string, err error) } // fakeFetcher is Fetcher that returns canned results. 
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response for one URL: the page body and the
// links found on that page.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned table. It returns the stored body and
// links when the URL is known, and a "not found" error otherwise.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	page, known := f[url]
	if !known {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return page.body, page.urls, nil
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": {
		body: "The Go Programming Language",
		urls: []string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": {
		body: "Packages",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": {
		body: "Package fmt",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": {
		body: "Package os",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}