/* inspired by https://eduardolezcano.com/a-tour-of-go-web-crawler/ and https://github.com/fgrehm/go-tour/blob/master/73-web-crawler-golang-team-solution.go solution with a "safe" map with mutex + a sub-function crawl with WaitGroup this solution keeps the original signature of the Crawl function and does not change the main function but does not use or store the body of the URL */ package main import ( "fmt" "sync" ) type SafeMap struct { urls map[string]error mux sync.Mutex } func (s *SafeMap) Find(url string) (found bool) { s.mux.Lock() defer s.mux.Unlock() _, found = s.urls[url] return found } func (s *SafeMap) AddOrUpdate(url string, err error) { s.mux.Lock() defer s.mux.Unlock() s.urls[url] = err } func crawl(url string, depth int, fetcher Fetcher, urlsFetched *SafeMap, wg *sync.WaitGroup) { defer wg.Done() if depth <= 0 { return } if urlsFetched.Find(url) { return } urlsFetched.AddOrUpdate(url, nil) _, urls, err := fetcher.Fetch(url) urlsFetched.AddOrUpdate(url, err) if err != nil { return } for _, newUrl := range urls { wg.Add(1) go crawl(newUrl, depth-1, fetcher, urlsFetched, wg) } } func Crawl(url string, depth int, fetcher Fetcher) { if depth <= 0 { return } urlsFetched := SafeMap{ urls: make(map[string]error), } wg := new(sync.WaitGroup) wg.Add(1) go crawl(url, depth, fetcher, &urlsFetched, wg) wg.Wait() // blocking operation for url, err := range urlsFetched.urls { if err != nil { fmt.Printf("%v failed: %v\n", url, err) } else { fmt.Printf("%v was fetched\n", url) } } } func main() { Crawl("https://golang.org/", 4, fetcher) } type Fetcher interface { // Fetch returns the body of URL and // a slice of URLs found on that page. Fetch(url string) (body string, urls []string, err error) } // fakeFetcher is Fetcher that returns canned results. 
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response stored for one URL: the page body and
// the links listed on that page.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned results. It returns the stored body and
// links when the URL is present, and a "not found" error otherwise.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	page, known := f[url]
	if !known {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return page.body, page.urls, nil
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": {
		body: "The Go Programming Language",
		urls: []string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": {
		body: "Packages",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": {
		body: "Package fmt",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": {
		body: "Package os",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}