-
-
Save imxiaohui/9d320ab83ce611f923a092954e8bac03 to your computer and use it in GitHub Desktop.
Go Tour concurrency crawler exercise with goroutines https://tour.golang.org/concurrency/10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| inspired by https://eduardolezcano.com/a-tour-of-go-web-crawler/ | |
| and https://github.com/fgrehm/go-tour/blob/master/73-web-crawler-golang-team-solution.go | |
| solution with a "safe" map with mutex + a sub-function crawl with WaitGroup | |
| this solution keeps the original signature of the Crawl function and does not change the main function | |
| but does not use or store the body of the URL | |
| */ | |
| package main | |
| import ( | |
| "fmt" | |
| "sync" | |
| ) | |
// SafeMap is a mutex-guarded record of crawled URLs: each key is a URL and
// each value is the error stored for it (nil when none was recorded).
//
// The zero value is ready to use. A SafeMap contains a sync.Mutex and must
// therefore be shared by pointer, never copied.
type SafeMap struct {
	urls map[string]error
	mux  sync.Mutex
}

// Find reports whether url already has an entry in the map.
func (s *SafeMap) Find(url string) (found bool) {
	s.mux.Lock()
	defer s.mux.Unlock()
	// Reading from a nil map is safe, so no allocation is needed here.
	_, found = s.urls[url]
	return found
}

// AddOrUpdate stores err as the entry for url, replacing any previous entry.
func (s *SafeMap) AddOrUpdate(url string, err error) {
	s.mux.Lock()
	defer s.mux.Unlock()
	if s.urls == nil {
		// Writing to a nil map panics; allocate lazily so the zero value works.
		s.urls = make(map[string]error)
	}
	s.urls[url] = err
}
| func crawl(url string, depth int, fetcher Fetcher, urlsFetched *SafeMap, wg *sync.WaitGroup) { | |
| defer wg.Done() | |
| if depth <= 0 { | |
| return | |
| } | |
| if urlsFetched.Find(url) { | |
| return | |
| } | |
| urlsFetched.AddOrUpdate(url, nil) | |
| _, urls, err := fetcher.Fetch(url) | |
| urlsFetched.AddOrUpdate(url, err) | |
| if err != nil { | |
| return | |
| } | |
| for _, newUrl := range urls { | |
| wg.Add(1) | |
| go crawl(newUrl, depth-1, fetcher, urlsFetched, wg) | |
| } | |
| } | |
| func Crawl(url string, depth int, fetcher Fetcher) { | |
| if depth <= 0 { | |
| return | |
| } | |
| urlsFetched := SafeMap{ | |
| urls: make(map[string]error), | |
| } | |
| wg := new(sync.WaitGroup) | |
| wg.Add(1) | |
| go crawl(url, depth, fetcher, &urlsFetched, wg) | |
| wg.Wait() // blocking operation | |
| for url, err := range urlsFetched.urls { | |
| if err != nil { | |
| fmt.Printf("%v failed: %v\n", url, err) | |
| } else { | |
| fmt.Printf("%v was fetched\n", url) | |
| } | |
| } | |
| } | |
| func main() { | |
| Crawl("https://golang.org/", 4, fetcher) | |
| } | |
// Fetcher is the page source the crawler reads from.
type Fetcher interface {
	// Fetch returns the body of the page at url together with a slice of
	// the URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
// fakeFetcher is an in-memory Fetcher that maps each known URL to the canned
// result served for it.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned page stored for one URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned results. A URL without an entry yields a
// "not found" error along with an empty body and a nil link slice.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	page, known := f[url]
	if !known {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return page.body, page.urls, nil
}
| // fetcher is a populated fakeFetcher. | |
| var fetcher = fakeFetcher{ | |
| "https://golang.org/": &fakeResult{ | |
| "The Go Programming Language", | |
| []string{ | |
| "https://golang.org/pkg/", | |
| "https://golang.org/cmd/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/": &fakeResult{ | |
| "Packages", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/cmd/", | |
| "https://golang.org/pkg/fmt/", | |
| "https://golang.org/pkg/os/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/fmt/": &fakeResult{ | |
| "Package fmt", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/os/": &fakeResult{ | |
| "Package os", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| inspired by https://eduardolezcano.com/a-tour-of-go-web-crawler/ | |
| + https://github.com/fgrehm/go-tour/blob/master/73-web-crawler-golang-team-solution.go | |
| another solution with a Crawler struct, with a "safe" map with mutex | |
| and a Crawl method with WaitGroup | |
| this solution keeps the original signature of the Crawl function and does not change the main function | |
| but does not use or store the body of the URL | |
| */ | |
| package main | |
| import ( | |
| "fmt" | |
| "sync" | |
| ) | |
| type Crawler struct { | |
| urls SafeMap | |
| wg sync.WaitGroup | |
| fetcher Fetcher | |
| } | |
// SafeMap is a map from URL to the error stored for it, guarded by a mutex so
// it can be shared between goroutines.
//
// The zero value is usable as an empty map. Because it embeds a sync.Mutex it
// must not be copied once in use.
type SafeMap struct {
	m   map[string]error
	mux sync.Mutex
}

// find reports whether url has an entry.
func (s *SafeMap) find(url string) (found bool) {
	s.mux.Lock()
	defer s.mux.Unlock()
	_, found = s.m[url] // lookup on a nil map simply reports "absent"
	return found
}

// addOrUpdate sets the entry for url to err, creating or overwriting it.
func (s *SafeMap) addOrUpdate(url string, err error) {
	s.mux.Lock()
	defer s.mux.Unlock()
	// A write to a nil map panics, so allocate on first use; this keeps a
	// SafeMap declared without an explicit make usable.
	if s.m == nil {
		s.m = make(map[string]error)
	}
	s.m[url] = err
}
| func (c *Crawler) Crawl(url string, depth int) { | |
| defer c.wg.Done() | |
| if depth <= 0 { | |
| return | |
| } | |
| if c.urls.find(url) { | |
| return | |
| } | |
| c.urls.addOrUpdate(url, nil) | |
| _, urls, err := fetcher.Fetch(url) | |
| c.urls.addOrUpdate(url, err) | |
| if err != nil { | |
| return | |
| } | |
| for _, newUrl := range urls { | |
| c.wg.Add(1) | |
| go c.Crawl(newUrl, depth-1) | |
| } | |
| } | |
| func Crawl(url string, depth int, fetcher Fetcher) { | |
| if depth <= 0 { | |
| return | |
| } | |
| crawler := Crawler{ | |
| urls: SafeMap{ | |
| m: make(map[string]error), | |
| }, | |
| fetcher: fetcher, | |
| } | |
| crawler.wg.Add(1) | |
| go crawler.Crawl(url, depth) | |
| crawler.wg.Wait() // blocking operation | |
| for url, err := range crawler.urls.m { | |
| if err != nil { | |
| fmt.Printf("%v failed: %v\n", url, err) | |
| } else { | |
| fmt.Printf("%v was fetched\n", url) | |
| } | |
| } | |
| } | |
| func main() { | |
| Crawl("https://golang.org/", 4, fetcher) | |
| } | |
// Fetcher is what the crawler uses to retrieve pages.
type Fetcher interface {
	// Fetch returns the body of the given URL and a slice of the URLs
	// found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
// fakeFetcher implements Fetcher on top of a fixed URL-to-result table.
type fakeFetcher map[string]*fakeResult

// fakeResult holds the body and outgoing links served for one URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned body and links stored for url, or a "not found"
// error (with an empty body and nil links) when the table has no such URL.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	entry, present := f[url]
	switch {
	case present:
		return entry.body, entry.urls, nil
	default:
		return "", nil, fmt.Errorf("not found: %s", url)
	}
}
| // fetcher is a populated fakeFetcher. | |
| var fetcher = fakeFetcher{ | |
| "https://golang.org/": &fakeResult{ | |
| "The Go Programming Language", | |
| []string{ | |
| "https://golang.org/pkg/", | |
| "https://golang.org/cmd/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/": &fakeResult{ | |
| "Packages", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/cmd/", | |
| "https://golang.org/pkg/fmt/", | |
| "https://golang.org/pkg/os/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/fmt/": &fakeResult{ | |
| "Package fmt", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/os/": &fakeResult{ | |
| "Package os", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| inspired by https://eduardolezcano.com/a-tour-of-go-web-crawler/ | |
| and https://github.com/fgrehm/go-tour/blob/master/73-web-crawler-golang-team-solution.go | |
| solution with a "safe" map with mutex | |
| WITHOUT a WaitGroup | |
| */ | |
| package main | |
| import ( | |
| "fmt" | |
| "sync" | |
| ) | |
// SafeMap pairs a URL-to-error map with the mutex that guards it, so several
// goroutines can record and query URLs concurrently.
//
// A zero SafeMap behaves as an empty map. Share it by pointer: copying it
// would copy the mutex.
type SafeMap struct {
	m   map[string]error
	mux sync.Mutex
}

// Find reports whether an entry exists for url.
func (s *SafeMap) Find(url string) (found bool) {
	s.mux.Lock()
	defer s.mux.Unlock()
	_, found = s.m[url] // safe even when m is still nil
	return found
}

// AddOrUpdate stores err under url, replacing whatever was stored before.
func (s *SafeMap) AddOrUpdate(url string, err error) {
	s.mux.Lock()
	defer s.mux.Unlock()
	if s.m == nil {
		// Assigning into a nil map panics, so create it on first write.
		s.m = make(map[string]error)
	}
	s.m[url] = err
}
| func Crawl(url string, depth int, fetcher Fetcher) { | |
| if depth <= 0 { | |
| return | |
| } | |
| if fetched.Find(url){ | |
| return | |
| } | |
| // We mark the url to be loading to avoid others reloading it at the same time. | |
| fetched.AddOrUpdate(url, nil) | |
| // We load it concurrently. | |
| _, urls, err := fetcher.Fetch(url) | |
| // And update the status in a synced zone. | |
| fetched.AddOrUpdate(url, err) | |
| if err != nil { | |
| return | |
| } | |
| // https://gobyexample.com/channel-synchronization | |
| // When waiting for multiple goroutines to finish, you may prefer to use a WaitGroup | |
| done := make(chan bool) | |
| for _, u := range urls { | |
| go func(url string) { | |
| Crawl(url, depth-1, fetcher) | |
| done <- true | |
| }(u) | |
| } | |
| for range urls { | |
| <-done // blocking operation | |
| // un channel en reception est bloquant | |
| // comment savoir quand il n y a plus d'URL a scan ? | |
| // parce quon est dans une boucle for de la meme taille | |
| // que le nombre de goroutines lancées | |
| // cela marche car le nombre de goroutines est connu | |
| // si jamais il n'etait pas connu il faudrait utiliser un WaitGroup | |
| // ici le `<- done` est donc "faux" dans le sens où | |
| // ce n'est pas forcement cette URL qui est en attente | |
| } | |
| } | |
| var fetched = SafeMap{ | |
| m: make(map[string]error), | |
| } | |
| func main() { | |
| Crawl("https://golang.org/", 4, fetcher) | |
| for url, err := range fetched.m { | |
| if err != nil { | |
| fmt.Printf("%v failed: %v\n", url, err) | |
| } else { | |
| fmt.Printf("%v was fetched\n", url) | |
| } | |
| } | |
| } | |
// Fetcher is the source of pages consumed by Crawl.
type Fetcher interface {
	// Fetch returns the body of the page identified by url, plus a slice of
	// the URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
// fakeFetcher is a Fetcher backed by a map of canned results, keyed by URL.
type fakeFetcher map[string]*fakeResult

// fakeResult is one canned page: its body and the URLs it links to.
type fakeResult struct {
	body string
	urls []string
}

// Fetch serves the canned result for url. When url is not in the map it
// returns an empty body, nil links and a "not found" error.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	hit, found := f[url]
	if found {
		body, links := hit.body, hit.urls
		return body, links, nil
	}
	notFound := fmt.Errorf("not found: %s", url)
	return "", nil, notFound
}
| // fetcher is a populated fakeFetcher. | |
| var fetcher = fakeFetcher{ | |
| "https://golang.org/": &fakeResult{ | |
| "The Go Programming Language", | |
| []string{ | |
| "https://golang.org/pkg/", | |
| "https://golang.org/cmd/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/": &fakeResult{ | |
| "Packages", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/cmd/", | |
| "https://golang.org/pkg/fmt/", | |
| "https://golang.org/pkg/os/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/fmt/": &fakeResult{ | |
| "Package fmt", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| "https://golang.org/pkg/os/": &fakeResult{ | |
| "Package os", | |
| []string{ | |
| "https://golang.org/", | |
| "https://golang.org/pkg/", | |
| }, | |
| }, | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| a solution without a safe map with mutex and without a waitgroup ? | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment