Skip to content

Instantly share code, notes, and snippets.

@cabloo
Created August 27, 2020 03:27
Show Gist options
  • Select an option

  • Save cabloo/65b94c6ee50307fead93ec4af928afcf to your computer and use it in GitHub Desktop.

Select an option

Save cabloo/65b94c6ee50307fead93ec4af928afcf to your computer and use it in GitHub Desktop.

Revisions

  1. cabloo renamed this gist Aug 27, 2020. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. cabloo created this gist Aug 27, 2020.
    155 changes: 155 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,155 @@
    package main

    import (
    "fmt"
    )

    // Fetcher is the page-retrieval abstraction that Crawl receives as its
    // fetcher argument.
    type Fetcher interface {
    // Fetch returns the body of URL and
    // a slice of URLs found on that page.
    // err is returned alongside them; main prints it instead of the body
    // when it is non-nil.
    Fetch(url string) (body string, urls []string, err error)
    }

    // FetchResult carries the outcome of fetching a single URL: the URL
    // itself, the body returned for it, and the error from the fetch.
    type FetchResult struct {
    url string // URL that was fetched
    body string // body returned by the fetch
    err error // error returned by the fetch; main treats nil as success
    }

    // IUniqueReservationMap lets callers attempt to reserve a string key.
    type IUniqueReservationMap interface {
    // Reserve requests a reservation for key and returns a channel on
    // which the outcome is delivered. In the implementation built by
    // createUniqueReservationMap the channel yields true for the first
    // request for a key and false for every later request for it.
    Reserve(key string) chan bool
    }

    // UniqueReservationMap implements IUniqueReservationMap by forwarding
    // every reservation request over a channel.
    type UniqueReservationMap struct {
    // isReserved points at the map recording which keys have been
    // reserved; the goroutine started by createUniqueReservationMap reads
    // and writes the underlying map.
    isReserved *map[string] bool
    // lockAttempts carries reservation requests from Reserve to the
    // goroutine started by createUniqueReservationMap.
    lockAttempts chan lockAttempt
    }

    // lockAttempt is one reservation request sent over
    // UniqueReservationMap.lockAttempts.
    type lockAttempt struct {
    key string // key the caller wants to reserve
    success chan bool // receives true if key was not reserved before this attempt
    }

    // Reserve submits a reservation request for key on
    // reservationMap.lockAttempts and returns the channel on which the answer
    // is delivered: true if key had not been reserved before this request,
    // false otherwise.
    //
    // Reserve blocks until the request has been handed over to the goroutine
    // serving lockAttempts; it does not wait for the answer itself.
    func (reservationMap UniqueReservationMap) Reserve(key string) chan bool {
        // A one-slot buffer lets the serving goroutine deposit its answer
        // without waiting for the caller to receive it, so a caller that is
        // slow to read (or drops the channel) cannot stall every other
        // reservation.
        attempt := lockAttempt{key: key, success: make(chan bool, 1)}
        reservationMap.lockAttempts <- attempt
        return attempt.success
    }

    // createUniqueReservationMap builds a UniqueReservationMap and starts the
    // goroutine that answers its reservation requests.
    //
    // The reserved-key map is only read and written inside that goroutine.
    // For each request it replies true if the key had not been reserved yet,
    // then marks the key as reserved, so only the first request for a key
    // ever sees true.
    func createUniqueReservationMap() IUniqueReservationMap {
        isReserved := make(map[string]bool)
        reservationMap := UniqueReservationMap{
            isReserved:   &isReserved,
            lockAttempts: make(chan lockAttempt),
        }
        go func() {
            // Ranging over the channel (rather than a single-case select in an
            // endless loop) lets this goroutine finish if lockAttempts is ever
            // closed, instead of receiving a zero request and blocking forever
            // on its nil reply channel.
            for attempt := range reservationMap.lockAttempts {
                attempt.success <- !isReserved[attempt.key]
                isReserved[attempt.key] = true
            }
        }()
        return reservationMap
    }

    // Crawl uses fetcher to recursively crawl
    // pages starting with url, to a maximum of depth.
    func Crawl(url string, depth int, fetcher Fetcher, results chan<- FetchResult) {
    crawlRequests := make(chan string, int)
    urlReservations := make(map[string] bool)
    fetchAndCrawl := func (url string, depth int, results chan<- FetchResult) {
    if depth <= 0 {
    return
    }

    body, urls, err := fetcher.Fetch(url)
    results <- FetchResult{url, body, err}

    if len(urls) == 0 {
    done <- true
    return
    }

    subResults := make([]chan FetchResult, len(urls))
    for i, url := range urls {
    subResults[i] = crawl(url, depth-1, fetcher)
    }

    for _, resultChannel := range subResults {
    result <- resultChannel
    }
    results.close()
    }

    go func() {
    for {
    select {
    case crawlRequest <- crawlRequests:
    if crawlRequests[crawlRequest] == nil { break }
    go fetchAndCrawl(...crawlRequest, results)
    }
    }
    }()
    }

    // main starts a crawl of "https://golang.org/" to depth 4 using the canned
    // fetcher and prints every result it receives: the error when the result
    // carries one, otherwise the URL and quoted body. After the results
    // channel has been drained it prints "done!".
    func main() {
        found := make(chan FetchResult)
        go Crawl("https://golang.org/", 4, fetcher, found)
        for page := range found {
            if page.err == nil {
                fmt.Printf("found: %s %q\n", page.url, page.body)
                continue
            }
            fmt.Println(page.err)
        }

        fmt.Printf("done!")
    }

    // fakeFetcher is a Fetcher that returns canned results.
    // It maps a URL to the result that Fetch reports for that URL.
    type fakeFetcher map[string]*fakeResult

    // fakeResult is the canned response stored for one URL in a fakeFetcher.
    type fakeResult struct {
    body string // page body returned by Fetch
    urls []string // URLs returned by Fetch as the links found on the page
    }

    // Fetch looks url up in the canned results. For a known URL it returns the
    // stored body and link list with a nil error; for any other URL it returns
    // empty values and a "not found" error naming the URL.
    func (f fakeFetcher) Fetch(url string) (string, []string, error) {
        canned, known := f[url]
        if !known {
            return "", nil, fmt.Errorf("not found: %s", url)
        }
        return canned.body, canned.urls, nil
    }

    // fetcher is a populated fakeFetcher holding four canned golang.org pages.
    // "https://golang.org/cmd/" is linked from two of them but has no entry of
    // its own, so fetching it yields a "not found" error.
    var fetcher = fakeFetcher{
        "https://golang.org/": {
            body: "The Go Programming Language",
            urls: []string{
                "https://golang.org/pkg/",
                "https://golang.org/cmd/",
            },
        },
        "https://golang.org/pkg/": {
            body: "Packages",
            urls: []string{
                "https://golang.org/",
                "https://golang.org/cmd/",
                "https://golang.org/pkg/fmt/",
                "https://golang.org/pkg/os/",
            },
        },
        "https://golang.org/pkg/fmt/": {
            body: "Package fmt",
            urls: []string{
                "https://golang.org/",
                "https://golang.org/pkg/",
            },
        },
        "https://golang.org/pkg/os/": {
            body: "Package os",
            urls: []string{
                "https://golang.org/",
                "https://golang.org/pkg/",
            },
        },
    }