robots.go

@lockwooddev
Last active February 17, 2021 10:21

Revisions

  1. lockwooddev revised this gist Feb 17, 2021. 1 changed file with 3 additions and 12 deletions.
    15 changes: 3 additions & 12 deletions robots.go
    @@ -1,12 +1,3 @@
    -package article
    -
    -import (
    -    "bufio"
    -    "fmt"
    -    "net/http"
    -    "strings"
    -)
    -
     type Rule struct {
         UserAgents []string
         Disallow []string
    @@ -22,13 +13,13 @@ type RobotRules struct {
         Sitemaps []string
     }
     
    -type Requester interface {
    +type Roboteer interface {
         FetchRobots(url string) (RobotRules, error)
     }
     
     type RobotsClient struct{}
     
    -func NewRobotsClient() Requester {
    +func NewRobotsClient() Roboteer {
         return RobotsClient{}
     }

    @@ -86,4 +77,4 @@ func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) {
         }
     
         return robotRules, nil
    -}
    +}
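
    The rename from Requester to Roboteer doesn't change behaviour: the
    interface is the seam that lets callers substitute a test double for the
    HTTP-backed client. A minimal sketch of such a fake (fakeRoboteer and its
    canned values are illustrative, not part of the gist):

    type fakeRoboteer struct {
        rules RobotRules
        err   error
    }

    // FetchRobots returns the canned values instead of performing an HTTP request.
    func (f fakeRoboteer) FetchRobots(url string) (RobotRules, error) {
        return f.rules, f.err
    }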
  2. lockwooddev created this gist Feb 17, 2021.
    89 changes: 89 additions & 0 deletions robots.go
    @@ -0,0 +1,89 @@
    package article

    import (
        "bufio"
        "fmt"
        "net/http"
        "strings"
    )

    // Rule holds one robots.txt record: the user agents it applies to and
    // their Disallow/Allow path patterns.
    type Rule struct {
        UserAgents []string
        Disallow   []string
        Allow      []string
    }

    func (r Rule) isEmpty() bool {
        return (len(r.UserAgents) + len(r.Disallow) + len(r.Allow)) == 0
    }

    // RobotRules is a parsed robots.txt file: its records plus any Sitemap URLs.
    type RobotRules struct {
        rules    []Rule
        Sitemaps []string
    }

    type Requester interface {
        FetchRobots(url string) (RobotRules, error)
    }

    type RobotsClient struct{}

    func NewRobotsClient() Requester {
        return RobotsClient{}
    }

    // FetchRobots downloads the robots.txt at url and parses it line by line.
    func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) {
        robotRules := RobotRules{}

        res, err := http.Get(url)
        if err != nil {
            return robotRules, err
        }
        defer res.Body.Close()

        rule := Rule{}
        scanner := bufio.NewScanner(res.Body)
        lineNo := 0
        for scanner.Scan() {
            lineNo++
            line := scanner.Text()

            // A blank line ends the current record: append it and reset
            // for the next one.
            if line == "" {
                if !rule.isEmpty() {
                    robotRules.rules = append(robotRules.rules, rule)
                }
                rule = Rule{}
                continue
            }

            // Skip comments.
            if strings.HasPrefix(line, "#") {
                continue
            }

            // Split "Key: value" at the first colon only, so URL values such
            // as sitemap locations keep their colons. Guard against lines
            // without a colon, which would otherwise panic on parts[1].
            parts := strings.SplitN(line, ":", 2)
            if len(parts) < 2 {
                return robotRules, fmt.Errorf("malformed directive '%s' on line %d", line, lineNo)
            }
            key := parts[0]
            value := strings.TrimSpace(parts[1])

            switch key {
            case "User-agent":
                rule.UserAgents = append(rule.UserAgents, value)
            case "Disallow":
                rule.Disallow = append(rule.Disallow, value)
            case "Allow":
                rule.Allow = append(rule.Allow, value)
            case "Sitemap":
                robotRules.Sitemaps = append(robotRules.Sitemaps, value)
            default:
                return robotRules, fmt.Errorf("illegal directive type '%s' on line %d", key, lineNo)
            }
        }
        if err := scanner.Err(); err != nil {
            return robotRules, err
        }

        // Append the final record when the file does not end with a blank line.
        if !rule.isEmpty() {
            robotRules.rules = append(robotRules.rules, rule)
        }

        return robotRules, nil
    }
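
    A quick sketch of how the client is meant to be used; the robots.txt URL
    and the import path for package article are assumptions for illustration:

    package main

    import (
        "fmt"
        "log"

        "article" // hypothetical import path for the gist's package
    )

    func main() {
        client := article.NewRobotsClient()
        rules, err := client.FetchRobots("https://example.com/robots.txt")
        if err != nil {
            log.Fatal(err)
        }
        // Only Sitemaps is exported; the parsed records live in the
        // unexported rules field.
        fmt.Println(rules.Sitemaps)
    }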