Last active
February 17, 2021 10:21
-
-
Save lockwooddev/b857a63d6fb16e3c1f14d1bae76ab1d1 to your computer and use it in GitHub Desktop.
Revisions
-
lockwooddev revised this gist
Feb 17, 2021 . 1 changed file with 3 additions and 12 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,12 +1,3 @@ type Rule struct { UserAgents []string Disallow []string @@ -22,13 +13,13 @@ type RobotRules struct { Sitemaps []string } type Roboteer interface { FetchRobots(url string) (RobotRules, error) } type RobotsClient struct{} func NewRobotsClient() Roboteer { return RobotsClient{} } @@ -86,4 +77,4 @@ func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) { } return robotRules, nil } -
lockwooddev created this gist
Feb 17, 2021 .There are no files selected for viewing
// Rule is a single robots.txt group: the user agents it names and the
// Disallow/Allow values listed for them.
type Rule struct {
	UserAgents []string
	Disallow   []string
	Allow      []string
}

// isEmpty reports whether the rule holds no user agents and no
// Disallow/Allow entries.
func (r Rule) isEmpty() bool {
	return len(r.UserAgents)+len(r.Disallow)+len(r.Allow) == 0
}

// RobotRules is the parsed content of a robots.txt document: the rule
// groups in file order plus every Sitemap value found.
type RobotRules struct {
	rules    []Rule
	Sitemaps []string
}

// Requester fetches the robots.txt document at url and parses it.
type Requester interface {
	FetchRobots(url string) (RobotRules, error)
}

// RobotsClient is the Requester implementation that retrieves the
// document with http.Get.
type RobotsClient struct{}

// NewRobotsClient returns a RobotsClient wrapped in the Requester interface.
func NewRobotsClient() Requester {
	return RobotsClient{}
}

// FetchRobots downloads the document at url and parses it as robots.txt.
//
// It returns the parsed rules, or an error if the request fails, reading the
// body fails, a line has no ':' separator, or a directive name is not one of
// User-agent, Disallow, Allow or Sitemap. On error the returned RobotRules
// holds whatever was parsed before the failure.
//
// NOTE(review): the HTTP status code is not inspected, so the body of an
// error response is parsed like any other document — confirm whether callers
// want non-2xx responses rejected.
func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) {
	res, err := http.Get(url)
	if err != nil {
		return RobotRules{}, err
	}
	defer res.Body.Close()

	return parseRobots(bufio.NewScanner(res.Body))
}

// parseRobots reads robots.txt lines from scanner and groups them into rules.
func parseRobots(scanner *bufio.Scanner) (RobotRules, error) {
	robotRules := RobotRules{}
	rule := Rule{}
	lineNo := 0

	for scanner.Scan() {
		lineNo++
		// Trim first so whitespace-only lines count as blank and indented
		// comments or directives are still recognised.
		line := strings.TrimSpace(scanner.Text())

		// A blank line closes the current group.
		if line == "" {
			if !rule.isEmpty() {
				robotRules.rules = append(robotRules.rules, rule)
			}
			rule = Rule{}
			continue
		}

		// Skip comment lines.
		if strings.HasPrefix(line, "#") {
			continue
		}

		// A directive must be "name: value"; without the separator there is
		// no value to read, so report the line instead of indexing past the
		// end of parts.
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			return robotRules, fmt.Errorf("malformed directive %q on line %d: missing ':'", line, lineNo)
		}
		key := strings.TrimSpace(parts[0])
		value := strings.TrimSpace(parts[1])

		// Directive names are matched case-insensitively ("User-Agent" and
		// "user-agent" are both common in the wild).
		switch strings.ToLower(key) {
		case "user-agent":
			rule.UserAgents = append(rule.UserAgents, value)
		case "disallow":
			rule.Disallow = append(rule.Disallow, value)
		case "allow":
			rule.Allow = append(rule.Allow, value)
		case "sitemap":
			robotRules.Sitemaps = append(robotRules.Sitemaps, value)
		default:
			return robotRules, fmt.Errorf("illegal directive type '%s' on line %d", key, lineNo)
		}
	}

	if err := scanner.Err(); err != nil {
		return robotRules, err
	}

	// The document may end without a trailing blank line; keep the group
	// that was still open at EOF.
	if !rule.isEmpty() {
		robotRules.rules = append(robotRules.rules, rule)
	}

	return robotRules, nil
}