Last active
February 17, 2021 10:21
-
-
Save lockwooddev/b857a63d6fb16e3c1f14d1bae76ab1d1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rule collects the directives of a single robots.txt group, i.e. the
// consecutive directive lines found between two blank lines.
type Rule struct {
	// UserAgents lists the values of the group's "User-agent" lines.
	UserAgents []string
	// Disallow lists the values of the group's "Disallow" lines.
	Disallow []string
	// Allow lists the values of the group's "Allow" lines.
	Allow []string
}
| func (r Rule) isEmpty() bool { | |
| return (len(r.UserAgents) + len(r.Disallow) + len(r.Allow)) == 0 | |
| } | |
| type RobotRules struct { | |
| rules []Rule | |
| Sitemaps []string | |
| } | |
| type Roboteer interface { | |
| FetchRobots(url string) (RobotRules, error) | |
| } | |
// RobotsClient fetches robots.txt documents over HTTP. It has no fields, so
// its zero value is ready to use.
type RobotsClient struct{}
| func NewRobotsClient() Roboteer { | |
| return RobotsClient{} | |
| } | |
| func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) { | |
| robotRules := RobotRules{} | |
| res, err := http.Get(url) | |
| if err != nil { | |
| return robotRules, err | |
| } | |
| defer res.Body.Close() | |
| rule := Rule{} | |
| scanner := bufio.NewScanner(res.Body) | |
| lineNo := 0 | |
| for scanner.Scan() { | |
| lineNo++ | |
| line := scanner.Text() | |
| // append rule and reset rule for next directive | |
| if line == "" { | |
| if !rule.isEmpty() { | |
| robotRules.rules = append(robotRules.rules, rule) | |
| } | |
| rule = Rule{} | |
| continue | |
| } | |
| // Skip comment | |
| if strings.HasPrefix(line, "#") { | |
| continue | |
| } | |
| // Validate directive parts | |
| parts := strings.SplitN(line, ":", 2) | |
| key := parts[0] | |
| value := strings.TrimSpace(parts[1]) | |
| switch key { | |
| case "User-agent": | |
| rule.UserAgents = append(rule.UserAgents, value) | |
| case "Disallow": | |
| rule.Disallow = append(rule.Disallow, value) | |
| case "Allow": | |
| rule.Allow = append(rule.Allow, value) | |
| case "Sitemap": | |
| robotRules.Sitemaps = append(robotRules.Sitemaps, value) | |
| default: | |
| return robotRules, fmt.Errorf("illegal directive type '%s' on line %d", key, lineNo) | |
| } | |
| } | |
| err = scanner.Err() | |
| if err != nil { | |
| return robotRules, err | |
| } | |
| return robotRules, nil | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment