Skip to content

Instantly share code, notes, and snippets.

@lockwooddev
Last active February 17, 2021 10:21
Show Gist options
  • Save lockwooddev/b857a63d6fb16e3c1f14d1bae76ab1d1 to your computer and use it in GitHub Desktop.
Save lockwooddev/b857a63d6fb16e3c1f14d1bae76ab1d1 to your computer and use it in GitHub Desktop.
// Rule is one group of robots.txt directives: the user agents the group
// names together with the Disallow and Allow values listed for them.
type Rule struct {
	// UserAgents holds the values of the group's "User-agent" lines.
	UserAgents []string
	// Disallow holds the values of the group's "Disallow" lines.
	Disallow []string
	// Allow holds the values of the group's "Allow" lines.
	Allow []string
}
// isEmpty reports whether the rule holds no user agents, no Disallow values
// and no Allow values.
func (r Rule) isEmpty() bool {
	noAgents := len(r.UserAgents) == 0
	noPaths := len(r.Disallow) == 0 && len(r.Allow) == 0
	return noAgents && noPaths
}
// RobotRules is the parsed content of a robots.txt document.
type RobotRules struct {
	// rules holds the directive groups in the order they were read.
	rules []Rule
	// Sitemaps holds the values of every "Sitemap" line.
	Sitemaps []string
}
// Roboteer is implemented by types that can produce the RobotRules for a URL.
type Roboteer interface {
	// FetchRobots returns the RobotRules for url, or an error.
	FetchRobots(url string) (RobotRules, error)
}
// RobotsClient is a stateless type whose FetchRobots method retrieves
// robots.txt documents over HTTP.
type RobotsClient struct{}
// NewRobotsClient returns a RobotsClient value as a Roboteer.
func NewRobotsClient() Roboteer {
	var client RobotsClient
	return client
}
// FetchRobots downloads the robots.txt document at url with http.Get and
// parses it into RobotRules.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, when a line is not of the form "name: value", or when a
// directive other than User-agent, Disallow, Allow or Sitemap is found.
// On error the returned RobotRules is the zero value.
func (rc RobotsClient) FetchRobots(url string) (RobotRules, error) {
	res, err := http.Get(url)
	if err != nil {
		return RobotRules{}, err
	}
	defer res.Body.Close()

	// An error page is not a robots.txt document; do not try to parse it.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return RobotRules{}, fmt.Errorf("fetching %q: unexpected status %s", url, res.Status)
	}

	return parseRobots(bufio.NewScanner(res.Body))
}

// parseRobots reads robots.txt lines from scanner and groups them into rules.
//
// Directive names are matched case-insensitively and surrounding whitespace
// is ignored. Everything from a '#' to the end of the line is a comment.
// A group ends at a blank line, at a User-agent line that follows
// Allow/Disallow lines, or at the end of the input.
func parseRobots(scanner *bufio.Scanner) (RobotRules, error) {
	var (
		robotRules RobotRules
		rule       Rule
	)

	// flush stores the group collected so far (if any) and starts a new one.
	flush := func() {
		if !rule.isEmpty() {
			robotRules.rules = append(robotRules.rules, rule)
		}
		rule = Rule{}
	}

	lineNo := 0
	for scanner.Scan() {
		lineNo++
		line := scanner.Text()

		// Drop a full-line or trailing comment.
		hash := strings.IndexByte(line, '#')
		if hash >= 0 {
			line = line[:hash]
		}
		line = strings.TrimSpace(line)
		if line == "" {
			// Only a genuinely blank line ends the group; a comment-only
			// line is skipped without touching the current group.
			if hash < 0 {
				flush()
			}
			continue
		}

		// Validate directive parts: without a ':' there is no value to read.
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			return RobotRules{}, fmt.Errorf("malformed directive on line %d: missing ':'", lineNo)
		}
		key := strings.TrimSpace(parts[0])
		value := strings.TrimSpace(parts[1])

		switch strings.ToLower(key) {
		case "user-agent":
			// A User-agent line after rule lines begins the next group even
			// when no blank line separates the two.
			if len(rule.Disallow)+len(rule.Allow) > 0 {
				flush()
			}
			rule.UserAgents = append(rule.UserAgents, value)
		case "disallow":
			rule.Disallow = append(rule.Disallow, value)
		case "allow":
			rule.Allow = append(rule.Allow, value)
		case "sitemap":
			robotRules.Sitemaps = append(robotRules.Sitemaps, value)
		default:
			return RobotRules{}, fmt.Errorf("illegal directive type '%s' on line %d", key, lineNo)
		}
	}
	if err := scanner.Err(); err != nil {
		return RobotRules{}, err
	}

	// The final group is not necessarily followed by a blank line.
	flush()
	return robotRules, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment