Created
February 20, 2024 01:44
-
-
Save myzie/601cf170f88b09efd2012ea322a80259 to your computer and use it in GitHub Desktop.
Risor Web Crawler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "fmt" | |
| "github.com/risor-io/risor/object" | |
| "github.com/risor-io/risor/op" | |
| ) | |
| const CrawlerType object.Type = "crawler.crawler" | |
| type CrawlerObject struct { | |
| value *Crawler | |
| } | |
| func (c *CrawlerObject) Type() object.Type { | |
| return CrawlerType | |
| } | |
| func (c *CrawlerObject) Inspect() string { | |
| return "crawler.crawler()" | |
| } | |
| func (c *CrawlerObject) Interface() interface{} { | |
| return c.value | |
| } | |
| func (c *CrawlerObject) IsTruthy() bool { | |
| return true | |
| } | |
| func (c *CrawlerObject) Cost() int { | |
| return 0 | |
| } | |
| func (c *CrawlerObject) MarshalJSON() ([]byte, error) { | |
| return nil, fmt.Errorf("type error: unable to marshal crawler.crawler") | |
| } | |
| func (c *CrawlerObject) RunOperation(opType op.BinaryOpType, right object.Object) object.Object { | |
| return object.Errorf("eval error: unsupported operation for %s: %v", CrawlerType, opType) | |
| } | |
| func (c *CrawlerObject) Equals(other object.Object) object.Object { | |
| return object.NewBool(c == other) | |
| } | |
| func (c *CrawlerObject) SetAttr(name string, value object.Object) error { | |
| switch name { | |
| case "response": | |
| value, err := object.AsString(value) | |
| if err != nil { | |
| return err.Value() | |
| } | |
| c.value.Response = value | |
| return nil | |
| case "status": | |
| value, err := object.AsInt(value) | |
| if err != nil { | |
| return err.Value() | |
| } | |
| c.value.Status = int(value) | |
| return nil | |
| default: | |
| return fmt.Errorf("attribute error: %s object has no attribute %q", CrawlerType, name) | |
| } | |
| } | |
| func (c *CrawlerObject) GetAttr(name string) (object.Object, bool) { | |
| switch name { | |
| case "response": | |
| return object.NewString(c.value.Response), true | |
| case "status": | |
| return object.NewInt(int64(c.value.Status)), true | |
| } | |
| return nil, false | |
| } | |
| func NewCrawlerObject(c *Crawler) *CrawlerObject { | |
| return &CrawlerObject{value: c} | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "context" | |
| "fmt" | |
| "github.com/risor-io/risor/object" | |
| "github.com/risor-io/risor/op" | |
| ) | |
| const CrawlerRegistryType object.Type = "crawler.registry" | |
| type CrawlerRegistryObject struct { | |
| value *CrawlerRegistry | |
| } | |
| func (r *CrawlerRegistryObject) Type() object.Type { | |
| return CrawlerRegistryType | |
| } | |
| func (r *CrawlerRegistryObject) Inspect() string { | |
| return "crawler.registry()" | |
| } | |
| func (r *CrawlerRegistryObject) Interface() interface{} { | |
| return r.value | |
| } | |
| func (r *CrawlerRegistryObject) IsTruthy() bool { | |
| return true | |
| } | |
| func (r *CrawlerRegistryObject) Cost() int { | |
| return 0 | |
| } | |
| func (r *CrawlerRegistryObject) MarshalJSON() ([]byte, error) { | |
| return nil, fmt.Errorf("type error: unable to marshal crawler.registry") | |
| } | |
| func (r *CrawlerRegistryObject) RunOperation(opType op.BinaryOpType, right object.Object) object.Object { | |
| return object.Errorf("eval error: unsupported operation for %s: %v", CrawlerRegistryType, opType) | |
| } | |
| func (r *CrawlerRegistryObject) Equals(other object.Object) object.Object { | |
| return object.NewBool(r == other) | |
| } | |
| func (r *CrawlerRegistryObject) SetAttr(name string, value object.Object) error { | |
| return fmt.Errorf("attribute error: %s object has no attribute %q", CrawlerRegistryType, name) | |
| } | |
| func (r *CrawlerRegistryObject) GetAttr(name string) (object.Object, bool) { | |
| switch name { | |
| case "register": | |
| return object.NewBuiltin("crawler.registry.register", func(ctx context.Context, args ...object.Object) object.Object { | |
| if len(args) != 2 { | |
| return object.Errorf("wrong number of arguments. got=%d, want=2", len(args)) | |
| } | |
| name, errObj := object.AsString(args[0]) | |
| if errObj != nil { | |
| return errObj | |
| } | |
| fn, ok := args[1].(*object.Function) | |
| if !ok { | |
| return object.Errorf("argument error: expected function, got %s", args[1].Type()) | |
| } | |
| callFunc, ok := object.GetCallFunc(ctx) | |
| if !ok { | |
| return object.Errorf("unable to get call function") | |
| } | |
| r.value.Register(name, func(crawler *Crawler, query string) error { | |
| c := NewCrawlerObject(crawler) | |
| _, err := callFunc(ctx, fn, []object.Object{c, object.NewString(query)}) | |
| return err | |
| }) | |
| return object.Nil | |
| }), true | |
| case "call": | |
| return object.NewBuiltin("crawler.registry.call", func(ctx context.Context, args ...object.Object) object.Object { | |
| if len(args) != 2 { | |
| return object.Errorf("wrong number of arguments. got=%d, want=2", len(args)) | |
| } | |
| name, errObj := object.AsString(args[0]) | |
| if errObj != nil { | |
| return errObj | |
| } | |
| query, errObj := object.AsString(args[1]) | |
| if errObj != nil { | |
| return errObj | |
| } | |
| crawler, err := r.value.Call(name, query) | |
| if err != nil { | |
| return object.Errorf("crawler error: %v", err) | |
| } | |
| return NewCrawlerObject(crawler) | |
| }), true | |
| } | |
| return nil, false | |
| } | |
| func NewCrawlerRegistryObject(r *CrawlerRegistry) *CrawlerRegistryObject { | |
| return &CrawlerRegistryObject{value: r} | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "context" | |
| "flag" | |
| "fmt" | |
| "os" | |
| "sync" | |
| "github.com/risor-io/risor" | |
| ) | |
// CrawlerFunc is a crawl callback. It receives a fresh Crawler to populate and
// the query to crawl for, and returns a non-nil error when the crawl fails.
type CrawlerFunc func(crawler *Crawler, query string) error

// Crawler carries the outcome of one crawl, as filled in by a CrawlerFunc.
type Crawler struct {
	Response string
	Status   int
}

// CrawlerRegistry maps crawler names to callbacks. The map is guarded by a
// mutex, so Register and Call may be used from multiple goroutines. The zero
// value is an empty registry that is ready to use.
type CrawlerRegistry struct {
	crawlers map[string]CrawlerFunc
	mutex    sync.RWMutex
}

// NewCrawlerRegistry returns an empty registry.
func NewCrawlerRegistry() *CrawlerRegistry {
	return &CrawlerRegistry{
		crawlers: make(map[string]CrawlerFunc),
	}
}

// Register stores callback under name, replacing any callback previously
// registered with that name.
func (cr *CrawlerRegistry) Register(name string, callback CrawlerFunc) {
	cr.mutex.Lock()
	defer cr.mutex.Unlock()
	// Allocate lazily so that a zero-value registry does not panic on write.
	if cr.crawlers == nil {
		cr.crawlers = make(map[string]CrawlerFunc)
	}
	cr.crawlers[name] = callback
}

// Call runs the callback registered under name with a new Crawler and the
// given query. It returns the populated Crawler on success, the callback's
// error if it fails, or a "crawler not found" error when no usable callback
// is registered under name.
//
// The registry lock is held only while the callback is looked up, not while it
// runs. A callback may therefore call Register or Call on the same registry
// without deadlocking, and a slow crawl does not block registrations. As a
// consequence, callbacks for concurrent Call invocations run concurrently; a
// callback that is not safe for concurrent use must be serialized by the
// caller.
func (cr *CrawlerRegistry) Call(name string, query string) (*Crawler, error) {
	callback, ok := cr.lookup(name)
	if !ok {
		return nil, fmt.Errorf("crawler not found: %s", name)
	}
	crawler := &Crawler{}
	if err := callback(crawler, query); err != nil {
		return nil, err
	}
	return crawler, nil
}

// lookup returns the callback registered under name. The boolean result is
// false when nothing, or a nil callback, is registered under that name.
func (cr *CrawlerRegistry) lookup(name string) (CrawlerFunc, bool) {
	cr.mutex.RLock()
	defer cr.mutex.RUnlock()
	callback, ok := cr.crawlers[name]
	return callback, ok && callback != nil
}
| var defaultScript = ` | |
| print("script running...") | |
| registry.register("google", func(crawler, query) { | |
| response := fetch("https://www.google.com/search?q=" + query) | |
| crawler.response = response.text() | |
| crawler.status = response.status_code | |
| printf("crawl complete for \"%s\" (status: %d)\n", query, crawler.status) | |
| }); | |
| print("crawling...") | |
| result := registry.call("google", "animals") | |
| print("status:", result.status, "response len:", len(result.response)) | |
| ` | |
| func main() { | |
| var script string | |
| flag.StringVar(&script, "script", defaultScript, "path to the script file") | |
| flag.Parse() | |
| app := NewCrawlerRegistry() | |
| ctx := context.Background() | |
| _, err := risor.Eval(ctx, script, risor.WithGlobals(map[string]interface{}{ | |
| "registry": NewCrawlerRegistryObject(app), | |
| })) | |
| if err != nil { | |
| fmt.Println(err) | |
| os.Exit(1) | |
| } | |
| } |
Risor VMs are safe for use by only one goroutine at a time. In v1.4.0 of Risor, which I just released, I think the Call() function will return an error with your code, because it checks whether the VM is already active and raises an error if it is.
This new release of Risor adds support for the go and defer keywords as well as channels. You might want to try moving the concurrency into the Risor script now using these mechanisms. Or use the vm.Clone() method to create a copy of the VM per goroutine in Go.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Running .Call() inside goroutines gives these errors:
But with
cr.mutex.Lock() and defer cr.mutex.Unlock() in Call() instead of RLock and RUnlock, the code works as expected. Removing the locks entirely also gives errors. The same thing happens with fetch() instead of http. I don't know whether this is a Go-specific error or a Risor one...