Created
June 29, 2019 03:37
-
-
Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.
Revisions
-
robpot891 created this gist
Jun 29, 2019. There are no files selected for viewing
# Based loosely on the Redis Cookbook FIFO Queue:
# http://www.rediscookbook.org/implement_a_fifo_queue.html


class RedisQueue:
    """FIFO queue of URLs to crawl, stored in Redis.

    Three Redis keys are derived from ``queue_name``:

    * ``queue:<name>`` -- a list holding the pending elements,
    * ``seen:<name>``  -- a set of every element ever pushed, used to
      avoid queueing the same element twice,
    * ``depth:<name>`` -- a hash mapping an element to its crawl depth.

    Args:
        client: a Redis client connected to the key-value database used
            for the crawl (if not set, a localhost:6379 connection is
            created).
        db (int): which Redis database to use when the default client is
            created; ignored when ``client`` is given.
        queue_name (str): name for the queue (default: wswp).
    """

    def __init__(self, client=None, db=0, queue_name='wswp'):
        if client is None:
            # Imported here so the class can be used with an injected
            # client even when the redis package is not importable.
            from redis import StrictRedis
            client = StrictRedis(host='localhost', port=6379, db=db)
        self.client = client
        self.name = "queue:%s" % queue_name
        self.seen_set = "seen:%s" % queue_name
        self.depth = "depth:%s" % queue_name

    def __len__(self):
        """Return the number of elements currently waiting in the queue."""
        return self.client.llen(self.name)

    def push(self, element):
        """Add one element, or a list of elements, to the queue.

        Elements are LPUSHed (left end of the list) and later RPOPed
        (right end), which gives first-in-first-out order. Elements that
        are already in the seen set are skipped, as are repeats inside a
        single list. Pushing a list with nothing new in it is a no-op.
        """
        if isinstance(element, list):
            fresh = []
            in_batch = set()
            for item in element:
                if item in in_batch or self.already_seen(item):
                    continue
                in_batch.add(item)
                fresh.append(item)
            if not fresh:
                # LPUSH / SADD require at least one value.
                return
            self.client.lpush(self.name, *fresh)
            self.client.sadd(self.seen_set, *fresh)
        elif not self.already_seen(element):
            self.client.lpush(self.name, element)
            self.client.sadd(self.seen_set, element)

    def already_seen(self, element):
        """Return True if the element has been pushed before."""
        return bool(self.client.sismember(self.seen_set, element))

    def set_depth(self, element, depth):
        """Record the crawl depth for an element in the depth hash."""
        self.client.hset(self.depth, element, depth)

    def get_depth(self, element):
        """Return the recorded depth of an element, or 0 if none is set."""
        raw = self.client.hget(self.depth, element)
        return int(raw) if raw else 0

    def pop(self):
        """Remove and return the oldest element as ``str``.

        Returns ``None`` when the queue is empty (for example when another
        worker drained it between a length check and this call).
        """
        raw = self.client.rpop(self.name)
        if raw is None:
            return None
        # Clients created with decode_responses=True already return str.
        return raw.decode('utf-8') if isinstance(raw, bytes) else raw