Created
June 29, 2019 03:37
-
-
Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.
Redis_LinkDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based loosely on the Redis Cookbook FIFO queue recipe: http://www.rediscookbook.org/implement_a_fifo_queue.html
| from redis import StrictRedis | |
class RedisQueue:
    """Store URLs to crawl in Redis as a FIFO queue with a "seen" set.

    Elements are pushed with ``LPUSH`` and popped with ``RPOP``, so they
    come out in the order they went in. Every pushed element is also
    recorded in a Redis set so that it is never queued twice, and a Redis
    hash keeps an optional crawl depth per element.

    Initialization components:
        client: a Redis client connected to the key-value database for
            the webcrawling cache (if not set, a localhost:6379
            default connection is used).
        db (int): which database to use for Redis (only used when
            ``client`` is not supplied).
        queue_name (str): name for queue (default: wswp)
    """

    def __init__(self, client=None, db=0, queue_name='wswp'):
        if client is None:
            # StrictRedis is imported at the top of the file.
            client = StrictRedis(host='localhost', port=6379, db=db)
        self.client = client
        # Redis keys for the list (queue), the set (seen) and the hash (depth).
        self.name = "queue:%s" % queue_name
        self.seen_set = "seen:%s" % queue_name
        self.depth = "depth:%s" % queue_name

    def __len__(self):
        """Return the number of elements currently waiting in the queue."""
        return self.client.llen(self.name)

    def push(self, element):
        """Push an element (or a list of elements) to the tail of the queue.

        Elements that were already seen are skipped. When a list is given,
        duplicates inside the list are queued only once and the original
        order is kept. Nothing is sent to Redis if no new element remains.
        """
        if isinstance(element, list):
            # dict.fromkeys de-duplicates while preserving insertion order.
            fresh = [e for e in dict.fromkeys(element)
                     if not self.already_seen(e)]
        elif not self.already_seen(element):
            fresh = [element]
        else:
            fresh = []
        if not fresh:
            # LPUSH / SADD reject a call without values, so stop here.
            return
        self.client.lpush(self.name, *fresh)
        self.client.sadd(self.seen_set, *fresh)

    def already_seen(self, element):
        """Return a truthy value if the element was pushed before."""
        return self.client.sismember(self.seen_set, element)

    def set_depth(self, element, depth):
        """Record the crawl depth of an element in the depth hash."""
        self.client.hset(self.depth, element, depth)

    def get_depth(self, element):
        """Return the recorded depth of an element as int (0 if unset)."""
        stored = self.client.hget(self.depth, element)
        return int(stored) if stored else 0

    def pop(self):
        """Pop an element from the head of the queue.

        Returns the element as ``str``, or ``None`` when the queue is empty.
        """
        raw = self.client.rpop(self.name)
        if raw is None:
            return None
        # Clients created with decode_responses=True already return str.
        return raw.decode('utf-8') if isinstance(raw, bytes) else raw
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment