Skip to content

Instantly share code, notes, and snippets.

@robpot891
Created June 29, 2019 03:37
Show Gist options
  • Select an option

  • Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.

Select an option

Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.
Redis_LinkDB
# Based loosely on the Redis Cookbook FIFO Queue: http://www.rediscookbook.org/implement_a_fifo_queue.html
from redis import StrictRedis
class RedisQueue:
    """FIFO queue of URLs to crawl, stored in Redis.

    Three Redis keys are derived from ``queue_name``:

    * ``queue:<name>`` -- a list holding the pending elements,
    * ``seen:<name>``  -- a set of the elements added through ``push``,
      used to avoid queueing the same element twice,
    * ``depth:<name>`` -- a hash mapping an element to its depth.

    Initialization components:
        client: a Redis client connected to the key-value database for
            the webcrawling cache (if not set, a localhost:6379
            default connection is used).
        db (int): which database to use for Redis; only used when the
            default client is created.
        queue_name (str): name for queue (default: wswp)
    """

    def __init__(self, client=None, db=0, queue_name='wswp'):
        if client is None:
            client = StrictRedis(host='localhost', port=6379, db=db)
        self.client = client
        self.name = "queue:%s" % queue_name
        self.seen_set = "seen:%s" % queue_name
        self.depth = "depth:%s" % queue_name

    def __len__(self):
        """Return the number of elements currently waiting in the queue."""
        return self.client.llen(self.name)

    def push(self, element):
        """Push an element, or a list of elements, to the tail of the queue.

        Elements that were already seen are skipped. When a list is given,
        duplicates inside the list itself are queued only once, and nothing
        is sent to Redis if no new element remains.
        """
        if isinstance(element, list):
            fresh = []
            in_batch = set()
            for item in element:
                if item in in_batch or self.already_seen(item):
                    continue
                in_batch.add(item)
                fresh.append(item)
            if not fresh:
                # LPUSH / SADD need at least one value; calling them with
                # none is rejected by Redis.
                return
            self.client.lpush(self.name, *fresh)
            self.client.sadd(self.seen_set, *fresh)
        elif not self.already_seen(element):
            self.client.lpush(self.name, element)
            self.client.sadd(self.seen_set, element)

    def already_seen(self, element):
        """Return True if the element is a member of the seen set."""
        return bool(self.client.sismember(self.seen_set, element))

    def set_depth(self, element, depth):
        """Store the depth for an element in the depth hash."""
        self.client.hset(self.depth, element, depth)

    def get_depth(self, element):
        """Return the stored depth for an element as an int (0 if unset)."""
        raw = self.client.hget(self.depth, element)
        return int(raw) if raw else 0

    def pop(self):
        """Pop an element from the head of the queue.

        Returns the element as ``str``, or ``None`` when the queue is empty.
        """
        raw = self.client.rpop(self.name)
        if raw is None:
            return None
        if isinstance(raw, bytes):
            return raw.decode('utf-8')
        # The client already returned text (e.g. decode_responses=True).
        return raw
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment