Skip to content

Instantly share code, notes, and snippets.

@robpot891
Created June 29, 2019 03:37
Show Gist options
  • Select an option

  • Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.

Select an option

Save robpot891/19f5d94a28f7ec499822bd2b54dfc83e to your computer and use it in GitHub Desktop.

Revisions

  1. robpot891 created this gist Jun 29, 2019.
    49 changes: 49 additions & 0 deletions redis_queue.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    # Based loosely on the Redis Cookbook FIFO Queue: http://www.rediscookbook.org/implement_a_fifo_queue.html
    from redis import StrictRedis


    class RedisQueue:
        """FIFO queue of URLs to crawl, stored in Redis, with de-duplication.

        Three Redis keys are derived from ``queue_name``:

        * ``queue:<name>`` -- a list holding the pending elements,
        * ``seen:<name>``  -- a set of every element ever pushed,
        * ``depth:<name>`` -- a hash mapping an element to its depth.

        Initialization components:
            client: a Redis client connected to the key-value database for
                the webcrawling cache (if not set, a localhost:6379
                default connection is used).
            db (int): which database to use for Redis (only used when
                ``client`` is not supplied).
            queue_name (str): name for queue (default: wswp)
        """

        def __init__(self, client=None, db=0, queue_name='wswp'):
            self.client = (StrictRedis(host='localhost', port=6379, db=db)
                           if client is None else client)
            self.name = "queue:%s" % queue_name
            self.seen_set = "seen:%s" % queue_name
            self.depth = "depth:%s" % queue_name

        def __len__(self):
            """Return the number of elements currently waiting in the queue."""
            return self.client.llen(self.name)

        def push(self, element):
            """Add an element, or a list of elements, to the queue.

            Elements already recorded in the seen set are skipped, so each
            element is enqueued at most once over the queue's lifetime.
            Elements are added with LPUSH and removed by ``pop`` with RPOP,
            which gives first-in, first-out ordering.
            """
            if isinstance(element, list):
                # Keep first occurrences only (order preserved) so a value
                # repeated inside one batch is not enqueued twice.
                fresh = [e for e in dict.fromkeys(element)
                         if not self.already_seen(e)]
                # LPUSH / SADD reject a call with no values, so do nothing
                # when every element was filtered out (or the list was empty).
                if not fresh:
                    return
                self.client.lpush(self.name, *fresh)
                self.client.sadd(self.seen_set, *fresh)
            elif not self.already_seen(element):
                self.client.lpush(self.name, element)
                self.client.sadd(self.seen_set, element)

        def already_seen(self, element):
            """Return whether the element is a member of the seen set."""
            return self.client.sismember(self.seen_set, element)

        def set_depth(self, element, depth):
            """Record ``depth`` for ``element`` in the depth hash."""
            self.client.hset(self.depth, element, depth)

        def get_depth(self, element):
            """Return the recorded depth of ``element`` as an int.

            Returns 0 when no depth has been stored for the element.
            """
            stored = self.client.hget(self.depth, element)
            return int(stored) if stored else 0

        def pop(self):
            """Remove and return the oldest element of the queue as a str.

            Raises:
                IndexError: if the queue is empty.
            """
            raw = self.client.rpop(self.name)
            if raw is None:
                # An empty list yields None; fail with a clear, list-like
                # error instead of an AttributeError on ``None.decode``.
                raise IndexError("pop from an empty RedisQueue")
            # Clients configured to decode responses already return str.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw