Skip to content

Instantly share code, notes, and snippets.

@pbabics
Created January 31, 2017 09:37
Show Gist options
  • Save pbabics/7f04e83f5c7953dd8742e26efd10462c to your computer and use it in GitHub Desktop.
Save pbabics/7f04e83f5c7953dd8742e26efd10462c to your computer and use it in GitHub Desktop.

Revisions

  1. pbabics created this gist Jan 31, 2017.
    129 changes: 129 additions & 0 deletions gitlab-registry-cleaner.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,129 @@
    #!/usr/bin/env python

    import os
    import json
    import argparse
    import math
    from datetime import datetime
    import dateutil.parser
    import time
    import sys

    def convert_size(size_bytes):
        """Format a byte count as a human-readable string with 1024-based units.

        Args:
            size_bytes: Non-negative number of bytes (int or float).

        Returns:
            '0B' for zero, otherwise '<value> <unit>' where the value is rounded
            to two decimals and the unit is one of B, KB, MB, ... YB. Values
            beyond the largest unit are still expressed in YB.

        Raises:
            ValueError: If size_bytes is negative.
        """
        if size_bytes == 0:
            return '0B'
        if size_bytes < 0:
            raise ValueError('size_bytes must be non-negative, got %r' % (size_bytes,))
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        value = float(size_bytes)
        unit = 0
        # Repeated division (exact for powers of two) instead of math.log():
        # no floating-point error at unit boundaries, and the index is clamped
        # so it can never run past the unit table or go negative.
        while value >= 1024 and unit < len(size_name) - 1:
            value /= 1024
            unit += 1
        return '%s %s' % (round(value, 2), size_name[unit])

    def listProjects(BASE_DIR):
        """Return the names of all image repositories stored under BASE_DIR.

        A repository is any directory below ``<BASE_DIR>/repositories`` that
        contains a ``_manifests`` sub-directory. Its name is the path relative
        to ``repositories`` joined with '/', so plain ``group/project`` layouts
        as well as deeper ones (``group/subgroup/project``,
        ``group/project/image``) are reported.

        Args:
            BASE_DIR: Registry root directory (the one containing
                ``repositories``).

        Returns:
            List of repository names. Stray files and directories without
            ``_manifests`` are skipped.

        Raises:
            OSError: If ``repositories`` or one of the visited directories
                cannot be listed.
        """
        root = os.path.join(BASE_DIR, "repositories")

        def _fail(error):
            # os.walk() ignores listing errors by default; surface them instead
            # so a wrong base directory is not mistaken for an empty registry.
            raise error

        projects = []
        for current, subdirs, _files in os.walk(root, onerror=_fail):
            if current != root and "_manifests" in subdirs:
                projects.append(os.path.relpath(current, root).replace(os.sep, "/"))
            # Do not descend into underscore-prefixed directories: this script
            # treats ``_manifests`` as the repository marker, so those are
            # bookkeeping directories rather than further repository names.
            # Sorting keeps the traversal order deterministic.
            subdirs[:] = sorted(d for d in subdirs if not d.startswith("_"))
        return projects

    def listProjectRevisions(BASE_DIR, project):
        """List the entries of a project's ``_manifests/revisions/sha256`` directory.

        Args:
            BASE_DIR: Registry root directory.
            project: Project name relative to ``repositories``.

        Returns:
            The directory entry names as returned by ``os.listdir``.
        """
        revisions_dir = "%s/repositories/%s/_manifests/revisions/sha256/" % (BASE_DIR, project)
        return os.listdir(revisions_dir)

    def listProjectTags(BASE_DIR, project):
        """Collect every tag of a project with its indexed revisions.

        Args:
            BASE_DIR: Registry root directory.
            project: Project name relative to ``repositories``.

        Returns:
            A list of dicts, one per tag, with the keys:
              'name'    - the tag name,
              'revs'    - entries of the tag's ``index/sha256`` directory,
              'current' - the part after ':' in the tag's ``current/link`` file.
            An empty list when the project has no ``_manifests/tags`` directory.

        Raises:
            OSError/IOError: If a tag directory lacks ``index/sha256`` or
                ``current/link``.
        """
        tags_dir = "%s/repositories/%s/_manifests/tags" % (BASE_DIR, project)
        # A directory without any tags is not an error: report no tags.
        if not os.path.isdir(tags_dir):
            return []

        tags = []
        for tag in os.listdir(tags_dir):
            revs = os.listdir("%s/%s/index/sha256" % (tags_dir, tag))
            with open("%s/%s/current/link" % (tags_dir, tag), 'r') as cur:
                # The link is split on ':' and the second field kept; strip
                # first so a trailing newline does not end up in the digest.
                current = cur.read().strip().split(':')[1]
            tags.append({'name': tag, 'revs': revs, 'current': current})
        return tags

    def getRevisionSize(BASE_DIR, revision):
        """Sum the sizes recorded in a revision's manifest blob.

        Args:
            BASE_DIR: Registry root directory.
            revision: Hex digest of the manifest; its blob is read from
                ``blobs/sha256/<first two chars>/<digest>/data``.

        Returns:
            0 when the manifest blob file does not exist, otherwise the sum of
            every ``layers[*].size`` plus ``config.size`` from the JSON manifest.
        """
        manifest_path = "%s/blobs/sha256/%s/%s/data" % (BASE_DIR, revision[:2], revision)
        if not os.path.isfile(manifest_path):
            return 0
        with open(manifest_path, 'r') as manifest_file:
            manifest = json.load(manifest_file)
        layers_total = sum(layer['size'] for layer in manifest['layers'])
        return layers_total + manifest['config']['size']

    def timedelta_total_seconds(timedelta):
        """Return the duration of *timedelta* in seconds as a float.

        Combines the object's ``days``, ``seconds`` and ``microseconds`` fields
        into a microsecond count and divides it by one million.
        """
        micros_per_second = 10 ** 6
        whole_seconds = timedelta.seconds + timedelta.days * 24 * 3600
        # Adding 0.0 forces float arithmetic so the division is never integral.
        total_micros = timedelta.microseconds + 0.0 + whole_seconds * micros_per_second
        return total_micros / micros_per_second

    def getRevisionDate(BASE_DIR, revision):
        """Return the creation time of a revision as seconds since 1970-01-01 UTC.

        The manifest blob of *revision* names a config blob through
        ``config.digest``; that config blob's ``created`` field is parsed and
        converted.

        Args:
            BASE_DIR: Registry root directory.
            revision: Hex digest of the manifest.

        Returns:
            0 when the manifest blob file does not exist, otherwise the
            timestamp as a float.

        Raises:
            IOError/OSError: If the config blob is missing.
            KeyError: If the manifest or config lacks the expected fields.
        """
        manifest_path = "%s/blobs/sha256/%s/%s/data" % (BASE_DIR, revision[:2], revision)
        if not os.path.isfile(manifest_path):
            return 0
        with open(manifest_path, 'r') as cur:
            revData = json.load(cur)
        # The digest is stored as "<algorithm>:<hex>"; only the hex part is
        # used to locate the config blob.
        config = revData['config']['digest'].split(':')[1]
        with open("%s/blobs/sha256/%s/%s/data" % (BASE_DIR, config[:2], config), 'r') as conf:
            confData = json.load(conf)

        created = dateutil.parser.parse(confData['created'])
        offset = created.utcoffset()
        if offset is not None:
            # Shift to UTC before the tzinfo is dropped; just discarding the
            # tzinfo would skew timestamps that carry a non-UTC offset.
            created = created - offset
        return timedelta_total_seconds(created.replace(tzinfo=None) - datetime(1970, 1, 1))

    def selector(x):
        """Sort key: return the element at index 2 of *x*."""
        third_field = x[2]
        return third_field

    def _revisionStillTagged(BASE_DIR, project, revision):
        """Return True if any tag of *project* still has an index link for *revision*."""
        tags_dir = "%s/repositories/%s/_manifests/tags" % (BASE_DIR, project)
        if not os.path.isdir(tags_dir):
            return False
        return any(
            os.path.isfile("%s/%s/index/sha256/%s/link" % (tags_dir, name, revision))
            for name in os.listdir(tags_dir)
        )


    def removeRevision(BASE_DIR, project, tag, revision):
        """Unlink *revision* from *tag* and, if unreferenced, from the project.

        Steps:
          1. Remove the tag's index entry for the revision.
          2. If that was the tag's last indexed revision, remove the tag's
             ``current`` link and the now-empty tag directories.
          3. Remove the project's ``_manifests/revisions`` entry, but only when
             no other tag still indexes the revision. Deleting it
             unconditionally would leave those other tags pointing at a
             revision that no longer exists.

        Only link files and directories are removed here; nothing under
        ``blobs`` is touched.

        Args:
            BASE_DIR: Registry root directory.
            project: Project name relative to ``repositories``.
            tag: Tag name the revision is removed from.
            revision: Hex digest of the manifest revision.

        Raises:
            OSError: If a file or directory cannot be removed (for example a
                directory that is unexpectedly not empty).
        """
        tag_dir = "%s/repositories/%s/_manifests/tags/%s" % (BASE_DIR, project, tag)
        index_dir = "%s/index/sha256" % tag_dir
        tag_rev_dir = "%s/%s" % (index_dir, revision)

        if os.path.isfile("%s/link" % tag_rev_dir):
            os.remove("%s/link" % tag_rev_dir)
            os.rmdir(tag_rev_dir)
            if not os.listdir(index_dir):
                # No revision left for this tag: drop the tag entirely.
                os.remove("%s/current/link" % tag_dir)
                os.rmdir(index_dir)
                os.rmdir("%s/index" % tag_dir)
                os.rmdir("%s/current" % tag_dir)
                os.rmdir(tag_dir)

        # Keep the repository-level link while any tag still refers to it.
        if _revisionStillTagged(BASE_DIR, project, revision):
            return

        rev_dir = "%s/repositories/%s/_manifests/revisions/sha256/%s" % (BASE_DIR, project, revision)
        if os.path.isfile("%s/link" % rev_dir):
            os.remove("%s/link" % rev_dir)
            os.rmdir(rev_dir)

    # Script entry: parse options, then for every project keep the KEEP_COUNT
    # newest tag revisions (by image creation date) and remove the rest.
    # print() is always called with a single pre-formatted string so the
    # script runs unchanged under both Python 2 and Python 3.
    BASE_DIR = '/var/opt/gitlab/gitlab-rails/shared/registry/docker/registry/v2'
    KEEP_COUNT = 5
    DRY_RUN = False

    parser = argparse.ArgumentParser(description='Clean gitlab registry tags by date they were created, keeps K newest tags')
    parser.add_argument('--dry-run', '-d', help='Only prints what will be removed', dest='dry_run', action='store_true')
    parser.add_argument('--keep', '-k', type=int, help='Number of newest revisions to keep')
    parser.add_argument('--base-dir', '-b', help='Base directory of gitlab registry (ending with registry/v2')
    parser.set_defaults(dry_run=DRY_RUN, keep=KEEP_COUNT, base_dir=BASE_DIR)
    args = parser.parse_args()

    BASE_DIR = args.base_dir
    KEEP_COUNT = args.keep
    DRY_RUN = args.dry_run

    # A negative count has no meaning and would remove every revision.
    if KEEP_COUNT < 0:
        parser.error("--keep must be zero or greater")

    if not os.path.isdir(BASE_DIR):
        print("Base directory of gitlab registry DOES NOT EXISTS!")
        sys.exit(1)


    print("Existing projects:")
    for proj in listProjects(BASE_DIR):
        tags = listProjectTags(BASE_DIR, proj)
        revsList = []

        # One [tag name, revision, creation timestamp] entry per tag/revision pair.
        for tag in tags:
            for rev in tag['revs']:
                revsList.append([tag['name'], rev, getRevisionDate(BASE_DIR, rev)])

        # Newest first. reversed(sorted(...)) is kept on purpose: unlike
        # sorted(reverse=True) it also reverses the order of equal timestamps.
        revsList = list(reversed(sorted(revsList, key=selector)))

        # Count exactly what the loop below removes: everything past the keep
        # window plus any revision whose creation date is 0.
        markedCount = sum(
            1 for position, entry in enumerate(revsList)
            if position >= KEEP_COUNT or entry[2] == 0
        )

        print(">> %40s (%d marked for removal out of %d)" % (proj, markedCount, len(revsList)))
        for i, (tag, rev, created) in enumerate(revsList):
            createdOn = datetime.fromtimestamp(created).strftime("%d-%m-%Y")
            if i < KEEP_COUNT and created != 0:
                print(">>> Kept Back: Tag: %s - Revision: %s (created on %s)" % (tag, rev[:12], createdOn))
            else:
                print(">>> Marked for Removal: Tag: %s - Revision: %s (created on %s)" % (tag, rev[:12], createdOn))
                if not DRY_RUN:
                    removeRevision(BASE_DIR, proj, tag, rev)

    if not DRY_RUN:
        print("!!!!! Please Run gitlab garbage collector !!!!!")