Skip to content

Instantly share code, notes, and snippets.

@KeyboardInterrupt
Forked from mgedmin/github_mirror.py
Created April 28, 2021 06:29
Show Gist options
  • Save KeyboardInterrupt/1f84267dc524cc81ff29db5be86276cf to your computer and use it in GitHub Desktop.
Save KeyboardInterrupt/1f84267dc524cc81ff29db5be86276cf to your computer and use it in GitHub Desktop.

Revisions

  1. @mgedmin mgedmin revised this gist Nov 23, 2014. 1 changed file with 48 additions and 10 deletions.
    58 changes: 48 additions & 10 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -1,14 +1,14 @@
    #!/usr/bin/env python
    #!/usr/bin/env python3
    # See http://stackoverflow.com/questions/3581031/backup-mirror-github-repositories/13917251#13917251
    # You can find the latest version of this script at
    # https://gist.github.com/4319265
    import os
    import sys
    import json
    import urllib
    import urllib.request
    import subprocess

    __version__ = '0.3'
    __version__ = '0.4'
    __author__ = 'Marius Gedminas <[email protected]>'
    __url__ = 'https://gist.github.com/4319265'

    @@ -21,16 +21,54 @@

    # helpers

    # Raised for expected runtime failures (for example a server response that
    # is not UTF-8 JSON), as opposed to programming mistakes in the script.
    class Error(Exception):
    """An error that is not a bug in this script."""


    def ensure_dir(dir):
        """Create directory *dir* (and any missing parents) if it does not exist.

        ``exist_ok=True`` is used instead of a separate ``os.path.isdir`` check
        so that nothing can create the directory between the check and the
        ``makedirs`` call and make it fail.

        Raises OSError if *dir* exists but is not a directory, or if it cannot
        be created.
        """
        os.makedirs(dir, exist_ok=True)


    def get_github_list(url):
    response = urllib.urlopen(url + '?per_page=100')
    if response.info().getheader('Link'):
    print >> sys.stderr, "error: pagination is not supported yet"
    return json.load(response)
    def get_json_and_headers(url):
        """Perform HTTP GET for a URL, return deserialized JSON and headers.

        Returns a tuple (json_data, headers) where headers is an instance
        of email.message.Message (because that's what urllib gives us).

        Raises Error if the response does not declare itself as UTF-8
        encoded JSON.
        """
        with urllib.request.urlopen(url) as r:
            headers = r.info()
            # We expect Github to return UTF-8 JSON, but let's verify that.
            # The Content-Type header is parsed rather than compared as a raw
            # string, so equivalent spellings (no space after ';', quoted or
            # upper-case charset, extra parameters) are all accepted.
            media_type = headers.get_content_type()
            charset = headers.get_content_charset()
            if media_type != 'application/json' or charset != 'utf-8':
                raise Error('Did not get UTF-8 JSON data from {0}, got {1}'
                            .format(url, headers.get('Content-Type', '').lower()))
            return json.loads(r.read().decode('UTF-8')), headers


    def get_github_list(url, batch_size=100):
        """Fetch a complete Github list resource, following pagination.

        Issues one HTTP GET per page (``batch_size`` items each) and returns
        the concatenated, deserialized JSON list.  The JSON format is
        documented at
        http://developer.github.com/v3/repos/#list-organization-repositories

        Github signals that more pages exist through a Link response header,
        e.g. ::

            Link: <https://api.github.com/resource?page=2>; rel="next",
                  <https://api.github.com/resource?page=5>; rel="last"

        Pagination itself is documented at
        http://developer.github.com/v3/#pagination
        """
        first_url = '{0}?per_page={1}'.format(url, batch_size)
        items, headers = get_json_and_headers(first_url)
        page_number = 1
        while True:
            # No rel="next" link means the page just fetched was the last one.
            if 'rel="next"' not in headers.get('Link', ''):
                return items
            page_number += 1
            next_url = '{0}?page={1}&per_page={2}'.format(
                url, page_number, batch_size)
            batch, headers = get_json_and_headers(next_url)
            items += batch


    def info(*args):
    @@ -46,8 +84,8 @@ def backup(git_url, dir):


    def update_description(git_dir, description):
    with open(os.path.join(git_dir, 'description'), 'w') as f:
    f.write(description.encode('UTF-8') + '\n')
    with open(os.path.join(git_dir, 'description'), 'w', encoding='UTF-8') as f:
    f.write(description + '\n')


    def update_cloneurl(git_dir, cloneurl):
  2. @mgedmin mgedmin revised this gist Nov 23, 2014. 1 changed file with 7 additions and 0 deletions.
    7 changes: 7 additions & 0 deletions github_mirror.py
    100644 → 100755
    Original file line number Diff line number Diff line change
    @@ -18,31 +18,38 @@
    backup_dir = os.path.expanduser('~/github')
    gist_backup_dir = os.path.expanduser('~/github/gists')


    # helpers

    def ensure_dir(dir):
    if not os.path.isdir(dir):
    os.makedirs(dir)


    def get_github_list(url):
    response = urllib.urlopen(url + '?per_page=100')
    if response.info().getheader('Link'):
    print >> sys.stderr, "error: pagination is not supported yet"
    return json.load(response)


    def info(*args):
        """Print all arguments on one line, separated by spaces, and flush.

        Each argument is converted with ``str``.  stdout is flushed right away
        so the progress line appears before any output that follows it.
        """
        line = " ".join(str(arg) for arg in args)
        print(line)
        sys.stdout.flush()


    def backup(git_url, dir):
        """Create or refresh a mirror of *git_url* in directory *dir*.

        If *dir* already exists, ``git fetch`` is run inside it; otherwise
        ``git clone --mirror`` creates it.  A non-zero git exit status is
        reported on stderr instead of being dropped silently; it does not
        raise, so the caller can carry on with the next repository.
        """
        if os.path.exists(dir):
            status = subprocess.call(['git', 'fetch'], cwd=dir)
        else:
            # Name the target explicitly: without it git derives the directory
            # name from the URL, which is not guaranteed to equal *dir*.
            status = subprocess.call(
                ['git', 'clone', '--mirror', git_url, dir])
        if status != 0:
            sys.stderr.write(
                "error: git exited with status {0} for {1}\n".format(status, dir))


    def update_description(git_dir, description):
        """Write *description* to the ``description`` file inside *git_dir*.

        The file is opened in text mode with an explicit UTF-8 encoding and
        the text is written as ``str``.  Encoding by hand and then appending
        ``'\\n'`` mixes bytes with str, which raises TypeError on Python 3.
        A trailing newline is always added.
        """
        path = os.path.join(git_dir, 'description')
        with open(path, 'w', encoding='UTF-8') as f:
            f.write(description + '\n')


    def update_cloneurl(git_dir, cloneurl):
        """Store *cloneurl*, followed by a newline, in ``<git_dir>/cloneurl``.

        Any existing file is overwritten.
        """
        target = os.path.join(git_dir, 'cloneurl')
        with open(target, 'w') as out:
            out.write(cloneurl + '\n')
  3. @mgedmin mgedmin revised this gist Sep 29, 2014. 1 changed file with 31 additions and 24 deletions.
    55 changes: 31 additions & 24 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -8,12 +8,13 @@
    import urllib
    import subprocess

    __version__ = '0.2'
    __version__ = '0.3'
    __author__ = 'Marius Gedminas <[email protected]>'
    __url__ = 'https://gist.github.com/4319265'

    # configuration
    username = 'mgedmin'
    gists_of = ['mgedmin']
    repos_of = ['mgedmin', 'gtimelog']
    backup_dir = os.path.expanduser('~/github')
    gist_backup_dir = os.path.expanduser('~/github/gists')

    @@ -46,28 +47,34 @@ def update_cloneurl(git_dir, cloneurl):
    with open(os.path.join(git_dir, 'cloneurl'), 'w') as f:
    f.write(cloneurl + '\n')

    # action
    ensure_dir(gist_backup_dir)
    os.chdir(gist_backup_dir)
    for gist in get_github_list('https://api.github.com/users/%s/gists' % username):
    dir = gist['id'] + '.git'
    description = gist['description'] or "(no description)"
    info("+", "gists/" + gist['id'], "-", description.partition('\n')[0])
    backup(gist['git_pull_url'], dir)
    update_description(dir, description + '\n\n' + gist['html_url'])
    update_cloneurl(dir, gist['git_push_url'])

    # help me catch silly errors
    gist = None
    del gist
    def back_up_gists_of(username, gist_backup_dir=gist_backup_dir):
        """Mirror every gist of *username* into *gist_backup_dir*.

        The directory is created if necessary and becomes the current working
        directory.  Each gist ends up in ``<id>.git``; its ``description`` file
        holds the gist description followed by its web URL, and its
        ``cloneurl`` file holds the push URL.
        """
        ensure_dir(gist_backup_dir)
        os.chdir(gist_backup_dir)
        gists_url = 'https://api.github.com/users/%s/gists' % username
        for gist in get_github_list(gists_url):
            gist_id = gist['id']
            mirror = gist_id + '.git'
            description = gist['description'] or "(no description)"
            # Only the first line of the description goes into the progress log.
            first_line = description.partition('\n')[0]
            info("+", "gists/" + gist_id, "-", first_line)
            backup(gist['git_pull_url'], mirror)
            update_description(mirror, description + '\n\n' + gist['html_url'])
            update_cloneurl(mirror, gist['git_push_url'])


    ensure_dir(backup_dir)
    os.chdir(backup_dir)
    for repo in get_github_list('https://api.github.com/users/%s/repos' % username):
    dir = repo['name'] + '.git'
    description = repo['description'] or "(no description)"
    info("+", repo['full_name'])
    backup(repo['git_url'], dir)
    update_description(dir, description + '\n\n' + repo['html_url'])
    update_cloneurl(dir, repo['ssh_url'])
    def back_up_repos_of(username, backup_dir=backup_dir):
        """Mirror every repository of *username* into *backup_dir*.

        The directory is created if necessary and becomes the current working
        directory.  Each repository ends up in ``<name>.git``; its
        ``description`` file holds the repository description followed by its
        web URL, and its ``cloneurl`` file holds the SSH URL.

        NOTE: the mirror directory is derived from the repository name only,
        so same-named repositories of different owners backed up into the same
        *backup_dir* share one directory.
        """
        ensure_dir(backup_dir)
        os.chdir(backup_dir)
        repos_url = 'https://api.github.com/users/%s/repos' % username
        for repo in get_github_list(repos_url):
            mirror = repo['name'] + '.git'
            description = repo['description'] or "(no description)"
            info("+", repo['full_name'])
            # Prefer the HTTPS clone URL: 'git_url' uses the unauthenticated
            # git:// protocol, which GitHub no longer serves.  Fall back to
            # 'git_url' only if the API response lacks 'clone_url'.
            source_url = repo.get('clone_url') or repo['git_url']
            backup(source_url, mirror)
            update_description(mirror, description + '\n\n' + repo['html_url'])
            update_cloneurl(mirror, repo['ssh_url'])


    # action
    # Run the backups only when executed as a script, not when imported.
    if __name__ == '__main__':
    # Gists are mirrored first, for every user listed in gists_of.
    for user in gists_of:
    back_up_gists_of(user)
    # Then the repositories of every user listed in repos_of.
    for user in repos_of:
    back_up_repos_of(user)
  4. @mgedmin mgedmin revised this gist Mar 16, 2013. 1 changed file with 12 additions and 1 deletion.
    13 changes: 12 additions & 1 deletion github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -42,6 +42,10 @@ def update_description(git_dir, description):
    with open(os.path.join(git_dir, 'description'), 'w') as f:
    f.write(description.encode('UTF-8') + '\n')

    def update_cloneurl(git_dir, cloneurl):
    with open(os.path.join(git_dir, 'cloneurl'), 'w') as f:
    f.write(cloneurl + '\n')

    # action
    ensure_dir(gist_backup_dir)
    os.chdir(gist_backup_dir)
    @@ -51,6 +55,11 @@ def update_description(git_dir, description):
    info("+", "gists/" + gist['id'], "-", description.partition('\n')[0])
    backup(gist['git_pull_url'], dir)
    update_description(dir, description + '\n\n' + gist['html_url'])
    update_cloneurl(dir, gist['git_push_url'])

    # help me catch silly errors
    gist = None
    del gist

    ensure_dir(backup_dir)
    os.chdir(backup_dir)
    @@ -59,4 +68,6 @@ def update_description(git_dir, description):
    description = repo['description'] or "(no description)"
    info("+", repo['full_name'])
    backup(repo['git_url'], dir)
    update_description(dir, description + '\n\n' + gist['html_url'])
    update_description(dir, description + '\n\n' + repo['html_url'])
    update_cloneurl(dir, repo['ssh_url'])

  5. @mgedmin mgedmin revised this gist Mar 16, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ def backup(git_url, dir):

    def update_description(git_dir, description):
    with open(os.path.join(git_dir, 'description'), 'w') as f:
    f.write(description + '\n')
    f.write(description.encode('UTF-8') + '\n')

    # action
    ensure_dir(gist_backup_dir)
  6. @mgedmin mgedmin revised this gist Mar 5, 2013. 1 changed file with 44 additions and 13 deletions.
    57 changes: 44 additions & 13 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -8,24 +8,55 @@
    import urllib
    import subprocess

    __version__ = '0.2'
    __author__ = 'Marius Gedminas <[email protected]>'
    __url__ = 'https://gist.github.com/4319265'

    # configuration
    username = 'mgedmin'
    backup_dir = os.path.expanduser('~/github')
    gist_backup_dir = os.path.expanduser('~/github/gists')

    # action
    if not os.path.isdir(backup_dir):
    os.makedirs(backup_dir)
    os.chdir(backup_dir)
    url = 'https://api.github.com/users/%s/repos?per_page=100' % username
    response = urllib.urlopen(url)
    for repo in json.load(response):
    print "+", repo['full_name']
    # helpers
    def ensure_dir(dir):
    if not os.path.isdir(dir):
    os.makedirs(dir)

    def get_github_list(url):
    response = urllib.urlopen(url + '?per_page=100')
    if response.info().getheader('Link'):
    print >> sys.stderr, "error: pagination is not supported yet"
    return json.load(response)

    def info(*args):
    print(" ".join(map(str, args)))
    sys.stdout.flush()
    dir = repo['name'] + '.git'

    def backup(git_url, dir):
    if os.path.exists(dir):
    subprocess.call(['git', 'fetch'], cwd=dir)
    else:
    subprocess.call(['git', 'clone', '--mirror', repo['git_url']])
    if response.info().getheader('Link'):
    # looks like you've got more than 100 repositories
    print >> sys.stderr, "error: pagination is not supported yet"
    subprocess.call(['git', 'clone', '--mirror', git_url])

    def update_description(git_dir, description):
    with open(os.path.join(git_dir, 'description'), 'w') as f:
    f.write(description + '\n')

    # action
    ensure_dir(gist_backup_dir)
    os.chdir(gist_backup_dir)
    for gist in get_github_list('https://api.github.com/users/%s/gists' % username):
    dir = gist['id'] + '.git'
    description = gist['description'] or "(no description)"
    info("+", "gists/" + gist['id'], "-", description.partition('\n')[0])
    backup(gist['git_pull_url'], dir)
    update_description(dir, description + '\n\n' + gist['html_url'])

    ensure_dir(backup_dir)
    os.chdir(backup_dir)
    for repo in get_github_list('https://api.github.com/users/%s/repos' % username):
    dir = repo['name'] + '.git'
    description = repo['description'] or "(no description)"
    info("+", repo['full_name'])
    backup(repo['git_url'], dir)
    update_description(dir, description + '\n\n' + gist['html_url'])
  7. @mgedmin mgedmin revised this gist Feb 25, 2013. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -21,10 +21,11 @@
    for repo in json.load(response):
    print "+", repo['full_name']
    sys.stdout.flush()
    if os.path.exists(repo['name']):
    subprocess.call(['git', 'pull'], cwd=repo['name'])
    dir = repo['name'] + '.git'
    if os.path.exists(dir):
    subprocess.call(['git', 'fetch'], cwd=dir)
    else:
    subprocess.call(['git', 'clone', '--mirror', repo['git_url']])
    if response.info().getheader('Link'):
    # looks like you've got more than 100 repositories
    print >> sys.stderr, "error: pagination is not supported yet"
    print >> sys.stderr, "error: pagination is not supported yet"
  8. @mgedmin mgedmin revised this gist Feb 25, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -24,7 +24,7 @@
    if os.path.exists(repo['name']):
    subprocess.call(['git', 'pull'], cwd=repo['name'])
    else:
    subprocess.call(['git', 'clone', repo['git_url']])
    subprocess.call(['git', 'clone', '--mirror', repo['git_url']])
    if response.info().getheader('Link'):
    # looks like you've got more than 100 repositories
    print >> sys.stderr, "error: pagination is not supported yet"
  9. @mgedmin mgedmin revised this gist Dec 18, 2012. 1 changed file with 18 additions and 3 deletions.
    21 changes: 18 additions & 3 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -1,15 +1,30 @@
    #!/usr/bin/env python
    # See http://stackoverflow.com/questions/3581031/backup-mirror-github-repositories/13917251#13917251
    # You can find the latest version of this script at
    # https://gist.github.com/4319265
    import os
    import sys
    import json
    import urllib
    import subprocess
    os.chdir(os.path.expanduser('~/github'))

    # configuration
    username = 'mgedmin'
    backup_dir = os.path.expanduser('~/github')

    # action
    if not os.path.isdir(backup_dir):
    os.makedirs(backup_dir)
    os.chdir(backup_dir)
    url = 'https://api.github.com/users/%s/repos?per_page=100' % username
    for repo in json.load(urllib.urlopen(url)):
    response = urllib.urlopen(url)
    for repo in json.load(response):
    print "+", repo['full_name']
    sys.stdout.flush()
    if os.path.exists(repo['name']):
    subprocess.call(['git', 'pull'], cwd=repo['name'])
    else:
    subprocess.call(['git', 'clone', repo['git_url']])
    subprocess.call(['git', 'clone', repo['git_url']])
    if response.info().getheader('Link'):
    # looks like you've got more than 100 repositories
    print >> sys.stderr, "error: pagination is not supported yet"
  10. @mgedmin mgedmin created this gist Dec 17, 2012.
    15 changes: 15 additions & 0 deletions github_mirror.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,15 @@
    #!/usr/bin/env python
    # See http://stackoverflow.com/questions/3581031/backup-mirror-github-repositories/13917251#13917251
    import os
    import json
    import urllib
    import subprocess
    os.chdir(os.path.expanduser('~/github'))
    username = 'mgedmin'
    url = 'https://api.github.com/users/%s/repos?per_page=100' % username
    for repo in json.load(urllib.urlopen(url)):
    print "+", repo['full_name']
    if os.path.exists(repo['name']):
    subprocess.call(['git', 'pull'], cwd=repo['name'])
    else:
    subprocess.call(['git', 'clone', repo['git_url']])