How to maintain a local mirror of github repositories

As with yesterday’s post about mirroring ONAP’s git repositories, I also want to mirror all of the git repositories for certain GitHub projects. In this specific case, that means all of the Kubernetes repositories.

So once again, here is a script based on something Tony Breeds and I cooked up a long time ago for OpenStack…

#!/usr/bin/env python

from __future__ import print_function

import datetime
import json
import os
import subprocess
import random
import requests

from github import Github as github


# Access token handed to the GitHub client in get_github_projects().
# The value below is a placeholder: replace it with your own token.
GITHUB_ACCESS_TOKEN = '...use yours!...'


def get_github_projects():
    """Yield a (base_url, full_name) pair for each repository to mirror.

    A GitHub client is built from GITHUB_ACCESS_TOKEN and, for every
    account login in the list below, each repository returned by
    get_repos() is reported as ('https://github.com', repo.full_name).
    """
    client = github(GITHUB_ACCESS_TOKEN)
    logins = ['kubernetes']
    for login in logins:
        account = client.get_user(login=login)
        for repository in account.get_repos():
            yield ('https://github.com', repository.full_name)


def _ensure_path(path):
    """Create the directory `path`, including any missing parents.

    An empty or None `path` is a no-op, and an already-existing directory
    is left untouched.

    Raises:
        OSError: if the directory cannot be created (for example, a
            component of `path` exists but is not a directory).
    """
    if not path:
        return

    # Let os.makedirs build the whole chain in one call. Walking the
    # components by hand broke on absolute paths (the leading '' component
    # led to os.makedirs('')) and raced between the existence check and
    # the creation.
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory is really there;
        # exist_ok= is avoided to stay compatible with Python 2.
        if not os.path.isdir(path):
            raise


# Mirror driver: every project is cloned as a bare mirror on first sight
# and refreshed with `git remote update` on later runs. All mirror paths
# are relative to the directory the script was started from.

# Normalise every entry to a (base_url, project, subdir) triple. Sources
# that yield only (base_url, project) store the mirror under the project
# name itself.
projects = []
for res in get_github_projects():
    if len(res) == 3:
        projects.append(res)
    else:
        projects.append((res[0], res[1], res[1]))

# Visit the projects in a different order on every run.
random.shuffle(projects)

for base_url, project, subdir in projects:
    print('%s Considering %s %s'
          % (datetime.datetime.now(), base_url, project))

    if os.path.isdir(subdir):
        print('%s Updating %s'
              % (datetime.datetime.now(), project))
        try:
            # Run git inside the mirror via cwd= instead of chdir()ing the
            # whole process back and forth.
            subprocess.check_call(
                ['git', 'remote', '-vvv', 'update'], cwd=subdir)
        except (subprocess.CalledProcessError, OSError) as e:
            # Best effort: report the failure and move on to the next one.
            print('%s FAILED: %s'
                  % (datetime.datetime.now(), e))
    else:
        # Build the URL with plain string formatting; os.path.join is a
        # filesystem helper and uses the platform's path separator.
        git_url = '%s/%s' % (base_url.rstrip('/'), project)
        _ensure_path('/'.join(subdir.split('/')[:-1]))

        print('%s Cloning %s'
              % (datetime.datetime.now(), project))
        try:
            subprocess.check_call(
                ['ionice', '-c', 'idle', 'git', 'clone',
                 '-vvv', '--mirror', git_url, subdir])
        except (subprocess.CalledProcessError, OSError) as e:
            # Treat a failed clone like a failed update so that one bad
            # repository does not abort the rest of the mirror run.
            print('%s FAILED: %s'
                  % (datetime.datetime.now(), e))

This script is basically the same as the ONAP one, but it understands how to get a project list from github and doesn’t need to handle ONAP’s slightly strange repository naming scheme.

I hope it is useful to someone other than me.

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.