#!/usr/bin/env python

# For migration from one CVS repository to many Git ones.

from __future__ import print_function
import os
from subprocess import check_call
import sys
import re

# Directory holding the local clone of the combined repository that the
# individual repositories are extracted from.
mdir = "mysociety"
# Unix group for the shared repositories.
unix_group = "publiccvs"

# Nothing can be extracted without the combined repository: explain how to
# obtain it and stop.
if not os.path.exists(mdir):
    for message in (
            "No %s repository found in this directory." % (mdir,),
            "Clone it with something like:",
            "  git clone ssh://mark@majestic.ukcod.org.uk/data/git/public/mysociety/",
    ):
        print(message)
    sys.exit(1)

def subdirectory_to_extract(d, base=None):
    """Return True if `d` names a non-hidden subdirectory of the repository.

    d    -- name of the entry to test, relative to `base`
    base -- directory to look in; defaults to the module-level `mdir`

    Entries that are not directories, and entries whose name starts
    with "." (for example ".git"), are rejected.
    """
    if base is None:
        base = mdir
    return os.path.isdir(os.path.join(base, d)) and not d.startswith(".")

# Subdirectories of the combined repository to split out in this run.
# Alternative selections (commented out):
#directories = [ "perllib", "phplib", "pylib", "shlib", "jslib", "rblib", "cpplib"]
#directories = [ "survey" ]
directories = [ "na2005", "gny", "news" ]

# Maps each subdirectory to the name of the repository created for it;
# initially every subdirectory keeps its own name.
directory_map = dict(zip(directories, directories))

# We might want to override a few of these names when creating
# repositories:
#directory_map["hm"] = "hassleme"
#directory_map["twfy"] = "theyworkforyou"
#directory_map["ms"] = "mysocietywww"
#directory_map["bci"] = "fixmystreet"
#directory_map["fyr"] = "writetothem"
#directory_map["ycml"] = "hearfromyourmp"
#directory_map["iso"] = "mapumental"

# Split each selected subdirectory out of the combined repository into its
# own bare, group-shared repository named "<mapped name>.git".
#
# Every command that must run inside a repository is given an explicit cwd=
# instead of the script calling os.chdir().  The script's own working
# directory therefore never changes, so relative paths keep referring to
# the directory the script was started in even if a command fails.
for directory in directories:
    renamed = directory_map[directory] + ".git"
    if os.path.exists(renamed):
        # NOTE(review): this also skips a repository left incomplete by an
        # earlier run that failed after the os.mkdir() further down.
        print("=== Directory '%s' exists, skipping..." % (renamed,))
        continue
    print("=== Working on '%s'..." % (renamed,))

    # Scratch clone in which history is rewritten.
    # NOTE(review): it is not removed once the bare repository has been
    # created - confirm whether that is intentional.
    pruning = renamed + "-pruning"
    if os.path.exists(pruning):
        raise Exception(pruning + " temporary directory already exists")

    print("=== Cloning the complete mysociety repository...")
    # --no-hardlinks makes the clone copy the object files rather than
    # hard-link them to the source repository.
    check_call( [ "git", "clone", "--no-hardlinks", mdir, pruning ] )

    # It's a pain rewriting tags properly, and Francis has told me
    # that they're not important, so just remove all the tags from the
    # cloned repository before "git filter-branch":
    print("=== Removing tags from the cloned repository in '%s'" % (pruning,))
    check_call( "git tag | xargs -n 1 git tag -d", shell=True, cwd=pruning )

    print("=== Using git filter-branch to extract subdirectory '%s'..." % (directory,))
    check_call( [ "git", "filter-branch", "--subdirectory-filter", directory,
                  "--tag-name-filter", "cat", "--", "--all" ],
                cwd=pruning )

    # See "Checklist for Shrinking a Repository" on this page:
    #   http://ftp.kernel.org/pub/software/scm/git/docs/git-filter-branch.html
    # This is the "very destructive" method described there:
    print("=== Removing refs/original...")
    check_call( "git for-each-ref --format=\"%(refname)\" refs/original/ | xargs -n 1 git update-ref -d",
                shell=True, cwd=pruning )
    print("=== Expiring entries from the reflog...")
    check_call( [ "git", "reflog", "expire", "--expire=now", "--all" ], cwd=pruning )
    print("=== Calling git gc --prune=now...")
    check_call( [ "git", "gc", "--prune=now" ], cwd=pruning )

    print("=== Making bare shared repository...")
    os.mkdir( renamed )
    check_call( [ "git", "init", "--bare", "--shared=group" ], cwd=renamed )
    # The scratch clone is a sibling of the new repository, hence "../".
    # Only the master branch is fetched into the new repository.
    check_call( [ "git", "--bare", "fetch", "../" + pruning, "master:master" ],
                cwd=renamed )

    # Hand the whole new repository over to the shared Unix group.
    check_call( [ "chgrp", "-R", unix_group, renamed ] )
    print("=== Done (%s)" % (renamed,))

