import os, stat, sys
from errno import EEXIST
def usage():
    """Print the command-line usage summary on standard output, then
    exit with status 1."""
    program = sys.argv[0]
    message = """usage: %s REPOS_PATH MAX_FILES_PER_SHARD [START END]
Perform an offline conversion of an FSFS repository between linear
(readable by Subversion 1.4 or later) and sharded (readable by
Subversion 1.5 or later) layouts.
The MAX_FILES_PER_SHARD argument specifies the maximum number of
files that will be stored in each shard (directory), or zero to
specify a linear layout. Subversion 1.5 uses a default value of
1000 files per shard.
Convert revisions START through END inclusive if specified, or all
revisions if unspecified.
""" % program
    print(message)
    sys.exit(1)
def incompatible_repos_format(repos_path, format):
    """Report on stderr that the repository at REPOS_PATH uses the
    unsupported repository format FORMAT, then exit with status 1."""
    details = (repos_path, format)
    err = sys.stderr
    err.write("""error: unable to convert repository '%s'.
This repository is not compatible with this tool. Valid
repository formats are '3' or '5'; this repository is
format '%s'.
""" % details)
    err.flush()
    sys.exit(1)
def incompatible_fs_format(repos_path, format):
    """Report on stderr that the filesystem inside the repository at
    REPOS_PATH uses the unsupported filesystem format FORMAT, then exit
    with status 1."""
    template = """error: unable to convert repository '%s'.
This repository contains a filesystem that is not compatible with
this tool. Valid filesystem formats are '1', '2', or '3'; this
repository contains a filesystem with format '%s'.
"""
    err = sys.stderr
    err.write(template % (repos_path, format))
    err.flush()
    sys.exit(1)
def unexpected_fs_format_options(repos_path):
    """Report on stderr that the filesystem format file of the repository
    at REPOS_PATH carries data after the format number that should not be
    there, then exit with status 1."""
    template = """error: unable to convert repository '%s'.
This repository contains a filesystem that appears to be invalid -
there is unexpected data after the filesystem format number.
"""
    err = sys.stderr
    err.write(template % repos_path)
    err.flush()
    sys.exit(1)
def incompatible_fs_format_option(repos_path, option):
    """Report on stderr that the filesystem inside the repository at
    REPOS_PATH uses the unrecognised format option OPTION, then exit with
    status 1."""
    details = (repos_path, option)
    err = sys.stderr
    err.write("""error: unable to convert repository '%s'.
This repository contains a filesystem that is not compatible with
this tool. This tool recognises the 'layout' option but the
filesystem uses the '%s' option.
""" % details)
    err.flush()
    sys.exit(1)
def warn_about_fs_format_1(repos_path, format_path):
    """Warn on stderr that the repository at REPOS_PATH holds a format 1
    FSFS filesystem, whose conversion cannot be undone by this tool, and
    tell the user to delete FORMAT_PATH to proceed anyway.  Then exit
    with status 1."""
    template = """warning: conversion of '%s' will be one-way.
This repository is currently readable by Subversion 1.1 or later.
This tool can convert this repository to one that is readable by
either Subversion 1.4 (or later) or Subversion 1.5 (or later),
but it is not able to convert it back to the original format - a
separate dump/load step would be required.
If you would like to upgrade this repository anyway, delete the
file '%s' and re-run this tool.
"""
    err = sys.stderr
    err.write(template % (repos_path, format_path))
    err.flush()
    sys.exit(1)
def check_repos_format(repos_path):
    """Check that REPOS_PATH contains a repository with a suitable format;
    print a message and exit if not.

    The first line of REPOS_PATH/format must be '3' or '5' followed by a
    newline.  An unreadable file, a missing newline or any other value is
    reported through incompatible_repos_format(), which exits."""
    format_path = os.path.join(repos_path, 'format')
    try:
        # Only the I/O can raise IOError, so keep the try body minimal and
        # let the context manager close the file on every path.
        with open(format_path) as format_file:
            format = format_file.readline()
    except IOError:
        # Does not return: incompatible_repos_format() exits the process.
        incompatible_repos_format(repos_path, '<unreadable>')

    if not format.endswith('\n'):
        incompatible_repos_format(repos_path, format + ' <missing newline>')

    format = format.rstrip('\n')
    if format not in ('3', '5'):
        incompatible_repos_format(repos_path, format)
def check_fs_format(repos_path):
    """Check that REPOS_PATH contains a filesystem with a suitable format,
    or that it contains no format file; print a message and exit if neither
    is true.  Return bool whether the filesystem is sharded.

    The first line of REPOS_PATH/db/format is the format number; any
    following lines are format options.  Format '2' must have no options;
    format '3' may carry 'layout linear' or 'layout sharded N'."""
    sharded = False
    format_path = os.path.join(repos_path, 'db', 'format')
    try:
        # The context manager closes the file on every path, including
        # the ones where a helper below exits the process.
        with open(format_path) as format_file:
            format = format_file.readline()
            if not format.endswith('\n'):
                incompatible_fs_format(repos_path,
                                       format + ' <missing newline>')
            format = format.rstrip('\n')

            if format == '1':
                # Converting a format 1 filesystem is one-way, so warn the
                # user and exit rather than proceeding.
                warn_about_fs_format_1(repos_path, format_path)
            if format not in ('2', '3'):
                incompatible_fs_format(repos_path, format)

            for line in format_file:
                if format == '2':
                    # Format 2 has no options; any extra line is invalid.
                    unexpected_fs_format_options(repos_path)

                line = line.rstrip('\n')
                if line == 'layout linear':
                    pass
                elif line.startswith('layout sharded '):
                    sharded = True
                else:
                    incompatible_fs_format_option(repos_path, line)
    except IOError:
        # A missing or unreadable format file is tolerated: main() removes
        # it before converting, and warn_about_fs_format_1() tells users to
        # delete it.  In that case the layout is assumed to be linear.
        pass

    return sharded
def current_file(repos_path):
    """Return the whitespace-separated fields of the first line of
    REPOS_PATH/db/current as a list of strings, i.e.
    [revision, next_node_id, next_copy_id].

    Raises IOError if the file cannot be opened."""
    current_path = os.path.join(repos_path, 'db', 'current')
    # Close the file deterministically instead of leaking the handle.
    with open(current_path) as current:
        return current.readline().split()
def remove_fs_format(repos_path):
    """Delete REPOS_PATH/db/format, making it user-writable first.
    Return quietly if the file cannot be stat'ed (e.g. it is already
    gone)."""
    target = os.path.join(repos_path, 'db', 'format')
    try:
        mode = os.stat(target).st_mode
    except OSError:
        # Nothing to remove.
        return
    # write_fs_format() leaves the file read-only, so restore the owner's
    # write bit before deleting it.
    os.chmod(target, mode | stat.S_IWUSR)
    os.remove(target)
def write_fs_format(repos_path, contents):
    """Write a new filesystem format file for repository REPOS_PATH
    containing CONTENTS, then make it read-only for owner and group.

    CONTENTS may be a byte string or a text string; text is encoded
    before writing because the file is opened in binary mode."""
    format_path = os.path.join(repos_path, 'db', 'format')
    if not isinstance(contents, bytes):
        # On Python 3 a str cannot be written to a binary-mode file.
        contents = contents.encode('utf-8')
    # The context manager closes the file even if the write fails.
    with open(format_path, 'wb') as format_file:
        format_file.write(contents)
    os.chmod(format_path, stat.S_IRUSR | stat.S_IRGRP)
def linearise(path):
    """Flatten PATH: move every file found in a subdirectory of PATH up
    into PATH itself and delete the emptied subdirectories.

    A subdirectory may contain a file with the subdirectory's own name, so
    each subdirectory is first renamed to carry a '.shard' suffix (files
    are assumed never to have that suffix).  Exit with status 1 if a
    subdirectory itself contains a directory."""
    # Pass 1: move every shard directory out of the way of its contents.
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if not entry.endswith('.shard') and os.path.isdir(entry_path):
            os.rename(entry_path, entry_path + '.shard')

    # Pass 2: empty each shard directory into PATH and remove it.
    for shard_dir, nested_dirs, files in os.walk(path):
        if shard_dir == path:
            continue
        if nested_dirs:
            sys.stderr.write(
                "error: directory '%s' contains other unexpected directories.\n"
                % shard_dir)
            sys.stderr.flush()
            sys.exit(1)
        for filename in files:
            os.rename(os.path.join(shard_dir, filename),
                      os.path.join(path, filename))
        os.rmdir(shard_dir)
def shard(path, max_files_per_shard, start, end):
    """Move the files for revisions START to END inclusive in PATH into
    subdirectories of PATH named such that subdirectory '0' contains at
    most MAX_FILES_PER_SHARD files, those named [0, MAX_FILES_PER_SHARD).

    Files are first staged in PATH + '.reshard' under 'N.shard'
    directories, which are then renamed into PATH as 'N'.  Entries in the
    staging directory without the '.shard' suffix are reported and left
    in place, in which case the staging directory is not removed.

    MAX_FILES_PER_SHARD must be positive.  Raises OSError if a revision
    file is missing or a rename fails."""
    tmp = path + '.reshard'
    try:
        os.mkdir(tmp)
    except OSError as e:
        # An already existing staging directory is reused.
        if e.errno != EEXIST:
            raise

    # Move all entries into shards named N.shard.
    ready_shards = set()
    for rev in range(start, end + 1):
        name = str(rev)
        shard_name = str(rev // max_files_per_shard) + '.shard'
        shard_dir = os.path.join(tmp, shard_name)
        if shard_name not in ready_shards:
            # Create the shard directory up front rather than retrying a
            # failed rename, so that a missing revision file surfaces as
            # its own error instead of a misleading mkdir failure.
            if not os.path.isdir(shard_dir):
                os.mkdir(shard_dir)
            ready_shards.add(shard_name)
        os.rename(os.path.join(path, name), os.path.join(shard_dir, name))

    # Now rename all the shards to remove the suffix.
    suffix_len = len('.shard')
    skipped = 0
    for name in os.listdir(tmp):
        if not name.endswith('.shard'):
            sys.stderr.write("warning: ignoring unexpected subdirectory '%s'.\n"
                             % os.path.join(tmp, name))
            sys.stderr.flush()
            skipped += 1
            continue
        os.rename(os.path.join(tmp, name),
                  os.path.join(path, name[:-suffix_len]))

    # Only remove the staging directory if nothing was left behind in it.
    if skipped == 0:
        os.rmdir(tmp)
def _exit_with_error(message):
    """Write MESSAGE to stderr, flush it, and exit with status 1."""
    sys.stderr.write(message)
    sys.stderr.flush()
    sys.exit(1)


def main():
    """Parse sys.argv (REPOS_PATH MAX_FILES_PER_SHARD [START END]) and
    convert the FSFS repository at REPOS_PATH to the requested layout.

    A MAX_FILES_PER_SHARD of zero selects a linear layout; a positive
    value selects a sharded layout with that many files per shard.  An
    already sharded repository is linearised first.  The filesystem
    format file is removed for the duration of the conversion and
    rewritten at the end.  Exits with status 0 on success and 1 on any
    argument or repository error."""
    argc = len(sys.argv)
    # START without END is ambiguous; reject it rather than ignoring it.
    if argc < 3 or argc == 4:
        usage()

    repos_path = sys.argv[1]
    max_files_per_shard = sys.argv[2]

    # Verify that this looks like an FSFS repository before reading
    # anything from it (db/current is read below for the default END).
    current_path = os.path.join(repos_path, 'db', 'current')
    if not os.path.exists(current_path):
        _exit_with_error(
            "error: '%s' doesn't appear to be a Subversion FSFS repository.\n"
            % repos_path)

    try:
        max_files_per_shard = int(max_files_per_shard)
    except (ValueError, OverflowError):
        _exit_with_error(
            "error: maximum files per shard ('%s') is not a valid number.\n"
            % max_files_per_shard)

    if max_files_per_shard < 0:
        _exit_with_error(
            "error: maximum files per shard ('%d') must not be negative.\n"
            % max_files_per_shard)

    if argc >= 5:
        try:
            start = int(sys.argv[3])
            end = int(sys.argv[4])
        except ValueError:
            _exit_with_error(
                "error: START ('%s') and END ('%s') must be valid numbers.\n"
                % (sys.argv[3], sys.argv[4]))
    else:
        # Default to every revision, 0 through the youngest in db/current.
        start = 0
        end = int(current_file(repos_path)[0])

    # Check the format of the repository and of its filesystem.
    check_repos_format(repos_path)
    sharded = check_fs_format(repos_path)

    # Let the user know what's going on.
    if max_files_per_shard > 0:
        print("Converting '%s' to a sharded structure with %d files per directory"
              % (repos_path, max_files_per_shard))
        if sharded:
            print('(will convert to a linear structure first)')
    else:
        print("Converting '%s' to a linear structure" % repos_path)

    # With no format file present the filesystem is not usable while its
    # layout is inconsistent.
    print('- marking the repository as invalid')
    remove_fs_format(repos_path)

    # First, convert to a linear scheme (this makes recovery easier).
    if sharded:
        print('- linearising db/revs')
        linearise(os.path.join(repos_path, 'db', 'revs'))
        print('- linearising db/revprops')
        linearise(os.path.join(repos_path, 'db', 'revprops'))

    if max_files_per_shard == 0:
        # We're done.  Stamp the filesystem with a format 2 db/format file.
        print('- marking the repository as a valid linear repository')
        write_fs_format(repos_path, '2\n')
    else:
        print('- sharding db/revs')
        shard(os.path.join(repos_path, 'db', 'revs'), max_files_per_shard,
              start, end)
        print('- sharding db/revprops')
        shard(os.path.join(repos_path, 'db', 'revprops'), max_files_per_shard,
              start, end)

        # We're done.  Stamp the filesystem with a format 3 db/format file.
        print('- marking the repository as a valid sharded repository')
        write_fs_format(repos_path,
                        '3\nlayout sharded %d\n' % max_files_per_shard)

    print('- done.')
    sys.exit(0)
if __name__ == '__main__':
    # Deliberate safety stop: running the script directly always fails
    # here, so the main() call below is never reached while this raise
    # remains in place.
    not_ready = """This script is unfinished and not ready to be used on live data.
Trust us."""
    raise Exception(not_ready)
    main()