#!@python@/bin/python

# Reconstructed from git patch 3b37f8ecdac287725cb36b78f81d18734726df28
# ("vidhar: borg copy", Gregor Kleen, 2022-02-17), file hosts/vidhar/borg/copy.py.
#
# Copies borg archives from a source repository to a target repository by
# mounting each source archive (inside a private mount namespace + overlayfs
# chroot) and re-creating it in the target with `borg create`.

import json
import os
import subprocess
import re
import sys
from sys import stderr
from humanize import naturalsize

from tempfile import TemporaryDirectory

from datetime import (datetime, timedelta)
from dateutil.tz import (tzlocal, tzutc)
import dateutil.parser
import argparse

from tqdm import tqdm

from xdg import xdg_runtime_dir
import pathlib

import unshare
from time import sleep

from halo import Halo


parser = argparse.ArgumentParser()
parser.add_argument('source', metavar='REPO_OR_ARCHIVE')
parser.add_argument('target', metavar='REPO_OR_ARCHIVE')
args = parser.parse_args()


def read_repo(path):
    """Return the list of archive entries (borg JSON dicts) in repository *path*.

    Runs ``borg list --json`` and shows a spinner while waiting.
    """
    with Halo(text=f'Listing {path}', spinner='arc') as sp:
        res = None
        with subprocess.Popen(['borg', 'list', '--info', '--lock-wait', '120', '--json', path], stdout=subprocess.PIPE) as proc:
            res = json.load(proc.stdout)['archives']
        sp.succeed(f'{len(res)} archives in {path}')
    return res


class ToSync:
    """Iterator that repeatedly yields the next archive present in the source
    repository but missing from the target (checkpoint archives excluded).

    Repository listings are re-read on every step so that archives created
    while syncing are picked up; transient borg/JSON errors are logged and
    retried rather than aborting the sync.
    """

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            try:
                src = read_repo(args.source)
                dst = read_repo(args.target)
                for entry in src:
                    if entry['name'] not in {dst_entry['name'] for dst_entry in dst} and not entry['name'].endswith('.checkpoint'):
                        return entry
                raise StopIteration
            except (subprocess.CalledProcessError, json.decoder.JSONDecodeError) as err:
                # Transient failure (lock contention, truncated JSON): retry.
                print(err, file=stderr)
                continue


def copy_archive(src_repo_path, dst_repo_path, entry):
    """Copy a single archive *entry* from *src_repo_path* to *dst_repo_path*.

    Forks a child which unshares its mount namespace, builds an overlayfs
    chroot (so ``borg mount`` and path layout cannot disturb the host),
    mounts the source archive at /borg and re-creates it in the target
    repository with a progress display.  The parent waits for the child and
    propagates any non-zero exit status.
    """
    cache_suffix = None
    with Halo(text=f'Determine archive parameters', spinner='arc') as sp:
        # Archive names look like "<job>-YYYY-MM-DDTHH:MM:SS[.checkpoint...]";
        # derive a stable files-cache suffix from repo id + job name so borg's
        # files cache stays useful across archives of the same job.
        match = re.compile(r'^(.*)-[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.(checkpoint|recreate)(\.[0-9]+)?)?').fullmatch(entry['name'])
        if match:
            repo_id = None
            with subprocess.Popen(['borg', 'info', '--info', '--lock-wait', '120', '--json', src_repo_path], stdout=subprocess.PIPE) as proc:
                repo_id = json.load(proc.stdout)['repository']['id']
            if repo_id:
                cache_suffix = f'{repo_id}_{match.group(1)}'
        sp.succeed(f'Will process {entry["name"]} ({dateutil.parser.isoparse(entry["start"])}, cache_suffix={cache_suffix})')
    with TemporaryDirectory(prefix=f'borg-mount_{entry["name"]}_') as tmpdir:
        child = os.fork()
        if child == 0:
            # --- child: isolated mount namespace + overlayfs chroot ---
            # print('unshare/chroot', file=stderr)
            unshare.unshare(unshare.CLONE_NEWNS)
            subprocess.run(['mount', '--make-rprivate', '/'], check=True)
            chroot = pathlib.Path(tmpdir) / 'chroot'
            upper = pathlib.Path(tmpdir) / 'upper'
            work = pathlib.Path(tmpdir) / 'work'
            for path in [chroot, upper, work]:
                path.mkdir()
            subprocess.run(['mount', '-t', 'overlay', 'overlay', '-o', f'lowerdir=/,upperdir={upper},workdir={work}', chroot], check=True)
            # Bind-mount what borg needs inside the chroot: nix store, runtime
            # dirs, the home dir, a local source repo path and the ssh agent
            # socket directory (for remote repos).
            bindMounts = ['nix', 'run', 'proc', 'dev', 'sys', pathlib.Path(os.path.expanduser('~')).relative_to('/')]
            if not ":" in src_repo_path:
                bindMounts.append(pathlib.Path(src_repo_path).relative_to('/'))
            if 'SSH_AUTH_SOCK' in os.environ:
                bindMounts.append(pathlib.Path(os.environ['SSH_AUTH_SOCK']).parent.relative_to('/'))
            for bindMount in bindMounts:
                (chroot / bindMount).mkdir(parents=True, exist_ok=True)
                subprocess.run(['mount', '--bind', pathlib.Path('/') / bindMount, chroot / bindMount], check=True)
            os.chroot(chroot)
            os.chdir('/')
            dir = pathlib.Path('/borg')
            dir.mkdir(parents=True, exist_ok=True)
            with Halo(text=f'Determine size', spinner='arc') as sp:
                total_size = None
                total_files = None
                with subprocess.Popen(['borg', 'info', '--info', '--json', '--lock-wait', '120', f'{src_repo_path}::{entry["name"]}'], stdout=subprocess.PIPE, text=True) as proc:
                    stats = json.load(proc.stdout)['archives'][0]['stats']
                    total_size = stats['original_size']
                    total_files = stats['nfiles']
                sp.succeed(f'{total_files} files, {naturalsize(total_size, binary=True)}')
            # print(f'Mounting to {dir}', file=stderr)
            with subprocess.Popen(['borg', 'mount', '--foreground', '--progress', '--lock-wait', '120', f'{src_repo_path}::{entry["name"]}', dir]) as mount_proc:
                with Halo(text='Waiting for mount', spinner='arc') as sp:
                    wait_start = datetime.now()
                    while True:
                        # Poll until the FUSE mount is actually up; give up
                        # (and raise via check_returncode) after 10 minutes.
                        ret = subprocess.run(['mountpoint', '-q', dir])
                        if ret.returncode == 0:
                            break
                        elif datetime.now() - wait_start > timedelta(minutes=10):
                            ret.check_returncode()
                        sleep(0.1)
                    sp.succeed('Mounted')
                while True:
                    try:
                        with tqdm(total=total_size, unit_scale=True, unit_divisor=1024, unit='B', smoothing=0.01, disable=None, dynamic_ncols=True, maxinterval=0.5, miniters=1) as progress:
                            seen = 0
                            env = os.environ.copy()
                            create_args = ['borg',
                                           'create',
                                           '--lock-wait=120',
                                           '--one-file-system',
                                           '--compression=auto,zstd,10',
                                           '--chunker-params=10,23,16,4095',
                                           '--files-cache=ctime,size',
                                           '--show-rc',
                                           # '--remote-ratelimit=20480',
                                           '--log-json',
                                           '--progress',
                                           '--list',
                                           '--filter=AMEi-x?',
                                           '--stats'
                                           ]
                            # Preserve the original archive timestamp (borg
                            # stores local time; convert via local tz -> UTC).
                            archive_time = datetime.strptime(entry["time"], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=tzlocal()).astimezone(tzutc())
                            create_args += [f'--timestamp={archive_time.strftime("%Y-%m-%dT%H:%M:%S")}']
                            if cache_suffix:
                                env['BORG_FILES_CACHE_SUFFIX'] = cache_suffix
                            else:
                                # No stable suffix derivable: a shared cache
                                # would be poisoned, so disable it.
                                create_args += ['--files-cache=disabled']
                            create_args += [f'{dst_repo_path}::{entry["name"]}', '.']
                            with subprocess.Popen(create_args, cwd=dir, stdin=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, env=env) as proc:
                                last_list = None
                                last_list_time = None
                                # borg --log-json emits one JSON object per
                                # stderr line; fold them into the progress bar.
                                for line in proc.stderr:
                                    try:
                                        json_line = json.loads(line)
                                    except json.decoder.JSONDecodeError:
                                        tqdm.write(line)
                                        continue

                                    t = ''
                                    if 'time' in json_line:
                                        ts = datetime.fromtimestamp(json_line['time']).replace(tzinfo=tzlocal())
                                        t = f'{ts.isoformat(timespec="minutes")} '
                                    if json_line['type'] == 'archive_progress':
                                        # Alternate every 3s between current
                                        # path and last file-status line.
                                        if last_list_time is None or ((datetime.now() - last_list_time) // timedelta(seconds=3)) % 2 == 1:
                                            if 'path' in json_line and json_line['path']:
                                                progress.set_description(f'… {json_line["path"]}', refresh=False)
                                            else:
                                                progress.set_description(None, refresh=False)
                                        elif last_list is not None:
                                            progress.set_description(last_list, refresh=False)
                                        progress.set_postfix(compressed=naturalsize(json_line['compressed_size'], binary=True), deduplicated=naturalsize(json_line['deduplicated_size'], binary=True), nfiles=f'{json_line["nfiles"]}/{total_files}', refresh=False)
                                        progress.update(json_line["original_size"] - seen)
                                        seen = json_line["original_size"]
                                    elif json_line['type'] == 'file_status':
                                        # tqdm.write(t + f'{json_line["status"]} {json_line["path"]}')
                                        last_list = f'{json_line["status"]} {json_line["path"]}'
                                        last_list_time = datetime.now()
                                        progress.set_description(last_list, refresh=False)
                                    elif (json_line['type'] == 'log_message' or json_line['type'] == 'progress_message' or json_line['type'] == 'progress_percent') and ('message' in json_line or 'msgid' in json_line):
                                        if 'message' in json_line:
                                            tqdm.write(t + json_line['message'])
                                        elif 'msgid' in json_line:
                                            tqdm.write(t + json_line['msgid'])
                                    else:
                                        tqdm.write(t + line)
                            progress.set_description(None)
                            if proc.wait() != 0:
                                continue
                    except subprocess.CalledProcessError as err:
                        print(err, file=stderr)
                        continue
                    else:
                        break
                mount_proc.terminate()
            # Child must not run the parent's cleanup/atexit machinery.
            os._exit(0)
        else:
            # --- parent: reap children, propagate failures ---
            while True:
                (waitpid, waitret) = os.wait()
                if waitret != 0:
                    sys.exit(waitret)
                if waitpid == child:
                    break


def main():
    """Entry point: copy one named archive (``repo::archive``) or sync all
    archives missing from the target repository."""
    if "::" in args.source:
        (src_repo_path, _, src_archive) = args.source.partition("::")
        entry = None
        for candidate_entry in read_repo(src_repo_path):
            # Fixed: original compared entry['name'] while entry was still
            # None, raising TypeError on the first iteration.
            if candidate_entry['name'] != src_archive:
                continue
            entry = candidate_entry
            break

        if entry is None:
            print("Did not find archive", file=stderr)
            # Fixed: original called os.exit(1), which does not exist.
            sys.exit(1)

        copy_archive(src_repo_path, args.target, entry)
    else:
        for entry in ToSync():
            copy_archive(args.source, args.target, entry)


if __name__ == "__main__":
    sys.exit(main())