From b931543508377c0e48a6801e4ea217eb523e2b03 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Tue, 13 Sep 2022 10:29:35 +0200 Subject: ... --- modules/zfssnap/zfssnap.py | 197 +++++++++++++++++++++++++++++++-------------- 1 file changed, 138 insertions(+), 59 deletions(-) (limited to 'modules/zfssnap/zfssnap.py') diff --git a/modules/zfssnap/zfssnap.py b/modules/zfssnap/zfssnap.py index 21ed1d5b..a8dae75f 100644 --- a/modules/zfssnap/zfssnap.py +++ b/modules/zfssnap/zfssnap.py @@ -3,9 +3,9 @@ import csv import subprocess import io -from distutils.util import strtobool +from distutils.util import strtobool from datetime import datetime, timezone, timedelta -from dateutil.tz import gettz, tzlocal +from dateutil.tz import gettz, tzutc import pytimeparse import argparse import re @@ -27,6 +27,36 @@ from math import floor import asyncio +from dataclasses import dataclass + + +TIME_PATTERNS = OrderedDict([ + ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')), + ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')), + ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)), + ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)), + ("hourly", lambda t: t.strftime('%Y-%m-%d %H')), + ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)), + ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)), + ("daily", lambda t: t.strftime('%Y-%m-%d')), + ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)), + ("weekly", lambda t: t.strftime('%G-%V')), + ("monthly", lambda t: t.strftime('%Y-%m')), + ("yearly", lambda t: t.strftime('%Y')), +]) + +@dataclass(eq=True, order=True, frozen=True) +class Snap: + name: str + creation: datetime + +@dataclass(eq=True, order=True, frozen=True) +class KeptBecause: + rule: str + ix: int + base: str + period: str + @cache def _now(): @@ -42,56 +72,120 @@ def _log_cmd(*args): def _get_items(): items = {} - + args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', '-s', 'local,default,inherited,temporary,received', 'li.yggdrasil:auto-snapshot'] _log_cmd(*args) with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', ['name', 'setting']) - for row in map(Row._make, reader): + reader = csv.DictReader(text_stdout, fieldnames=['name', 'setting'], delimiter='\t', quoting=csv.QUOTE_NONE) + Row = namedtuple('Row', reader.fieldnames) + for row in [Row(**data) for data in reader]: items[row.name] = bool(strtobool(row.setting)) return items - -def prune(config, dry_run, keep_newest): - prunable_snapshots = set() - args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'snapshot', '-s', 'local', 'li.yggdrasil:is-auto-snapshot'] - _log_cmd(*args) - with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: - text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', ['name', 'is_auto_snapshot']) - for row in map(Row._make, reader): - if bool(strtobool(row.is_auto_snapshot)): - prunable_snapshots.add(row.name) - - items = defaultdict(list) - Snap = namedtuple('Snap', ['name', 'creation']) - args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'snapshot', 'creation'] + +def _get_snaps(only_auto=True): + snapshots = defaultdict(list) + args = ['zfs', 'list', '-H', '-p', '-t', 'snapshot', '-o', 'name,li.yggdrasil:is-auto-snapshot,creation'] _log_cmd(*args) with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', ['name', 'timestamp']) - for row in map(Row._make, reader): - if row.name not in prunable_snapshots: + reader = csv.DictReader(text_stdout, fieldnames=['name', 'is_auto_snapshot', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE) + Row = namedtuple('Row', reader.fieldnames) + for row in [Row(**data) for data in reader]: + if only_auto and not bool(strtobool(row.is_auto_snapshot)): continue base_name, _, _ = row.name.rpartition('@') creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) - items[base_name].append(Snap(name=row.name, creation=creation)) + snapshots[base_name].append(Snap(name=row.name, creation=creation)) + + return snapshots + +def prune(config, dry_run, keep_newest, do_exec): + do_exec = do_exec and 'EXEC' in config + prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzutc()) + logger.debug(f'prune timezone: {prune_timezone}') + + items = _get_snaps() + + exec_candidates = set() + if do_exec: + exec_timezone = config.gettimezone('EXEC', 'timezone', fallback=prune_timezone) + logger.debug(f'exec timezone: {exec_timezone}') + + for rule, pattern in TIME_PATTERNS.items(): + desired_count = config.getint('EXEC', rule, fallback=0) + + for base, snaps in items.items(): + periods = OrderedDict() + + for snap in sorted(snaps, key=lambda snap: snap.creation): + period = pattern(snap.creation.astimezone(exec_timezone)) + if period not in periods: + periods[period] = deque() + periods[period].append(snap) + + to_exec = desired_count + ordered_periods = periods.items() + for period, period_snaps in ordered_periods: + if to_exec == 0: + break + + for snap in period_snaps: + exec_candidates.add(snap) + logger.debug(f'{snap.name} is exec candidate') + to_exec -= 1 + break + + if to_exec > 0: + logger.debug(f'Missing {to_exec} to fulfill exec {rule}={desired_count} for ‘{base}’') + + check_cmd = config.get('EXEC', 'check', fallback=None) + if check_cmd: + already_execed = set() + for snap in exec_candidates: + args = [] + args += shlex.split(check_cmd) + args += [snap.name] + _log_cmd(*args) + check_res = subprocess.run(args) + if check_res.returncode == 0: + already_execed.add(snap) + logger.debug(f'{snap.name} already execed') + exec_candidates -= already_execed + + exec_cmd = config.get('EXEC', 'cmd', fallback=None) + exec_count = config.getint('EXEC', 'count', fallback=1) + if exec_cmd: + execed = set() + for snap in sorted(exec_candidates, key=lambda snap: snap.creation): + if len(execed) >= exec_count: + logger.debug(f'exc_count of {exec_count} reached') + break + + args = [] + args += shlex.split(exec_cmd) + args += [snap.name] + _log_cmd(*args) + subprocess.run(args).check_returncode() + execed.add(snap) + + exec_candidates -= execed kept_count = defaultdict(lambda: defaultdict(lambda: 0)) - KeptBecause = namedtuple('KeptBecause', ['rule', 'ix', 'base', 'period']) kept_because = OrderedDict() def keep_because(base, snap, rule, period=None): - nonlocal KeptBecause, kept_count, kept_because + nonlocal kept_count, kept_because kept_count[rule][base] += 1 if snap not in kept_because: kept_because[snap] = deque() kept_because[snap].append(KeptBecause(rule=rule, ix=kept_count[rule][base], base=base, period=period)) + for candidate in exec_candidates: + base_name, _, _ = candidate.name.rpartition('@') + keep_because(base_name, candidate.name, 'exec-candidate') + within = config.gettimedelta('KEEP', 'within') if within > timedelta(seconds=0): for base, snaps in items.items(): @@ -109,31 +203,14 @@ def prune(config, dry_run, keep_newest): else: logger.warn('Skipping rule ‘within’ since retention period is zero') - prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzlocal) - - PRUNING_PATTERNS = OrderedDict([ - ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')), - ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')), - ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)), - ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)), - ("hourly", lambda t: t.strftime('%Y-%m-%d %H')), - ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)), - ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)), - ("daily", lambda t: t.strftime('%Y-%m-%d')), - ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)), - ("weekly", lambda t: t.strftime('%G-%V')), - ("monthly", lambda t: t.strftime('%Y-%m')), - ("yearly", lambda t: t.strftime('%Y')), - ]) - - for rule, pattern in PRUNING_PATTERNS.items(): + for rule, pattern in TIME_PATTERNS.items(): desired_count = config.getint('KEEP', rule, fallback=0) for base, snaps in items.items(): periods = OrderedDict() - + for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=keep_newest): - period = pattern(snap.creation) + period = pattern(snap.creation.astimezone(prune_timezone)) if period not in periods: periods[period] = deque() periods[period].append(snap) @@ -150,7 +227,7 @@ def prune(config, dry_run, keep_newest): break if to_keep > 0: - logger.debug(f'Missing {to_keep} to fulfill {rule}={desired_count} for ‘{base}’') + logger.debug(f'Missing {to_keep} to fulfill prune {rule}={desired_count} for ‘{base}’') for snap, reasons in kept_because.items(): reasons_str = ', '.join(map(str, reasons)) @@ -171,16 +248,16 @@ def prune(config, dry_run, keep_newest): logger.info(f'Would have pruned ‘{snap}’') else: logger.info(f'Pruned ‘{snap}’') - + def rename(snapshots, destroy=False, set_is_auto=False): args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots] _log_cmd(*args) renamed_to = set() with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', ['name', 'timestamp']) - for row in map(Row._make, reader): + reader = csv.DictReader(text_stdout, fieldnames=['name', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE) + Row = namedtuple('Row', reader.fieldnames) + for row in [Row(**data) for data in reader]: creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) base_name, _, _ = row.name.rpartition('@') new_name = _snap_name(base_name, time=creation) @@ -217,7 +294,7 @@ def autosnap(): all_snap_names = set() async def do_snapshot(*snap_items, recursive=False): nonlocal items, all_snap_names - snap_names = {_snap_name(item) for item in snap_items} + snap_names = {_snap_name(item) for item in snap_items if items[item]} if recursive: for snap_item in snap_items: all_snap_names |= {_snap_name(item) for item in items if item.startswith(snap_item)} @@ -268,7 +345,7 @@ def main(): sys.__excepthook__(type, value, tb) # calls default excepthook sys.excepthook = log_exceptions - + parser = argparse.ArgumentParser(prog='zfssnap') parser.add_argument('--verbose', '-v', action='count', default=0) subparsers = parser.add_subparsers() @@ -282,6 +359,7 @@ def main(): prune_parser.add_argument('--config', '-c', dest='config_files', nargs='*', default=list()) prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False) prune_parser.add_argument('--keep-newest', action='store_true', default=False) + prune_parser.add_argument('--no-exec', dest='do_exec', action='store_false', default=True) prune_parser.set_defaults(cmd=prune) args = parser.parse_args() @@ -293,7 +371,7 @@ def main(): logger.setLevel(logging.DEBUG) cmdArgs = {} - for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'set_is_auto'}: + for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'set_is_auto', 'do_exec'}: if copy in vars(args): cmdArgs[copy] = vars(args)[copy] if 'config_files' in vars(args): @@ -308,7 +386,7 @@ def main(): }) search_files = args.config_files if args.config_files else [*BaseDirectory.load_config_paths('zfssnap.ini')] read_files = config.read(search_files) - + def format_config_files(files): if not files: return 'no files' @@ -323,4 +401,5 @@ def main(): args.cmd(**cmdArgs) -sys.exit(main()) +if __name__ == '__main__': + sys.exit(main()) -- cgit v1.2.3