From 6f82f965a742d2013d9bab578a4d83fa37526902 Mon Sep 17 00:00:00 2001
From: Gregor Kleen
Date: Sat, 19 Feb 2022 15:05:25 +0100
Subject: vidhar: zfssnap...

---
 hosts/vidhar/zfs.nix        |  62 +-------
 modules/zfssnap/default.nix |  96 +++++++++++++
 modules/zfssnap/zfssnap.py  | 346 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 444 insertions(+), 60 deletions(-)
 create mode 100644 modules/zfssnap/default.nix
 create mode 100644 modules/zfssnap/zfssnap.py

diff --git a/hosts/vidhar/zfs.nix b/hosts/vidhar/zfs.nix
index f5a0383a..bee4e88a 100644
--- a/hosts/vidhar/zfs.nix
+++ b/hosts/vidhar/zfs.nix
@@ -1,32 +1,5 @@
 { pkgs, lib, config, ... }:
-let
-  snapshotNames = ["frequent" "hourly" "daily" "monthly" "yearly"];
-  snapshotCount = {
-    frequent = 24;
-    hourly = 24;
-    daily = 30;
-    monthly = 12;
-    yearly = 5;
-  };
-  snapshotTimerConfig = {
-    frequent = { OnCalendar = "*:0/5 UTC"; Persistent = true; };
-    hourly = { OnCalendar = "hourly UTC"; Persistent = true; };
-    daily = { OnCalendar = "daily UTC"; Persistent = true; };
-    monthly = { OnCalendar = "monthly UTC"; Persistent = true; };
-    yearly = { OnCalendar = "yearly UTC"; Persistent = true; };
-  };
-  snapshotDescr = {
-    frequent = "few minutes";
-    hourly = "hour";
-    daily = "day";
-    monthly = "month";
-    yearly = "year";
-  };
-
-  zfs = config.boot.zfs.package;
-
-  autosnapPackage = pkgs.zfstools.override { inherit zfs; };
-in {
+{
   config = {
     fileSystems = {
       "/boot" =
@@ -136,38 +109,7 @@ in {
       echo "=== ZPOOL IMPORT COMPLETE ==="
     '';
 
-    systemd.services =
-      let mkSnapService = snapName: {
-            name = "zfs-snapshot-${snapName}";
-            value = {
-              description = "ZFS auto-snapshot every ${snapshotDescr.${snapName}}";
-              after = [ "zfs-import.target" ];
-              serviceConfig = {
-                Type = "oneshot";
-                ExecStart = "${autosnapPackage}/bin/zfs-auto-snapshot -k -p -u ${snapName} ${toString snapshotCount.${snapName}}";
-              };
-              restartIfChanged = false;
-
-              preStart = ''
-                ${zfs}/bin/zfs set com.sun:auto-snapshot=true hdd-raid6/safe
-                ${zfs}/bin/zfs set com.sun:auto-snapshot=false hdd-raid6/safe/home/mherold/eos/base
-                ${zfs}/bin/zfs set com.sun:auto-snapshot=true ssd-raid1/safe
-                ${zfs}/bin/zfs set com.sun:auto-snapshot=true boot
-              '';
-            };
-          };
-      in builtins.listToAttrs (map mkSnapService snapshotNames);
-
-    systemd.timers =
-      let mkSnapTimer = snapName: {
-            name = "zfs-snapshot-${snapName}";
-            value = {
-              wantedBy = [ "timers.target" ];
-              timerConfig = snapshotTimerConfig.${snapName};
-            };
-          };
-      in builtins.listToAttrs (map mkSnapTimer snapshotNames);
-
+    services.zfssnap.enable = true;
     services.zfs.trim.enable = false;
     services.zfs.autoScrub = {
       enable = true;
diff --git a/modules/zfssnap/default.nix b/modules/zfssnap/default.nix
new file mode 100644
index 00000000..a0590c9f
--- /dev/null
+++ b/modules/zfssnap/default.nix
@@ -0,0 +1,96 @@
+{ config, pkgs, lib, ... }:
+
+with lib;
+
+let
+  zfssnap = pkgs.stdenv.mkDerivation rec {
+    name = "zfssnap";
+    src = ./zfssnap.py;
+
+    phases = [ "buildPhase" "checkPhase" "installPhase" ];
+
+    buildInputs = with pkgs; [makeWrapper];
+
+    python = pkgs.python39.withPackages (ps: with ps; [pyxdg pytimeparse dateutil systemd]);
+
+    buildPhase = ''
+      substitute $src zfssnap \
+        --subst-var-by python ${escapeShellArg python}
+    '';
+
+    doCheck = true;
+    checkPhase = ''
+      ${python}/bin/python -m py_compile zfssnap
+    '';
+
+    installPhase = ''
+      install -m 0755 -D -t $out/bin \
+        zfssnap
+
+      wrapProgram $out/bin/zfssnap \
+        --prefix PATH : ${makeBinPath [config.boot.zfs.package]}
+    '';
+  };
+
+  cfg = config.services.zfssnap;
+in {
+  options = {
+    services.zfssnap = {
+      enable = mkEnableOption "zfssnap service";
+
+      config = mkOption {
+        type = with types; attrsOf (attrsOf str);
+        default = {
+          keep = {
+            within = "5m";
+            "5m" = "24";
+            hourly = "24";
+            daily = "31";
+            monthly = "24";
+            yearly = "-1";
+          };
+        };
+      };
+
+      snapInterval = mkOption {
+        type = types.str;
+        default = "*-*-* *:00/5:00 Europe/Berlin";
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    systemd.services."zfssnap" = {
+      description = "Create automatic ZFS snapshots";
+      after = [ "zfs-import.target" ];
+      wants = [ "zfssnap-prune.service" ];
+      before = [ "zfssnap-prune.service" ];
+      serviceConfig = {
+        Type = "oneshot";
+        ExecStart = "${zfssnap}/bin/zfssnap --no-stderr -vv";
+      };
+    };
+    systemd.services."zfssnap-prune" = {
+      description = "Prune automatic ZFS snapshots";
+      after = [ "zfs-import.target" "zfssnap.service" ];
+      serviceConfig = {
+        Type = "oneshot";
+        ExecStart = let
+          mkSectionName = name: strings.escape [ "[" "]" ] (strings.toUpper name);
+          # toINI only renders a string, but --config expects a file name
+          zfssnapConfig = pkgs.writeText "zfssnap.ini" (generators.toINI { inherit mkSectionName; } cfg.config);
+        in "${zfssnap}/bin/zfssnap --no-stderr -vv prune --config=${zfssnapConfig}";
+      };
+    };
+
+    systemd.timers."zfssnap" = {
+      wantedBy = ["timers.target"];
+      timerConfig = {
+        OnCalendar = cfg.snapInterval;
+        Persistent = true;
+      };
+    };
+
+    environment.systemPackages = [zfssnap];
+  };
+}
diff --git a/modules/zfssnap/zfssnap.py b/modules/zfssnap/zfssnap.py
new file mode 100644
index 00000000..86690127
--- /dev/null
+++ b/modules/zfssnap/zfssnap.py
@@ -0,0 +1,346 @@
+#!@python@/bin/python
+
+import csv
+import subprocess
+import io
+from distutils.util import strtobool
+from datetime import datetime, timezone, timedelta
+from dateutil.tz import gettz, tzlocal
+import pytimeparse
+import argparse
+import re
+
+import sys
+
+import logging
+
+import shlex
+
+from collections import defaultdict, OrderedDict
+
+import configparser
+from xdg import BaseDirectory
+
+from functools import cache
+
+from math import floor
+
+from systemd import journal
+
+
+logger = logging.getLogger(__name__)
+
+
+@cache
+def _now():
+    """Return the current UTC time, fixed at the first call for the whole run."""
+    return datetime.now(timezone.utc)
+
+def _snap_name(item, time=None):
+    """Return the automatic snapshot name of dataset `item` created at `time`.
+
+    `time` defaults to `_now()`; a trailing `+00:00` offset is written as `Z`.
+    """
+    if time is None:
+        time = _now()
+    suffix = re.sub(r'\+00:00$', r'Z', time.isoformat())
+    return f'{item}@auto_{suffix}'
+
+def _log_cmd(*args):
+    """Log the shell-quoted command line `args` at debug level."""
+    fmt_args = ' '.join(map(shlex.quote, args))
+    logger.debug(f'Running command: {fmt_args}')
+
+def _get_items():
+    """Return `{dataset: bool}` for the `li.yggdrasil:auto-snapshot` property.
+
+    Runs `zfs get` over filesystems and volumes and parses every reported
+    value with `strtobool`.
+    """
+    items = {}
+
+    args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', '-s', 'local,default,inherited,temporary,received', 'li.yggdrasil:auto-snapshot']
+    _log_cmd(*args)
+    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
+        text_stdout = io.TextIOWrapper(proc.stdout)
+        reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE)
+        for row in reader:
+            name = row[0]
+            setting = bool(strtobool(row[1]))
+            items[name] = setting
+
+    return items
+
+def prune(config, dry_run):
+    """Destroy automatic snapshots that no rule of the `KEEP` section retains.
+
+    Only snapshots whose name equals `_snap_name()` of their dataset and
+    creation time are considered.  A snapshot is retained if it is newer than
+    `KEEP.within`, or if it is the newest snapshot of one of the latest N
+    periods of a rule in `PRUNING_PATTERNS`, N being that rule's value in
+    `KEEP` (0 or absent disables the rule, a negative N never runs out).
+    Periods are evaluated in `KEEP.timezone`, by default the local timezone.
+    With `dry_run`, `zfs destroy` is called with `-n`.
+    """
+    items = defaultdict(list)
+
+    args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'snapshot', 'creation']
+    _log_cmd(*args)
+    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
+        text_stdout = io.TextIOWrapper(proc.stdout)
+        reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE)
+        for row in reader:
+            name = row[0]
+            timestamp = int(row[1])
+            creation = datetime.fromtimestamp(timestamp, timezone.utc)
+            base_name, _, _ = name.rpartition('@')
+            expected_name = _snap_name(base_name, time=creation)
+            if expected_name != name:
+                # not named by our scheme, so not ours to prune
+                continue
+            items[base_name].append({'name': name, 'creation': creation})
+
+    keep = set()
+    kept_count = defaultdict(lambda: defaultdict(lambda: 0))
+    def keep_because(base, snap, rule, period=None):
+        if snap not in keep:
+            kept_count[rule][base] += 1
+            logger.info(f'Keeping ‘{snap}’ because of rule ‘{rule}’ (#{kept_count[rule][base]} for ‘{base}’, period={period})')
+        keep.add(snap)
+
+    within = config.gettimedelta('KEEP', 'within')
+    within_cutoff = _now() - within
+
+    for base, snap in [(base, snap) for base, snaps in items.items() for snap in snaps]:
+        if snap['creation'] >= within_cutoff:
+            keep_because(base, snap['name'], 'within')
+
+    # tzlocal must be instantiated: the fallback is used as-is, not converted
+    prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzlocal())
+
+    PRUNING_PATTERNS = OrderedDict([
+        ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')),
+        ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')),
+        ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)),
+        ("hourly", lambda t: t.strftime('%Y-%m-%d %H')),
+        ("daily", lambda t: t.strftime('%Y-%m-%d')),
+        ("weekly", lambda t: t.strftime('%G-%V')),
+        ("monthly", lambda t: t.strftime('%Y-%m')),
+        ("yearly", lambda t: t.strftime('%Y')),
+    ])
+
+    for rule, pattern in PRUNING_PATTERNS.items():
+        desired_count = config.getint('KEEP', rule, fallback=0)
+
+        for base, snaps in items.items():
+            last_period = None
+            to_keep = desired_count
+
+            if to_keep == 0:
+                continue
+
+            for snap in sorted(snaps, key=lambda snap: snap['creation'], reverse=True):
+                if to_keep == 0:
+                    break
+
+                # creation times are UTC; period boundaries follow the configured zone
+                period = pattern(snap['creation'].astimezone(prune_timezone))
+                if period != last_period:
+                    last_period = period
+                    keep_because(base, snap['name'], rule, period=period)
+                    to_keep -= 1
+
+            if to_keep > 0:
+                logger.debug(f'Missing {to_keep} to fulfill {rule}={desired_count} for ‘{base}’')
+
+    all_snaps = {snap['name'] for _, snaps in items.items() for snap in snaps}
+    to_delete = all_snaps - keep
+    if to_delete:
+        logger.info('Will prune: %s', ', '.join(map(lambda snap: f'‘{snap}’', to_delete)))
+    else:
+        logger.info('Nothing to prune')
+
+    for snap in to_delete:
+        args = ['zfs', 'destroy']
+        if dry_run:
+            args += ['-n']
+        args += [snap]
+        _log_cmd(*args)
+        subprocess.run(args, check=True)
+
+def rename(snapshots):
+    """Rename each of `snapshots` to `_snap_name()` of its actual creation time.
+
+    Snapshots that already carry that name are left alone.
+    """
+    if not snapshots:
+        # do not run `zfs get` without any snapshot operand
+        return
+
+    args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots]
+    _log_cmd(*args)
+    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
+        text_stdout = io.TextIOWrapper(proc.stdout)
+        reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE)
+        for row in reader:
+            name = row[0]
+            timestamp = int(row[1])
+            creation = datetime.fromtimestamp(timestamp, timezone.utc)
+            base_name, _, _ = name.rpartition('@')
+            new_name = _snap_name(base_name, time=creation)
+            if new_name == name:
+                logger.debug(f'Not renaming ‘{name}’ since name is already correct')
+                continue
+            logger.info(f'Renaming ‘{name}’ to ‘{new_name}’')
+
+            rename_args = ['zfs', 'rename', name, new_name]
+            _log_cmd(*rename_args)
+            subprocess.run(rename_args, check=True)
+
+def autosnap():
+    """Snapshot every dataset whose `li.yggdrasil:auto-snapshot` is true.
+
+    An included dataset with descendants, all of them included too, is
+    snapshotted with `zfs snapshot -r` and its descendants are not
+    snapshotted again on their own; every other included dataset is
+    snapshotted individually.  The new snapshots are handed to `rename()`.
+    """
+    items = _get_items()
+
+    recursive, single = set(), set()
+
+    for item_name, is_included in items.items():
+        if not is_included:
+            continue
+
+        children = {sub_name for sub_name in items if sub_name.startswith(f'{item_name}/')}
+        is_recursive = all(items[sub_name] for sub_name in children)
+        if is_recursive and children:
+            recursive.add(item_name)
+        else:
+            single.add(item_name)
+
+    for item_name in recursive | single:
+        is_covered = any(item_name.startswith(f'{super_name}/') for super_name in recursive)
+        if is_covered:
+            recursive.discard(item_name)
+            single.discard(item_name)
+
+    def do_snapshot(*snap_items, recursive=False):
+        if not snap_items:
+            # nothing selected; do not run `zfs snapshot` without operands
+            return
+
+        snap_names = {_snap_name(item) for item in snap_items}
+        if recursive:
+            # match on the `/` boundary so `pool/a` does not claim `pool/ab`
+            all_snap_names = {
+                _snap_name(item)
+                for item in items
+                if any(item == snap_item or item.startswith(f'{snap_item}/') for snap_item in snap_items)
+            }
+        else:
+            all_snap_names = snap_names
+
+        args = ['zfs', 'snapshot']
+        if recursive:
+            args += ['-r']
+        args += snap_names
+
+        _log_cmd(*args)
+        subprocess.run(args, check=True)
+        rename(snapshots=all_snap_names)
+
+    do_snapshot(*single)
+    do_snapshot(*recursive, recursive=True)
+
+def main():
+    """Parse the command line, set up logging and run the selected command.
+
+    Without a subcommand `autosnap()` runs; `rename` and `prune` run the
+    functions of the same name.  `prune` is configured from the `--config`
+    files or, when none are given, from whatever paths
+    `BaseDirectory.load_config_paths('zfssnap.ini')` yields.
+    """
+    # the journal always gets the log; stderr is added below unless disabled
+    systemd_handler = journal.JournalHandler()
+    logger.addHandler(systemd_handler)
+
+    # log uncaught exceptions
+    def log_exceptions(exc_type, value, tb):
+        logger.error(value)
+        sys.__excepthook__(exc_type, value, tb) # calls default excepthook
+
+    sys.excepthook = log_exceptions
+
+    parser = argparse.ArgumentParser(prog='zfssnap')
+    parser.add_argument('--verbose', '-v', action='count', default=0)
+    parser.add_argument('--no-stderr', dest='stderr', action='store_false', default=True)
+    subparsers = parser.add_subparsers()
+    parser.set_defaults(cmd=autosnap)
+    rename_parser = subparsers.add_parser('rename')
+    rename_parser.add_argument('snapshots', nargs='+')
+    rename_parser.set_defaults(cmd=rename)
+    prune_parser = subparsers.add_parser('prune')
+    prune_parser.add_argument('--config', '-c', dest='config_files', type=argparse.FileType('r'), nargs='*', default=list())
+    prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False)
+    prune_parser.set_defaults(cmd=prune)
+    args = parser.parse_args()
+
+    if args.stderr:
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter( logging.Formatter('[%(levelname)s](%(name)s): %(message)s') )
+        if sys.stderr.isatty():
+            console_handler.setFormatter( logging.Formatter('%(asctime)s [%(levelname)s](%(name)s): %(message)s') )
+        logger.addHandler(console_handler)
+
+    if args.verbose <= 0:
+        logger.setLevel(logging.WARNING)
+    elif args.verbose <= 1:
+        logger.setLevel(logging.INFO)
+    else:
+        logger.setLevel(logging.DEBUG)
+
+    cmdArgs = {}
+    for key in ('snapshots', 'dry_run'):
+        if key in vars(args):
+            cmdArgs[key] = vars(args)[key]
+    if 'config_files' in vars(args):
+        def convert_timedelta(secs_str):
+            secs=pytimeparse.parse(secs_str)
+            if secs is None:
+                raise ValueError(f'Could not parse timedelta expression ‘{secs_str}’')
+            return timedelta(seconds=secs)
+        config = configparser.ConfigParser(converters={
+            'timedelta': convert_timedelta,
+            'timezone': gettz
+        })
+        if args.config_files:
+            # argparse (FileType) has opened these already: they must go
+            # through read_file(), since read() only takes file names
+            search_files = [config_file.name for config_file in args.config_files]
+            read_files = []
+            for config_file in args.config_files:
+                with config_file:
+                    config.read_file(config_file)
+                read_files.append(config_file.name)
+        else:
+            search_files = [*BaseDirectory.load_config_paths('zfssnap.ini')]
+            read_files = config.read(search_files)
+
+        def format_config_files(files):
+            if not files:
+                return 'no files'
+            return ', '.join(map(lambda file: f'‘{file}’', files))
+
+        if not read_files:
+            raise Exception(f'Found no config files. Tried: {format_config_files(search_files)}')
+
+        logger.debug(f'Read following config files: {format_config_files(read_files)}')
+
+        cmdArgs['config'] = config
+
+    args.cmd(**cmdArgs)
+
+if __name__ == '__main__':
+    sys.exit(main())
-- 
cgit v1.2.3