From 6e820741126c9c8b156cf8f9fe285e96cafb3138 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 13 Mar 2023 15:42:35 +0100 Subject: spin off backup-utils --- modules/zfssnap/default.nix | 113 ------- modules/zfssnap/zfssnap/setup.py | 10 - modules/zfssnap/zfssnap/zfssnap/__main__.py | 438 ---------------------------- 3 files changed, 561 deletions(-) delete mode 100644 modules/zfssnap/default.nix delete mode 100644 modules/zfssnap/zfssnap/setup.py delete mode 100644 modules/zfssnap/zfssnap/zfssnap/__main__.py (limited to 'modules/zfssnap') diff --git a/modules/zfssnap/default.nix b/modules/zfssnap/default.nix deleted file mode 100644 index 23041c36..00000000 --- a/modules/zfssnap/default.nix +++ /dev/null @@ -1,113 +0,0 @@ -{ config, pkgs, lib, flakeInputs, ... }: - -with lib; - -let - zfssnap = flakeInputs.mach-nix.lib.${config.nixpkgs.system}.buildPythonPackage rec { - pname = "zfssnap"; - src = ./zfssnap; - version = "0.0.0"; - ignoreDataOutdated = true; - - requirements = '' - pyxdg - pytimeparse - python-dateutil - ''; - postInstall = '' - wrapProgram $out/bin/zfssnap \ - --prefix PATH : ${makeBinPath [config.boot.zfs.package]} - ''; - }; - - cfg = config.services.zfssnap; -in { - options = { - services.zfssnap = { - enable = mkEnableOption "zfssnap service"; - - config = mkOption { - type = types.submodule { - options = { - keep = mkOption { - type = with types; attrsOf str; - default = { - within = "15m"; - "5m" = "48"; - "15m" = "32"; - hourly = "48"; - "4h" = "24"; - "12h" = "12"; - daily = "62"; - halfweekly = "32"; - weekly = "24"; - monthly = "-1"; - }; - }; - exec = mkOption { - type = with types; attrsOf str; - default = {}; - }; - }; - }; - }; - - snapInterval = mkOption { - type = types.str; - default = "*-*-* *:00/5:00"; - }; - - verbosity = mkOption { - type = types.int; - default = 2; - }; - - extraPruneArgs = mkOption { - type = with types; listOf str; - default = []; - }; - extraAutosnapArgs = mkOption { - type = with types; listOf str; - default = []; - }; - }; - }; - - config = mkIf cfg.enable { - systemd.services."zfssnap" = { - description = "Create automatic ZFS snapshots"; - after = [ "zfs-import.target" ]; - wants = [ "zfssnap-prune.service" ]; - before = [ "zfssnap-prune.service" ]; - serviceConfig = { - Type = "oneshot"; - ExecStart = "${zfssnap}/bin/zfssnap --verbosity=${toString cfg.verbosity} autosnap ${escapeShellArgs cfg.extraAutosnapArgs}"; - - LogRateLimitIntervalSec = 0; - }; - }; - systemd.services."zfssnap-prune" = { - description = "Prune automatic ZFS snapshots"; - after = [ "zfs-import.target" "zfssnap.service" ]; - serviceConfig = { - Type = "oneshot"; - ExecStart = let - mkSectionName = name: strings.escape [ "[" "]" ] (strings.toUpper name); - zfssnapConfig = generators.toINI { inherit mkSectionName; } cfg.config; - in "${zfssnap}/bin/zfssnap --verbosity=${toString cfg.verbosity} prune --config=${pkgs.writeText "zfssnap.ini" zfssnapConfig} ${escapeShellArgs cfg.extraPruneArgs}"; - - LogRateLimitIntervalSec = 0; - }; - }; - - systemd.timers."zfssnap" = { - wantedBy = ["timers.target"]; - timerConfig = { - OnCalendar = cfg.snapInterval; - Persistent = true; - }; - }; - - environment.systemPackages = [zfssnap]; - }; -} diff --git a/modules/zfssnap/zfssnap/setup.py b/modules/zfssnap/zfssnap/setup.py deleted file mode 100644 index 6c58757d..00000000 --- a/modules/zfssnap/zfssnap/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -from setuptools import setup - -setup(name='zfssnap', - packages=['zfssnap'], - entry_points={ - 'console_scripts': [ - 'zfssnap=zfssnap.__main__:main', - ], - } -) diff --git a/modules/zfssnap/zfssnap/zfssnap/__main__.py b/modules/zfssnap/zfssnap/zfssnap/__main__.py deleted file mode 100644 index 2ff8b309..00000000 --- a/modules/zfssnap/zfssnap/zfssnap/__main__.py +++ /dev/null @@ -1,438 +0,0 @@ -import csv -import subprocess -import io -from distutils.util import strtobool -from datetime import datetime, timezone, timedelta -from dateutil.tz import gettz, tzutc -import pytimeparse -import argparse -import re - -import sys - -import logging - -import shlex - -from collections import defaultdict, OrderedDict, deque, namedtuple - -import configparser -from xdg import BaseDirectory - -from functools import cache - -from math import floor - -import asyncio - -from dataclasses import dataclass - - -TIME_PATTERNS = OrderedDict([ - ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')), - ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')), - ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)), - ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)), - ("hourly", lambda t: t.strftime('%Y-%m-%d %H')), - ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)), - ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)), - ("daily", lambda t: t.strftime('%Y-%m-%d')), - ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)), - ("weekly", lambda t: t.strftime('%G-%V')), - ("monthly", lambda t: t.strftime('%Y-%m')), - ("yearly", lambda t: t.strftime('%Y')), -]) - -PROP_DO_AUTO_SNAPSHOT = 'li.yggdrasil:auto-snapshot' -PROP_IS_AUTO_SNAPSHOT = 'li.yggdrasil:is-auto-snapshot' - -@dataclass(eq=True, order=True, frozen=True) -class Snap: - name: str - creation: datetime - -@dataclass(eq=True, order=True, frozen=True) -class KeptBecause: - rule: str - ix: int - base: str - period: str - - -@cache -def _now(): - return datetime.now(timezone.utc) - -def _snap_name(item, time=_now()): - suffix = re.sub(r'\+00:00$', r'Z', time.isoformat(timespec='seconds')) - return f'{item}@{suffix}' - -def _log_cmd(*args): - fmt_args = ' '.join(map(shlex.quote, args)) - logger.debug('Running command: %s', fmt_args) - -def _get_items(): - items = {} - - args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', PROP_DO_AUTO_SNAPSHOT] - _log_cmd(*args) - with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: - text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.DictReader(text_stdout, fieldnames=['name', 'value'], delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', reader.fieldnames) - for row in [Row(**data) for data in reader]: - if not row.value or row.value == '-': - continue - - items[row.name] = bool(strtobool(row.value)) - - return items - -def _get_snaps(only_auto=True): - snapshots = defaultdict(list) - args = ['zfs', 'list', '-H', '-p', '-t', 'snapshot', '-o', f'name,{PROP_IS_AUTO_SNAPSHOT},creation'] - _log_cmd(*args) - with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: - text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.DictReader(text_stdout, fieldnames=['name', 'is_auto_snapshot', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', reader.fieldnames) - for row in [Row(**data) for data in reader]: - if only_auto and not bool(strtobool(row.is_auto_snapshot)): - continue - - base_name, _, _ = row.name.rpartition('@') - creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) - snapshots[base_name].append(Snap(name=row.name, creation=creation)) - - return snapshots - -def prune(config, dry_run, keep_newest, do_exec, exec_newest): - do_exec = do_exec and 'EXEC' in config - prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzutc()) - logger.debug('prune timezone: %s', prune_timezone) - - items = _get_snaps() - - kept_count = defaultdict(lambda: defaultdict(lambda: 0)) - kept_because = OrderedDict() - def keep_because(base, snap, rule, period=None): - nonlocal kept_count, kept_because - kept_count[rule][base] += 1 - if snap not in kept_because: - kept_because[snap] = deque() - kept_because[snap].append(KeptBecause(rule=rule, ix=kept_count[rule][base], base=base, period=period)) - - exec_candidates = set() - if do_exec: - exec_timezone = config.gettimezone('EXEC', 'timezone', fallback=prune_timezone) - logger.debug('exec timezone: %s', exec_timezone) - - for rule, pattern in TIME_PATTERNS.items(): - desired_count = config.getint('EXEC', rule, fallback=0) - - for base, snaps in items.items(): - periods = OrderedDict() - - for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=exec_newest): - period = pattern(snap.creation.astimezone(exec_timezone)) - if period not in periods: - periods[period] = deque() - periods[period].append(snap) - - to_exec = desired_count - ordered_periods = periods.items() if exec_newest else reversed(periods.items()) - for period, period_snaps in ordered_periods: - if to_exec == 0: - break - - for snap in period_snaps: - exec_candidates.add(snap) - logger.debug('‘%s’ is exec candidate', snap.name) - to_exec -= 1 - break - - if to_exec > 0: - logger.debug('Missing %d to fulfill exec %s=%d for ‘%s’', to_exec, rule, desired_count, base) - - check_cmd = config.get('EXEC', 'check', fallback=None) - if check_cmd: - logger.debug('exec_candidates=%s', exec_candidates) - already_execed = set() - for snap in exec_candidates: - logger.debug('checking for ‘%s’...', snap.name) - args = [] - args += shlex.split(check_cmd) - args += [snap.name] - _log_cmd(*args) - check_res = subprocess.run(args) - if check_res.returncode == 0: - already_execed.add(snap) - logger.debug('‘%s’ already execed', snap.name) - elif check_res.returncode == 124: - already_execed.add(snap) - logger.warn('‘%s’ ignored', snap.name) - pass - elif check_res.returncode == 125: - already_execed.add(snap) - logger.info('‘%s’ ignored but specified for keeping, doing so...', snap.name) - base_name, _, _ = snap.name.rpartition('@') - keep_because(base_name, snap.name, 'exec-ignored') - elif check_res.returncode == 126: - logger.debug('‘%s’ to exec', snap.name) - else: - check_res.check_returncode() - exec_candidates -= already_execed - - exec_cmd = config.get('EXEC', 'cmd', fallback=None) - exec_count = config.getint('EXEC', 'count', fallback=1) - if exec_cmd: - execed = set() - for snap in sorted(exec_candidates, key=lambda snap: snap.creation): - if exec_count > 0 and len(execed) >= exec_count: - logger.debug('exec_count of %d reached', exec_count) - break - - logger.info('execing for ‘%s’...', snap.name) - args = [] - args += shlex.split(exec_cmd) - args += [snap.name] - _log_cmd(*args) - p = subprocess.run(args) - if p.returncode == 125: - logger.warn('got dry-run returncode for ‘%s’, keeping...', snap.name) - base_name, _, _ = snap.name.rpartition('@') - keep_because(base_name, snap.name, 'exec-dryrun') - pass - else: - p.check_returncode() - execed.add(snap) - - exec_candidates -= execed - - for candidate in exec_candidates: - base_name, _, _ = candidate.name.rpartition('@') - keep_because(base_name, candidate.name, 'exec-candidate') - - within = config.gettimedelta('KEEP', 'within') - if within > timedelta(seconds=0): - for base, snaps in items.items(): - time_ref = max(snaps, key=lambda snap: snap.creation, default=None) - if not time_ref: - logger.warn('Nothing to keep for ‘%s’', base) - continue - - logger.info('Using ‘%s’ as time reference for ‘%s’', time_ref.name, base) - within_cutoff = time_ref.creation - within - - for snap in snaps: - if snap.creation >= within_cutoff: - keep_because(base, snap.name, 'within') - else: - logger.warn('Skipping rule ‘within’ since retention period is zero') - - for rule, pattern in TIME_PATTERNS.items(): - desired_count = config.getint('KEEP', rule, fallback=0) - - for base, snaps in items.items(): - periods = OrderedDict() - - for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=keep_newest): - period = pattern(snap.creation.astimezone(prune_timezone)) - if period not in periods: - periods[period] = deque() - periods[period].append(snap) - - to_keep = desired_count - ordered_periods = periods.items() if keep_newest else reversed(periods.items()) - for period, period_snaps in ordered_periods: - if to_keep == 0: - break - - for snap in period_snaps: - keep_because(base, snap.name, rule, period=period) - to_keep -= 1 - break - - if to_keep > 0: - logger.debug('Missing %d to fulfill prune %s=%d for ‘%s’', to_keep, rule, desired_count, base) - - for snap, reasons in kept_because.items(): - logger.info('Keeping ‘%s’ because: %s', snap, ', '.join(map(str, reasons))) - all_snaps = {snap.name for _, snaps in items.items() for snap in snaps} - to_destroy = all_snaps - {*kept_because} - if not to_destroy: - logger.info('Nothing to prune') - - for snap in sorted(to_destroy): - args = ['zfs', 'destroy'] - if dry_run: - args += ['-n'] - args += [snap] - _log_cmd(*args) - subprocess.run(args, check=True) - if dry_run: - logger.info('Would have pruned ‘%s’', snap) - else: - logger.info('Pruned ‘%s’', snap) - -def rename(snapshots, destroy=False, set_is_auto=False): - args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots] - _log_cmd(*args) - renamed_to = set() - with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: - text_stdout = io.TextIOWrapper(proc.stdout) - reader = csv.DictReader(text_stdout, fieldnames=['name', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE) - Row = namedtuple('Row', reader.fieldnames) - for row in [Row(**data) for data in reader]: - creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) - base_name, _, _ = row.name.rpartition('@') - new_name = _snap_name(base_name, time=creation) - if new_name == row.name: - logger.debug('Not renaming ‘%s’ since name is already correct', row.name) - continue - - if new_name in renamed_to: - if destroy: - logger.warning('Destroying ‘%s’ since ‘%s’ was already renamed to', row.name, new_name) - args = ['zfs', 'destroy', row.name] - _log_cmd(*args) - subprocess.run(args, check=True) - else: - logger.info('Skipping ‘%s’ since ‘%s’ was already renamed to', row.name, new_name) - - continue - - logger.info('Renaming ‘%s’ to ‘%s’', row.name, new_name) - args = ['zfs', 'rename', row.name, new_name] - _log_cmd(*args) - subprocess.run(args, check=True) - renamed_to.add(new_name) - - if set_is_auto: - logger.info('Setting is-auto-snapshot on ‘%s’', new_name) - args = ['zfs', 'set', f'{PROP_IS_AUTO_SNAPSHOT}=true', new_name] - _log_cmd(*args) - subprocess.run(args, check=True) - -def autosnap(): - items = _get_items() - - all_snap_names = set() - async def do_snapshot(*snap_items, recursive=False): - nonlocal items, all_snap_names - snap_names = {_snap_name(item) for item in snap_items if items[item]} - if recursive: - for snap_item in snap_items: - all_snap_names |= {_snap_name(item) for item in items if item.startswith(snap_item)} - else: - all_snap_names |= snap_names - - args = ['zfs', 'snapshot', '-o', f'{PROP_IS_AUTO_SNAPSHOT}=true'] - if recursive: - args += ['-r'] - args += snap_names - - _log_cmd(*args) - subprocess.run(args, check=True) - - pool_items = defaultdict(set) - for item in items: - pool, _, _ = item.partition('/') - pool_items[pool].add(item) - - tasks = [] - for snap_items in pool_items.values(): - tasks.append(do_snapshot(*snap_items)) - if not tasks: - logger.warning('No snapshots to create') - else: - async def run_tasks(): - await asyncio.gather(*tasks) - asyncio.run(run_tasks()) - for snap in all_snap_names: - logger.info('Created ‘%s’', snap) - if all_snap_names: - rename(snapshots=all_snap_names) - -def main(): - global logger - logger = logging.getLogger(__name__) - console_handler = logging.StreamHandler() - console_handler.setFormatter( logging.Formatter('[%(levelname)s](%(name)s): %(message)s') ) - if sys.stderr.isatty(): - console_handler.setFormatter( logging.Formatter('%(asctime)s [%(levelname)s](%(name)s): %(message)s') ) - logger.addHandler(console_handler) - - # log uncaught exceptions - def log_exceptions(type, value, tb): - global logger - - logger.error(value) - sys.__excepthook__(type, value, tb) # calls default excepthook - - sys.excepthook = log_exceptions - - parser = argparse.ArgumentParser(prog='zfssnap') - parser.add_argument('--verbosity', dest='log_level', action='append', type=int) - parser.add_argument('--verbose', '-v', dest='log_level', action='append_const', const=1) - parser.add_argument('--quiet', '-q', dest='log_level', action='append_const', const=-1) - subparsers = parser.add_subparsers() - parser.set_defaults(cmd=autosnap) - autosnap_parser = subparsers.add_parser('autosnap') - autosnap_parser.set_defaults(cmd=autosnap) - rename_parser = subparsers.add_parser('rename') - rename_parser.add_argument('snapshots', nargs='+') - rename_parser.add_argument('--destroy', action='store_true', default=False) - rename_parser.add_argument('--set-is-auto', action='store_true', default=False) - rename_parser.set_defaults(cmd=rename) - prune_parser = subparsers.add_parser('prune') - prune_parser.add_argument('--config', '-c', dest='config_files', nargs='*', default=list()) - prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False) - prune_parser.add_argument('--keep-newest', action='store_true', default=False) - prune_parser.add_argument('--exec-newest', action='store_true', default=False) - prune_parser.add_argument('--no-exec', dest='do_exec', action='store_false', default=True) - prune_parser.set_defaults(cmd=prune) - args = parser.parse_args() - - - LOG_LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL] - DEFAULT_LOG_LEVEL = logging.ERROR - log_level = LOG_LEVELS.index(DEFAULT_LOG_LEVEL) - - for adjustment in args.log_level or (): - log_level = min(len(LOG_LEVELS) - 1, max(log_level - adjustment, 0)) - logger.setLevel(LOG_LEVELS[log_level]) - - cmdArgs = {} - for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'exec_newest', 'set_is_auto', 'do_exec'}: - if copy in vars(args): - cmdArgs[copy] = vars(args)[copy] - if 'config_files' in vars(args): - def convert_timedelta(secs_str): - secs=pytimeparse.parse(secs_str) - if secs is None: - raise ValueError('Could not parse timedelta expression ‘%s’', secs_str) - return timedelta(seconds=secs) - config = configparser.ConfigParser(converters={ - 'timedelta': convert_timedelta, - 'timezone': gettz - }) - search_files = args.config_files if args.config_files else [*BaseDirectory.load_config_paths('zfssnap.ini')] - read_files = config.read(search_files) - - def format_config_files(files): - if not files: - return 'no files' - return ', '.join(map(lambda file: f'‘{file}’', files)) - - if not read_files: - raise Exception('Found no config files. Tried: %s', format_config_files(search_files)) - - logger.debug('Read following config files: %s', format_config_files(read_files)) - - cmdArgs['config'] = config - - args.cmd(**cmdArgs) - -if __name__ == '__main__': - sys.exit(main()) -- cgit v1.2.3