diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2023-03-13 15:42:35 +0100 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2023-03-13 15:42:35 +0100 |
commit | 6e820741126c9c8b156cf8f9fe285e96cafb3138 (patch) | |
tree | de2b4aeaff584420f2410ce47e0f906f68e79387 /modules/zfssnap/zfssnap | |
parent | b9e6f77db0871da3c72928619395590b28ea0181 (diff) | |
download | nixos-6e820741126c9c8b156cf8f9fe285e96cafb3138.tar nixos-6e820741126c9c8b156cf8f9fe285e96cafb3138.tar.gz nixos-6e820741126c9c8b156cf8f9fe285e96cafb3138.tar.bz2 nixos-6e820741126c9c8b156cf8f9fe285e96cafb3138.tar.xz nixos-6e820741126c9c8b156cf8f9fe285e96cafb3138.zip |
spin off backup-utils
Diffstat (limited to 'modules/zfssnap/zfssnap')
-rw-r--r-- | modules/zfssnap/zfssnap/setup.py | 10 | ||||
-rw-r--r-- | modules/zfssnap/zfssnap/zfssnap/__main__.py | 438 |
2 files changed, 0 insertions, 448 deletions
diff --git a/modules/zfssnap/zfssnap/setup.py b/modules/zfssnap/zfssnap/setup.py deleted file mode 100644 index 6c58757d..00000000 --- a/modules/zfssnap/zfssnap/setup.py +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
# Minimal setuptools packaging stub for the zfssnap tool.
from setuptools import setup

ENTRY_POINTS = {
    'console_scripts': [
        # Install a `zfssnap` executable dispatching to zfssnap.__main__:main.
        'zfssnap=zfssnap.__main__:main',
    ],
}

setup(
    name='zfssnap',
    packages=['zfssnap'],
    entry_points=ENTRY_POINTS,
)
diff --git a/modules/zfssnap/zfssnap/zfssnap/__main__.py b/modules/zfssnap/zfssnap/zfssnap/__main__.py deleted file mode 100644 index 2ff8b309..00000000 --- a/modules/zfssnap/zfssnap/zfssnap/__main__.py +++ /dev/null | |||
@@ -1,438 +0,0 @@ | |||
import csv
import subprocess
import io
from distutils.util import strtobool  # NOTE(review): distutils was removed in Python 3.12 (PEP 632) — strtobool needs a local replacement before upgrading
from datetime import datetime, timezone, timedelta
from dateutil.tz import gettz, tzutc  # third-party: python-dateutil
import pytimeparse  # third-party: parses human-readable durations ('1h30m') to seconds
import argparse
import re

import sys

import logging

import shlex

from collections import defaultdict, OrderedDict, deque, namedtuple

import configparser
from xdg import BaseDirectory  # third-party: pyxdg, for XDG config-file lookup

from functools import cache

from math import floor

import asyncio

from dataclasses import dataclass
30 | |||
# Retention-rule names mapped (finest period first) to functions that bucket a
# datetime into its period key.  Keys may be plain strings or
# (string, int) tuples; they are only compared for equality/grouping,
# never parsed back.
TIME_PATTERNS = OrderedDict([
    ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')),
    ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')),
    ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)),
    ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)),
    ("hourly", lambda t: t.strftime('%Y-%m-%d %H')),
    ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)),
    ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)),
    ("daily", lambda t: t.strftime('%Y-%m-%d')),
    ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)),  # ISO week split into Mon–Wed / Thu–Sun halves
    ("weekly", lambda t: t.strftime('%G-%V')),  # ISO year-week
    ("monthly", lambda t: t.strftime('%Y-%m')),
    ("yearly", lambda t: t.strftime('%Y')),
])

# ZFS user properties: opt-in flag on datasets / marker set on created snapshots.
PROP_DO_AUTO_SNAPSHOT = 'li.yggdrasil:auto-snapshot'
PROP_IS_AUTO_SNAPSHOT = 'li.yggdrasil:is-auto-snapshot'
48 | |||
@dataclass(eq=True, order=True, frozen=True)
class Snap:
    """A single ZFS snapshot (hashable; ordered by name, then creation)."""
    name: str  # full snapshot name, e.g. 'pool/dataset@2023-03-13T15:42:35Z'
    creation: datetime  # creation time as reported by zfs, timezone-aware UTC
53 | |||
@dataclass(eq=True, order=True, frozen=True)
class KeptBecause:
    """Record of why prune() kept a snapshot (one per matching rule)."""
    rule: str  # retention rule name, e.g. 'daily', 'within', 'exec-candidate'
    ix: int  # running count of snapshots this rule has kept for this dataset
    base: str  # dataset the snapshot belongs to
    period: str  # period bucket that matched — NOTE(review): despite the annotation this also receives None and (str, int) tuples from TIME_PATTERNS
60 | |||
61 | |||
@cache
def _now():
    """Return the current UTC time; @cache pins a single timestamp for the
    whole process run, so every snapshot created in one run shares it."""
    return datetime.now(timezone.utc)
65 | |||
66 | def _snap_name(item, time=_now()): | ||
67 | suffix = re.sub(r'\+00:00$', r'Z', time.isoformat(timespec='seconds')) | ||
68 | return f'{item}@{suffix}' | ||
69 | |||
def _log_cmd(*args):
    """Log the command *args* (shell-quoted) at DEBUG level before running it."""
    quoted = [shlex.quote(arg) for arg in args]
    logger.debug('Running command: %s', ' '.join(quoted))
73 | |||
def _get_items():
    """Return a dict mapping dataset name -> auto-snapshot enabled (bool).

    Queries the PROP_DO_AUTO_SNAPSHOT user property on all ZFS filesystems
    and volumes; datasets with an empty or unset ('-') value are omitted.
    """
    def _to_bool(value):
        # BUGFIX/modernisation: local replacement for distutils.util.strtobool
        # (distutils was removed in Python 3.12, PEP 632).  Accepts the same
        # truth-value spellings and raises ValueError on anything else.
        v = value.strip().lower()
        if v in ('y', 'yes', 't', 'true', 'on', '1'):
            return True
        if v in ('n', 'no', 'f', 'false', 'off', '0'):
            return False
        raise ValueError(f'invalid truth value {value!r}')

    items = {}

    args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', PROP_DO_AUTO_SNAPSHOT]
    _log_cmd(*args)
    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
        # -H gives tab-separated, header-less output: parse as 2-column TSV.
        text_stdout = io.TextIOWrapper(proc.stdout)
        reader = csv.DictReader(text_stdout, fieldnames=['name', 'value'], delimiter='\t', quoting=csv.QUOTE_NONE)
        Row = namedtuple('Row', reader.fieldnames)
        for row in [Row(**data) for data in reader]:
            if not row.value or row.value == '-':
                continue

            items[row.name] = _to_bool(row.value)

    return items
90 | |||
def _get_snaps(only_auto=True):
    """Return a dict mapping dataset name -> list of Snap objects.

    only_auto -- when True (default), include only snapshots carrying the
        PROP_IS_AUTO_SNAPSHOT property.
    """
    def _to_bool(value):
        # BUGFIX/modernisation: local replacement for distutils.util.strtobool
        # (distutils was removed in Python 3.12, PEP 632).  Accepts the same
        # truth-value spellings and raises ValueError on anything else.
        v = value.strip().lower()
        if v in ('y', 'yes', 't', 'true', 'on', '1'):
            return True
        if v in ('n', 'no', 'f', 'false', 'off', '0'):
            return False
        raise ValueError(f'invalid truth value {value!r}')

    snapshots = defaultdict(list)
    args = ['zfs', 'list', '-H', '-p', '-t', 'snapshot', '-o', f'name,{PROP_IS_AUTO_SNAPSHOT},creation']
    _log_cmd(*args)
    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
        # -H -p gives tab-separated machine-readable output with epoch timestamps.
        text_stdout = io.TextIOWrapper(proc.stdout)
        reader = csv.DictReader(text_stdout, fieldnames=['name', 'is_auto_snapshot', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE)
        Row = namedtuple('Row', reader.fieldnames)
        for row in [Row(**data) for data in reader]:
            if only_auto and not _to_bool(row.is_auto_snapshot):
                continue

            base_name, _, _ = row.name.rpartition('@')
            creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc)
            snapshots[base_name].append(Snap(name=row.name, creation=creation))

    return snapshots
108 | |||
def prune(config, dry_run, keep_newest, do_exec, exec_newest):
    """Prune automatic snapshots according to *config*.

    Snapshots matched by a retention rule in the [KEEP] section, or still
    owed to the [EXEC] machinery, are kept; all other auto-snapshots are
    destroyed with `zfs destroy`.

    config -- configparser with 'timedelta' and 'timezone' converters
    dry_run -- pass -n to `zfs destroy` and only report what would happen
    keep_newest / exec_newest -- prefer the newest (instead of oldest)
        snapshot within each period bucket
    do_exec -- honour the [EXEC] section (if present) before pruning

    BUGFIX: deprecated logger.warn() replaced with logger.warning();
    redundant `pass` statements removed.  Logic otherwise unchanged.
    """
    do_exec = do_exec and 'EXEC' in config
    prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzutc())
    logger.debug('prune timezone: %s', prune_timezone)

    items = _get_snaps()

    # kept_count[rule][base]: running tally, used to number each reason;
    # kept_because[snap_name]: every KeptBecause record for that snapshot.
    kept_count = defaultdict(lambda: defaultdict(lambda: 0))
    kept_because = OrderedDict()
    def keep_because(base, snap, rule, period=None):
        nonlocal kept_count, kept_because
        kept_count[rule][base] += 1
        if snap not in kept_because:
            kept_because[snap] = deque()
        kept_because[snap].append(KeptBecause(rule=rule, ix=kept_count[rule][base], base=base, period=period))

    exec_candidates = set()
    if do_exec:
        exec_timezone = config.gettimezone('EXEC', 'timezone', fallback=prune_timezone)
        logger.debug('exec timezone: %s', exec_timezone)

        for rule, pattern in TIME_PATTERNS.items():
            desired_count = config.getint('EXEC', rule, fallback=0)

            for base, snaps in items.items():
                # Bucket this dataset's snapshots into the rule's periods.
                periods = OrderedDict()
                for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=exec_newest):
                    period = pattern(snap.creation.astimezone(exec_timezone))
                    if period not in periods:
                        periods[period] = deque()
                    periods[period].append(snap)

                # Take the first snapshot from up to desired_count periods.
                to_exec = desired_count
                ordered_periods = periods.items() if exec_newest else reversed(periods.items())
                for period, period_snaps in ordered_periods:
                    if to_exec == 0:
                        break

                    for snap in period_snaps:
                        exec_candidates.add(snap)
                        logger.debug('‘%s’ is exec candidate', snap.name)
                        to_exec -= 1
                        break

                if to_exec > 0:
                    logger.debug('Missing %d to fulfill exec %s=%d for ‘%s’', to_exec, rule, desired_count, base)

    # Optional check command filters candidates that are already handled.
    # Exit codes: 0 = already execed, 124 = ignore, 125 = ignore but keep
    # the snapshot, 126 = still to exec, anything else = error.
    check_cmd = config.get('EXEC', 'check', fallback=None)
    if check_cmd:
        logger.debug('exec_candidates=%s', exec_candidates)
        already_execed = set()
        for snap in exec_candidates:
            logger.debug('checking for ‘%s’...', snap.name)
            args = []
            args += shlex.split(check_cmd)
            args += [snap.name]
            _log_cmd(*args)
            check_res = subprocess.run(args)
            if check_res.returncode == 0:
                already_execed.add(snap)
                logger.debug('‘%s’ already execed', snap.name)
            elif check_res.returncode == 124:
                already_execed.add(snap)
                logger.warning('‘%s’ ignored', snap.name)
            elif check_res.returncode == 125:
                already_execed.add(snap)
                logger.info('‘%s’ ignored but specified for keeping, doing so...', snap.name)
                base_name, _, _ = snap.name.rpartition('@')
                keep_because(base_name, snap.name, 'exec-ignored')
            elif check_res.returncode == 126:
                logger.debug('‘%s’ to exec', snap.name)
            else:
                check_res.check_returncode()
        exec_candidates -= already_execed

    # Run the exec command, oldest candidate first, up to exec_count times.
    # Exit code 125 from the command means "dry run": keep the snapshot.
    exec_cmd = config.get('EXEC', 'cmd', fallback=None)
    exec_count = config.getint('EXEC', 'count', fallback=1)
    if exec_cmd:
        execed = set()
        for snap in sorted(exec_candidates, key=lambda snap: snap.creation):
            if exec_count > 0 and len(execed) >= exec_count:
                logger.debug('exec_count of %d reached', exec_count)
                break

            logger.info('execing for ‘%s’...', snap.name)
            args = []
            args += shlex.split(exec_cmd)
            args += [snap.name]
            _log_cmd(*args)
            p = subprocess.run(args)
            if p.returncode == 125:
                logger.warning('got dry-run returncode for ‘%s’, keeping...', snap.name)
                base_name, _, _ = snap.name.rpartition('@')
                keep_because(base_name, snap.name, 'exec-dryrun')
            else:
                p.check_returncode()
                execed.add(snap)

        exec_candidates -= execed

    # Whatever was not execed yet must survive until the next run.
    for candidate in exec_candidates:
        base_name, _, _ = candidate.name.rpartition('@')
        keep_because(base_name, candidate.name, 'exec-candidate')

    # Keep everything younger than `within`, relative to each dataset's
    # newest snapshot (not the wall clock).
    within = config.gettimedelta('KEEP', 'within')
    if within > timedelta(seconds=0):
        for base, snaps in items.items():
            time_ref = max(snaps, key=lambda snap: snap.creation, default=None)
            if not time_ref:
                logger.warning('Nothing to keep for ‘%s’', base)
                continue

            logger.info('Using ‘%s’ as time reference for ‘%s’', time_ref.name, base)
            within_cutoff = time_ref.creation - within

            for snap in snaps:
                if snap.creation >= within_cutoff:
                    keep_because(base, snap.name, 'within')
    else:
        logger.warning('Skipping rule ‘within’ since retention period is zero')

    # Period-based retention from the [KEEP] section, mirroring the exec
    # bucketing above but marking snapshots to keep.
    for rule, pattern in TIME_PATTERNS.items():
        desired_count = config.getint('KEEP', rule, fallback=0)

        for base, snaps in items.items():
            periods = OrderedDict()
            for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=keep_newest):
                period = pattern(snap.creation.astimezone(prune_timezone))
                if period not in periods:
                    periods[period] = deque()
                periods[period].append(snap)

            to_keep = desired_count
            ordered_periods = periods.items() if keep_newest else reversed(periods.items())
            for period, period_snaps in ordered_periods:
                if to_keep == 0:
                    break

                for snap in period_snaps:
                    keep_because(base, snap.name, rule, period=period)
                    to_keep -= 1
                    break

            if to_keep > 0:
                logger.debug('Missing %d to fulfill prune %s=%d for ‘%s’', to_keep, rule, desired_count, base)

    for snap, reasons in kept_because.items():
        logger.info('Keeping ‘%s’ because: %s', snap, ', '.join(map(str, reasons)))
    all_snaps = {snap.name for _, snaps in items.items() for snap in snaps}
    to_destroy = all_snaps - {*kept_because}
    if not to_destroy:
        logger.info('Nothing to prune')

    # Destroy everything that no rule claimed.
    for snap in sorted(to_destroy):
        args = ['zfs', 'destroy']
        if dry_run:
            args += ['-n']
        args += [snap]
        _log_cmd(*args)
        subprocess.run(args, check=True)
        if dry_run:
            logger.info('Would have pruned ‘%s’', snap)
        else:
            logger.info('Pruned ‘%s’', snap)
277 | |||
def rename(snapshots, destroy=False, set_is_auto=False):
    """Rename *snapshots* to the canonical name derived from each
    snapshot's creation time (see _snap_name).

    destroy -- if a snapshot's canonical name was already produced earlier
        in this run (two snapshots sharing a creation second), destroy the
        duplicate instead of merely skipping it
    set_is_auto -- additionally set PROP_IS_AUTO_SNAPSHOT=true on every
        snapshot that was renamed
    """
    # Ask zfs for the creation timestamps of all requested snapshots at once.
    args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots]
    _log_cmd(*args)
    renamed_to = set()  # canonical names already claimed during this run
    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
        text_stdout = io.TextIOWrapper(proc.stdout)
        reader = csv.DictReader(text_stdout, fieldnames=['name', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE)
        Row = namedtuple('Row', reader.fieldnames)
        for row in [Row(**data) for data in reader]:
            creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc)
            base_name, _, _ = row.name.rpartition('@')
            new_name = _snap_name(base_name, time=creation)
            if new_name == row.name:
                logger.debug('Not renaming ‘%s’ since name is already correct', row.name)
                continue

            if new_name in renamed_to:
                # Collision: the target name was already taken by an earlier
                # snapshot with the same creation second.
                if destroy:
                    logger.warning('Destroying ‘%s’ since ‘%s’ was already renamed to', row.name, new_name)
                    args = ['zfs', 'destroy', row.name]
                    _log_cmd(*args)
                    subprocess.run(args, check=True)
                else:
                    logger.info('Skipping ‘%s’ since ‘%s’ was already renamed to', row.name, new_name)

                continue

            logger.info('Renaming ‘%s’ to ‘%s’', row.name, new_name)
            args = ['zfs', 'rename', row.name, new_name]
            _log_cmd(*args)
            subprocess.run(args, check=True)
            renamed_to.add(new_name)

            if set_is_auto:
                logger.info('Setting is-auto-snapshot on ‘%s’', new_name)
                args = ['zfs', 'set', f'{PROP_IS_AUTO_SNAPSHOT}=true', new_name]
                _log_cmd(*args)
                subprocess.run(args, check=True)
316 | |||
def autosnap():
    """Snapshot every dataset that has auto-snapshot enabled.

    Datasets are grouped per pool and snapshotted with one `zfs snapshot`
    invocation per pool; the new snapshots are then renamed to their
    canonical timestamp-based names via rename().
    """
    items = _get_items()

    all_snap_names = set()
    async def do_snapshot(*snap_items, recursive=False):
        nonlocal items, all_snap_names
        # Only datasets whose property value parsed to True get snapshotted.
        snap_names = {_snap_name(item) for item in snap_items if items[item]}
        if not snap_names:
            # BUGFIX: with no enabled datasets in this group, `zfs snapshot`
            # would be invoked with zero snapshot names and fail
            # (check=True would then raise) — skip the call entirely.
            return
        if recursive:
            for snap_item in snap_items:
                all_snap_names |= {_snap_name(item) for item in items if item.startswith(snap_item)}
        else:
            all_snap_names |= snap_names

        args = ['zfs', 'snapshot', '-o', f'{PROP_IS_AUTO_SNAPSHOT}=true']
        if recursive:
            args += ['-r']
        args += snap_names

        _log_cmd(*args)
        subprocess.run(args, check=True)

    # Group datasets by pool so each pool is handled by one invocation.
    pool_items = defaultdict(set)
    for item in items:
        pool, _, _ = item.partition('/')
        pool_items[pool].add(item)

    tasks = []
    for snap_items in pool_items.values():
        tasks.append(do_snapshot(*snap_items))
    if not tasks:
        logger.warning('No snapshots to create')
    else:
        async def run_tasks():
            await asyncio.gather(*tasks)
        asyncio.run(run_tasks())
        for snap in all_snap_names:
            logger.info('Created ‘%s’', snap)
        if all_snap_names:
            rename(snapshots=all_snap_names)
356 | |||
def main():
    """CLI entry point: configure logging, parse arguments and dispatch to
    the selected sub-command (autosnap, rename or prune)."""
    global logger
    logger = logging.getLogger(__name__)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter( logging.Formatter('[%(levelname)s](%(name)s): %(message)s') )
    if sys.stderr.isatty():
        # Interactive terminal: include timestamps in log output.
        console_handler.setFormatter( logging.Formatter('%(asctime)s [%(levelname)s](%(name)s): %(message)s') )
    logger.addHandler(console_handler)

    # log uncaught exceptions
    def log_exceptions(type, value, tb):
        global logger

        logger.error(value)
        sys.__excepthook__(type, value, tb) # calls default excepthook

    sys.excepthook = log_exceptions

    parser = argparse.ArgumentParser(prog='zfssnap')
    parser.add_argument('--verbosity', dest='log_level', action='append', type=int)
    parser.add_argument('--verbose', '-v', dest='log_level', action='append_const', const=1)
    parser.add_argument('--quiet', '-q', dest='log_level', action='append_const', const=-1)
    subparsers = parser.add_subparsers()
    parser.set_defaults(cmd=autosnap)  # no sub-command means autosnap
    autosnap_parser = subparsers.add_parser('autosnap')
    autosnap_parser.set_defaults(cmd=autosnap)
    rename_parser = subparsers.add_parser('rename')
    rename_parser.add_argument('snapshots', nargs='+')
    rename_parser.add_argument('--destroy', action='store_true', default=False)
    rename_parser.add_argument('--set-is-auto', action='store_true', default=False)
    rename_parser.set_defaults(cmd=rename)
    prune_parser = subparsers.add_parser('prune')
    prune_parser.add_argument('--config', '-c', dest='config_files', nargs='*', default=list())
    prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False)
    prune_parser.add_argument('--keep-newest', action='store_true', default=False)
    prune_parser.add_argument('--exec-newest', action='store_true', default=False)
    prune_parser.add_argument('--no-exec', dest='do_exec', action='store_false', default=True)
    prune_parser.set_defaults(cmd=prune)
    args = parser.parse_args()


    # Each -v/-q moves the level index towards DEBUG/CRITICAL respectively,
    # clamped to the valid range.
    LOG_LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL]
    DEFAULT_LOG_LEVEL = logging.ERROR
    log_level = LOG_LEVELS.index(DEFAULT_LOG_LEVEL)

    for adjustment in args.log_level or ():
        log_level = min(len(LOG_LEVELS) - 1, max(log_level - adjustment, 0))
    logger.setLevel(LOG_LEVELS[log_level])

    # Forward only those parsed options that the selected sub-command accepts.
    cmdArgs = {}
    for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'exec_newest', 'set_is_auto', 'do_exec'}:
        if copy in vars(args):
            cmdArgs[copy] = vars(args)[copy]
    if 'config_files' in vars(args):
        def convert_timedelta(secs_str):
            secs = pytimeparse.parse(secs_str)
            if secs is None:
                # BUGFIX: interpolate the offending expression into the
                # message; previously it was passed as an extra ValueError
                # argument and never formatted.
                raise ValueError(f'Could not parse timedelta expression ‘{secs_str}’')
            return timedelta(seconds=secs)
        config = configparser.ConfigParser(converters={
            'timedelta': convert_timedelta,
            'timezone': gettz
        })
        search_files = args.config_files if args.config_files else [*BaseDirectory.load_config_paths('zfssnap.ini')]
        read_files = config.read(search_files)

        def format_config_files(files):
            if not files:
                return 'no files'
            return ', '.join(map(lambda file: f'‘{file}’', files))

        if not read_files:
            # BUGFIX: interpolate the searched file list into the message
            # instead of passing it as an unformatted spare argument.
            raise Exception(f'Found no config files. Tried: {format_config_files(search_files)}')

        logger.debug('Read following config files: %s', format_config_files(read_files))

        cmdArgs['config'] = config

    args.cmd(**cmdArgs)
436 | |||
if __name__ == '__main__':
    # main() has no return statement, so sys.exit(None) exits with status 0
    # on success; uncaught exceptions are logged via the installed excepthook.
    sys.exit(main())