summaryrefslogtreecommitdiff
path: root/modules/zfssnap/zfssnap.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/zfssnap/zfssnap.py')
-rw-r--r--modules/zfssnap/zfssnap.py197
1 files changed, 138 insertions, 59 deletions
diff --git a/modules/zfssnap/zfssnap.py b/modules/zfssnap/zfssnap.py
index 21ed1d5b..a8dae75f 100644
--- a/modules/zfssnap/zfssnap.py
+++ b/modules/zfssnap/zfssnap.py
@@ -3,9 +3,9 @@
3import csv 3import csv
4import subprocess 4import subprocess
5import io 5import io
6from distutils.util import strtobool 6from distutils.util import strtobool
7from datetime import datetime, timezone, timedelta 7from datetime import datetime, timezone, timedelta
8from dateutil.tz import gettz, tzlocal 8from dateutil.tz import gettz, tzutc
9import pytimeparse 9import pytimeparse
10import argparse 10import argparse
11import re 11import re
@@ -27,6 +27,36 @@ from math import floor
27 27
28import asyncio 28import asyncio
29 29
30from dataclasses import dataclass
31
32
33TIME_PATTERNS = OrderedDict([
34 ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')),
35 ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')),
36 ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)),
37 ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)),
38 ("hourly", lambda t: t.strftime('%Y-%m-%d %H')),
39 ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)),
40 ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)),
41 ("daily", lambda t: t.strftime('%Y-%m-%d')),
42 ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)),
43 ("weekly", lambda t: t.strftime('%G-%V')),
44 ("monthly", lambda t: t.strftime('%Y-%m')),
45 ("yearly", lambda t: t.strftime('%Y')),
46])
47
48@dataclass(eq=True, order=True, frozen=True)
49class Snap:
50 name: str
51 creation: datetime
52
53@dataclass(eq=True, order=True, frozen=True)
54class KeptBecause:
55 rule: str
56 ix: int
57 base: str
58 period: str
59
30 60
31@cache 61@cache
32def _now(): 62def _now():
@@ -42,56 +72,120 @@ def _log_cmd(*args):
42 72
43def _get_items(): 73def _get_items():
44 items = {} 74 items = {}
45 75
46 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', '-s', 'local,default,inherited,temporary,received', 'li.yggdrasil:auto-snapshot'] 76 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'filesystem,volume', '-s', 'local,default,inherited,temporary,received', 'li.yggdrasil:auto-snapshot']
47 _log_cmd(*args) 77 _log_cmd(*args)
48 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: 78 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
49 text_stdout = io.TextIOWrapper(proc.stdout) 79 text_stdout = io.TextIOWrapper(proc.stdout)
50 reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) 80 reader = csv.DictReader(text_stdout, fieldnames=['name', 'setting'], delimiter='\t', quoting=csv.QUOTE_NONE)
51 Row = namedtuple('Row', ['name', 'setting']) 81 Row = namedtuple('Row', reader.fieldnames)
52 for row in map(Row._make, reader): 82 for row in [Row(**data) for data in reader]:
53 items[row.name] = bool(strtobool(row.setting)) 83 items[row.name] = bool(strtobool(row.setting))
54 84
55 return items 85 return items
56 86
57def prune(config, dry_run, keep_newest): 87def _get_snaps(only_auto=True):
58 prunable_snapshots = set() 88 snapshots = defaultdict(list)
59 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'snapshot', '-s', 'local', 'li.yggdrasil:is-auto-snapshot'] 89 args = ['zfs', 'list', '-H', '-p', '-t', 'snapshot', '-o', 'name,li.yggdrasil:is-auto-snapshot,creation']
60 _log_cmd(*args)
61 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
62 text_stdout = io.TextIOWrapper(proc.stdout)
63 reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE)
64 Row = namedtuple('Row', ['name', 'is_auto_snapshot'])
65 for row in map(Row._make, reader):
66 if bool(strtobool(row.is_auto_snapshot)):
67 prunable_snapshots.add(row.name)
68
69 items = defaultdict(list)
70 Snap = namedtuple('Snap', ['name', 'creation'])
71 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', '-t', 'snapshot', 'creation']
72 _log_cmd(*args) 90 _log_cmd(*args)
73 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: 91 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
74 text_stdout = io.TextIOWrapper(proc.stdout) 92 text_stdout = io.TextIOWrapper(proc.stdout)
75 reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) 93 reader = csv.DictReader(text_stdout, fieldnames=['name', 'is_auto_snapshot', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE)
76 Row = namedtuple('Row', ['name', 'timestamp']) 94 Row = namedtuple('Row', reader.fieldnames)
77 for row in map(Row._make, reader): 95 for row in [Row(**data) for data in reader]:
78 if row.name not in prunable_snapshots: 96 if only_auto and not bool(strtobool(row.is_auto_snapshot)):
79 continue 97 continue
80 98
81 base_name, _, _ = row.name.rpartition('@') 99 base_name, _, _ = row.name.rpartition('@')
82 creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) 100 creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc)
83 items[base_name].append(Snap(name=row.name, creation=creation)) 101 snapshots[base_name].append(Snap(name=row.name, creation=creation))
102
103 return snapshots
104
105def prune(config, dry_run, keep_newest, do_exec):
106 do_exec = do_exec and 'EXEC' in config
107 prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzutc())
108 logger.debug(f'prune timezone: {prune_timezone}')
109
110 items = _get_snaps()
111
112 exec_candidates = set()
113 if do_exec:
114 exec_timezone = config.gettimezone('EXEC', 'timezone', fallback=prune_timezone)
115 logger.debug(f'exec timezone: {exec_timezone}')
116
117 for rule, pattern in TIME_PATTERNS.items():
118 desired_count = config.getint('EXEC', rule, fallback=0)
119
120 for base, snaps in items.items():
121 periods = OrderedDict()
122
123 for snap in sorted(snaps, key=lambda snap: snap.creation):
124 period = pattern(snap.creation.astimezone(exec_timezone))
125 if period not in periods:
126 periods[period] = deque()
127 periods[period].append(snap)
128
129 to_exec = desired_count
130 ordered_periods = periods.items()
131 for period, period_snaps in ordered_periods:
132 if to_exec == 0:
133 break
134
135 for snap in period_snaps:
136 exec_candidates.add(snap)
137 logger.debug(f'{snap.name} is exec candidate')
138 to_exec -= 1
139 break
140
141 if to_exec > 0:
142 logger.debug(f'Missing {to_exec} to fulfill exec {rule}={desired_count} for ‘{base}’')
143
144 check_cmd = config.get('EXEC', 'check', fallback=None)
145 if check_cmd:
146 already_execed = set()
147 for snap in exec_candidates:
148 args = []
149 args += shlex.split(check_cmd)
150 args += [snap.name]
151 _log_cmd(*args)
152 check_res = subprocess.run(args)
153 if check_res.returncode == 0:
154 already_execed.add(snap)
155 logger.debug(f'{snap.name} already execed')
156 exec_candidates -= already_execed
157
158 exec_cmd = config.get('EXEC', 'cmd', fallback=None)
159 exec_count = config.getint('EXEC', 'count', fallback=1)
160 if exec_cmd:
161 execed = set()
162 for snap in sorted(exec_candidates, key=lambda snap: snap.creation):
163 if len(execed) >= exec_count:
164 logger.debug(f'exc_count of {exec_count} reached')
165 break
166
167 args = []
168 args += shlex.split(exec_cmd)
169 args += [snap.name]
170 _log_cmd(*args)
171 subprocess.run(args).check_returncode()
172 execed.add(snap)
173
174 exec_candidates -= execed
84 175
85 kept_count = defaultdict(lambda: defaultdict(lambda: 0)) 176 kept_count = defaultdict(lambda: defaultdict(lambda: 0))
86 KeptBecause = namedtuple('KeptBecause', ['rule', 'ix', 'base', 'period'])
87 kept_because = OrderedDict() 177 kept_because = OrderedDict()
88 def keep_because(base, snap, rule, period=None): 178 def keep_because(base, snap, rule, period=None):
89 nonlocal KeptBecause, kept_count, kept_because 179 nonlocal kept_count, kept_because
90 kept_count[rule][base] += 1 180 kept_count[rule][base] += 1
91 if snap not in kept_because: 181 if snap not in kept_because:
92 kept_because[snap] = deque() 182 kept_because[snap] = deque()
93 kept_because[snap].append(KeptBecause(rule=rule, ix=kept_count[rule][base], base=base, period=period)) 183 kept_because[snap].append(KeptBecause(rule=rule, ix=kept_count[rule][base], base=base, period=period))
94 184
185 for candidate in exec_candidates:
186 base_name, _, _ = candidate.name.rpartition('@')
187 keep_because(base_name, candidate.name, 'exec-candidate')
188
95 within = config.gettimedelta('KEEP', 'within') 189 within = config.gettimedelta('KEEP', 'within')
96 if within > timedelta(seconds=0): 190 if within > timedelta(seconds=0):
97 for base, snaps in items.items(): 191 for base, snaps in items.items():
@@ -109,31 +203,14 @@ def prune(config, dry_run, keep_newest):
109 else: 203 else:
110 logger.warn('Skipping rule ‘within’ since retention period is zero') 204 logger.warn('Skipping rule ‘within’ since retention period is zero')
111 205
112 prune_timezone = config.gettimezone('KEEP', 'timezone', fallback=tzlocal) 206 for rule, pattern in TIME_PATTERNS.items():
113
114 PRUNING_PATTERNS = OrderedDict([
115 ("secondly", lambda t: t.strftime('%Y-%m-%d %H:%M:%S')),
116 ("minutely", lambda t: t.strftime('%Y-%m-%d %H:%M')),
117 ("5m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 5) * 5)),
118 ("15m", lambda t: (t.strftime('%Y-%m-%d %H'), floor(t.minute / 15) * 15)),
119 ("hourly", lambda t: t.strftime('%Y-%m-%d %H')),
120 ("4h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 4) * 4)),
121 ("12h", lambda t: (t.strftime('%Y-%m-%d'), floor(t.hour / 12) * 12)),
122 ("daily", lambda t: t.strftime('%Y-%m-%d')),
123 ("halfweekly", lambda t: (t.strftime('%G-%V'), floor(int(t.strftime('%u')) / 4) * 4)),
124 ("weekly", lambda t: t.strftime('%G-%V')),
125 ("monthly", lambda t: t.strftime('%Y-%m')),
126 ("yearly", lambda t: t.strftime('%Y')),
127 ])
128
129 for rule, pattern in PRUNING_PATTERNS.items():
130 desired_count = config.getint('KEEP', rule, fallback=0) 207 desired_count = config.getint('KEEP', rule, fallback=0)
131 208
132 for base, snaps in items.items(): 209 for base, snaps in items.items():
133 periods = OrderedDict() 210 periods = OrderedDict()
134 211
135 for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=keep_newest): 212 for snap in sorted(snaps, key=lambda snap: snap.creation, reverse=keep_newest):
136 period = pattern(snap.creation) 213 period = pattern(snap.creation.astimezone(prune_timezone))
137 if period not in periods: 214 if period not in periods:
138 periods[period] = deque() 215 periods[period] = deque()
139 periods[period].append(snap) 216 periods[period].append(snap)
@@ -150,7 +227,7 @@ def prune(config, dry_run, keep_newest):
150 break 227 break
151 228
152 if to_keep > 0: 229 if to_keep > 0:
153 logger.debug(f'Missing {to_keep} to fulfill {rule}={desired_count} for ‘{base}’') 230 logger.debug(f'Missing {to_keep} to fulfill prune {rule}={desired_count} for ‘{base}’')
154 231
155 for snap, reasons in kept_because.items(): 232 for snap, reasons in kept_because.items():
156 reasons_str = ', '.join(map(str, reasons)) 233 reasons_str = ', '.join(map(str, reasons))
@@ -171,16 +248,16 @@ def prune(config, dry_run, keep_newest):
171 logger.info(f'Would have pruned ‘{snap}’') 248 logger.info(f'Would have pruned ‘{snap}’')
172 else: 249 else:
173 logger.info(f'Pruned ‘{snap}’') 250 logger.info(f'Pruned ‘{snap}’')
174 251
175def rename(snapshots, destroy=False, set_is_auto=False): 252def rename(snapshots, destroy=False, set_is_auto=False):
176 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots] 253 args = ['zfs', 'get', '-H', '-p', '-o', 'name,value', 'creation', *snapshots]
177 _log_cmd(*args) 254 _log_cmd(*args)
178 renamed_to = set() 255 renamed_to = set()
179 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc: 256 with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
180 text_stdout = io.TextIOWrapper(proc.stdout) 257 text_stdout = io.TextIOWrapper(proc.stdout)
181 reader = csv.reader(text_stdout, delimiter='\t', quoting=csv.QUOTE_NONE) 258 reader = csv.DictReader(text_stdout, fieldnames=['name', 'timestamp'], delimiter='\t', quoting=csv.QUOTE_NONE)
182 Row = namedtuple('Row', ['name', 'timestamp']) 259 Row = namedtuple('Row', reader.fieldnames)
183 for row in map(Row._make, reader): 260 for row in [Row(**data) for data in reader]:
184 creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc) 261 creation = datetime.fromtimestamp(int(row.timestamp), timezone.utc)
185 base_name, _, _ = row.name.rpartition('@') 262 base_name, _, _ = row.name.rpartition('@')
186 new_name = _snap_name(base_name, time=creation) 263 new_name = _snap_name(base_name, time=creation)
@@ -217,7 +294,7 @@ def autosnap():
217 all_snap_names = set() 294 all_snap_names = set()
218 async def do_snapshot(*snap_items, recursive=False): 295 async def do_snapshot(*snap_items, recursive=False):
219 nonlocal items, all_snap_names 296 nonlocal items, all_snap_names
220 snap_names = {_snap_name(item) for item in snap_items} 297 snap_names = {_snap_name(item) for item in snap_items if items[item]}
221 if recursive: 298 if recursive:
222 for snap_item in snap_items: 299 for snap_item in snap_items:
223 all_snap_names |= {_snap_name(item) for item in items if item.startswith(snap_item)} 300 all_snap_names |= {_snap_name(item) for item in items if item.startswith(snap_item)}
@@ -268,7 +345,7 @@ def main():
268 sys.__excepthook__(type, value, tb) # calls default excepthook 345 sys.__excepthook__(type, value, tb) # calls default excepthook
269 346
270 sys.excepthook = log_exceptions 347 sys.excepthook = log_exceptions
271 348
272 parser = argparse.ArgumentParser(prog='zfssnap') 349 parser = argparse.ArgumentParser(prog='zfssnap')
273 parser.add_argument('--verbose', '-v', action='count', default=0) 350 parser.add_argument('--verbose', '-v', action='count', default=0)
274 subparsers = parser.add_subparsers() 351 subparsers = parser.add_subparsers()
@@ -282,6 +359,7 @@ def main():
282 prune_parser.add_argument('--config', '-c', dest='config_files', nargs='*', default=list()) 359 prune_parser.add_argument('--config', '-c', dest='config_files', nargs='*', default=list())
283 prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False) 360 prune_parser.add_argument('--dry-run', '-n', action='store_true', default=False)
284 prune_parser.add_argument('--keep-newest', action='store_true', default=False) 361 prune_parser.add_argument('--keep-newest', action='store_true', default=False)
362 prune_parser.add_argument('--no-exec', dest='do_exec', action='store_false', default=True)
285 prune_parser.set_defaults(cmd=prune) 363 prune_parser.set_defaults(cmd=prune)
286 args = parser.parse_args() 364 args = parser.parse_args()
287 365
@@ -293,7 +371,7 @@ def main():
293 logger.setLevel(logging.DEBUG) 371 logger.setLevel(logging.DEBUG)
294 372
295 cmdArgs = {} 373 cmdArgs = {}
296 for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'set_is_auto'}: 374 for copy in {'snapshots', 'dry_run', 'destroy', 'keep_newest', 'set_is_auto', 'do_exec'}:
297 if copy in vars(args): 375 if copy in vars(args):
298 cmdArgs[copy] = vars(args)[copy] 376 cmdArgs[copy] = vars(args)[copy]
299 if 'config_files' in vars(args): 377 if 'config_files' in vars(args):
@@ -308,7 +386,7 @@ def main():
308 }) 386 })
309 search_files = args.config_files if args.config_files else [*BaseDirectory.load_config_paths('zfssnap.ini')] 387 search_files = args.config_files if args.config_files else [*BaseDirectory.load_config_paths('zfssnap.ini')]
310 read_files = config.read(search_files) 388 read_files = config.read(search_files)
311 389
312 def format_config_files(files): 390 def format_config_files(files):
313 if not files: 391 if not files:
314 return 'no files' 392 return 'no files'
@@ -323,4 +401,5 @@ def main():
323 401
324 args.cmd(**cmdArgs) 402 args.cmd(**cmdArgs)
325 403
326sys.exit(main()) 404if __name__ == '__main__':
405 sys.exit(main())