"""borgsnap: check for, and create, borg archives of ZFS snapshots.

Source path: hosts/vidhar/borg/borgsnap/borgsnap/__main__.py

Two sub-commands are provided:

``check``
    Exit with 0 if an archive for the given snapshot is already present in
    the target repository and with 126 otherwise.  The archive list can be
    cached in a JSON file.

``create``
    Mount the snapshot read-only inside a private mount namespace / chroot
    and run ``borg create`` on it as the ``borg`` user.
"""
import argparse
import os
import sys
import signal
from pyprctl import cap_permitted, cap_inheritable, cap_effective, cap_ambient, Cap
from pwd import getpwnam

from datetime import datetime, timezone
from dateutil.parser import isoparse

from xdg import xdg_runtime_dir
import unshare
from tempfile import TemporaryDirectory

import logging

import json
import subprocess

import pathlib
from pathlib import Path

from atomicwrites import atomic_write

from traceback import format_exc


# Defined at import time so that check()/create() are usable even when they
# are not reached through main(); main() only attaches handlers and a level.
logger = logging.getLogger(__name__)

# Passwd entry of the unprivileged user all borg processes are run as.
borg_pwd = getpwnam('borg')


def as_borg(caps=None, cwd=None):
    """Drop privileges to the ``borg`` user; meant to be used as ``preexec_fn``.

    :param caps: optional iterable of ``pyprctl.Cap`` values that are added to
        the permitted, inheritable, effective and ambient sets before the
        UID/GID switch.
    :param cwd: optional directory to change into after the switch.
    """
    if caps:
        cap_permitted.add(*caps)
        cap_inheritable.add(*caps)
        cap_effective.add(*caps)
        cap_ambient.add(*caps)

    # NOTE(review): capabilities(7) documents that a transition of all UIDs
    # from 0 to non-zero clears the permitted, effective and ambient sets
    # unless keep-caps is enabled -- confirm that the capabilities raised
    # above actually survive the setuid() below.
    os.setgid(borg_pwd.pw_gid)
    os.setuid(borg_pwd.pw_uid)

    if cwd is not None:
        os.chdir(cwd)


def _snapshot_timestamp(snapshot):
    """Return the part after ``@`` of *snapshot* as a UTC ``%Y-%m-%dT%H:%M:%S`` string."""
    _, _, ts = snapshot.rpartition('@')
    creation_time = isoparse(ts).astimezone(timezone.utc)
    return creation_time.strftime("%Y-%m-%dT%H:%M:%S")


def _archive_name(snapshot, target, archive_prefix):
    """Return the full ``REPO::ARCHIVE`` location for *snapshot*."""
    archive_name = _archive_basename(snapshot, archive_prefix)
    return f'{target}::{archive_name}-{_snapshot_timestamp(snapshot)}'


def _archive_basename(snapshot, archive_prefix):
    """Return *archive_prefix* plus the escaped dataset part of *snapshot*.

    ``-`` is doubled first so that the ``-`` substituted for ``/`` afterwards
    stays distinguishable.
    """
    base_name, _, _ = snapshot.rpartition('@')
    return archive_prefix + base_name.replace('-', '--').replace('/', '-')


def check(*, snapshot, target, archive_prefix, cache_file):
    """Report whether an archive for *snapshot* exists in *target*.

    :param cache_file: optional path of a JSON file holding the archive list;
        it is read if present and (re)written after querying the repository.
    :returns: ``0`` if the archive exists, ``126`` otherwise.
    """
    archives = None
    if cache_file:
        logger.debug('Trying cache...')
        try:
            with open(cache_file, mode='r', encoding='utf-8') as fp:
                archives = set(json.load(fp))
            logger.info('Loaded archive list from cache')
        except FileNotFoundError:
            pass
        except ValueError:
            # An unreadable cache must not abort the check; fall back to the
            # repository, which also rewrites the cache below.
            logger.warning('Ignoring unparsable archive cache')
            archives = None

    if not archives:
        logger.info('Loading archive list from remote...')
        with subprocess.Popen(['borg', 'list', '--info', '--lock-wait=600', '--json', target], stdout=subprocess.PIPE, preexec_fn=lambda: as_borg()) as proc:
            archives = {archive['barchive'] for archive in json.load(proc.stdout)['archives']}
        if cache_file:
            logger.debug('Saving archive list to cache...')
            with atomic_write(cache_file, mode='w', encoding='utf-8', overwrite=True) as fp:
                json.dump(list(archives), fp)

    _, _, archive_name = _archive_name(snapshot, target, archive_prefix).partition('::')
    if archive_name in archives:
        logger.info(f'{archive_name} found')
        return 0
    else:
        logger.info(f'{archive_name} not found')
        return 126


def _exit_code_from_status(status):
    """Translate a ``wait()`` status into a process exit code.

    The raw status must not be handed to ``sys.exit()``: an exit code of 1 is
    encoded as status 256, which is truncated to exit code 0.
    """
    if os.WIFSIGNALED(status):
        return 128 + os.WTERMSIG(status)
    if os.WIFEXITED(status):
        return os.WEXITSTATUS(status)
    return 1


def _backup_in_namespace(*, tmpdir, snapshot, target, archive_prefix, basename, dry_run):
    """Build the chroot in a new mount namespace and run ``borg create`` in it."""
    unshare.unshare(unshare.CLONE_NEWNS)
    subprocess.run(['mount', '--make-rprivate', '/'], check=True)
    chroot = pathlib.Path(tmpdir) / 'chroot'
    upper = pathlib.Path(tmpdir) / 'upper'
    work = pathlib.Path(tmpdir) / 'work'
    for path in (chroot, upper, work):
        path.mkdir()
    subprocess.run(['mount', '-t', 'overlay', 'overlay', '-o', f'lowerdir=/,upperdir={upper},workdir={work}', chroot], check=True)

    bind_mounts = ['nix', 'run', 'run/secrets.d', 'run/wrappers', 'proc', 'dev', 'sys', pathlib.Path(os.path.expanduser('~')).relative_to('/')]
    if os.environ.get('BORG_BASE_DIR'):
        bind_mounts.append(pathlib.Path(os.environ['BORG_BASE_DIR']).relative_to('/'))
    if 'SSH_AUTH_SOCK' in os.environ:
        bind_mounts.append(pathlib.Path(os.environ['SSH_AUTH_SOCK']).parent.relative_to('/'))
    for bind_mount in bind_mounts:
        (chroot / bind_mount).mkdir(parents=True, exist_ok=True)
        subprocess.run(['mount', '--bind', pathlib.Path('/') / bind_mount, chroot / bind_mount], check=True)

    os.chroot(chroot)
    os.chdir('/')
    mountpoint = pathlib.Path('/borg')
    mountpoint.mkdir(parents=True, exist_ok=True, mode=0o0750)
    os.chown(mountpoint, borg_pwd.pw_uid, borg_pwd.pw_gid)

    # Mount outside the try block: if the mount itself fails there is nothing
    # to unmount, and a failing umount would only mask the real error.
    subprocess.run(['mount', '-t', 'zfs', '-o', 'ro', snapshot, mountpoint], check=True)
    try:
        env = os.environ.copy()
        create_args = ['borg',
                       'create',
                       '--lock-wait=600',
                       '--one-file-system',
                       '--compression=auto,zstd,10',
                       '--chunker-params=10,23,16,4095',
                       '--files-cache=ctime,size',
                       '--show-rc',
                       '--progress',
                       '--list',
                       '--filter=AMEi-x?',
                       '--stats' if not dry_run else '--dry-run',
                       ]
        create_args += [f'--timestamp={_snapshot_timestamp(snapshot)}']
        env['BORG_FILES_CACHE_SUFFIX'] = basename
        create_args += [_archive_name(snapshot, target, archive_prefix), '.']
        # NOTE(review): this dumps the complete environment to stderr.
        print({'create_args': create_args, 'cwd': mountpoint, 'env': env}, file=sys.stderr)
        subprocess.run(create_args, stdin=subprocess.DEVNULL, env=env, preexec_fn=lambda: as_borg(caps={Cap.DAC_READ_SEARCH}, cwd=mountpoint), check=True)
    finally:
        subprocess.run(['umount', mountpoint], check=True)


def create(*, snapshot, target, archive_prefix, dry_run):
    """Create a borg archive of *snapshot* in *target*.

    The work happens in a forked child so that the private mount namespace
    and the chroot never affect the calling process.

    :param dry_run: pass ``--dry-run`` instead of ``--stats`` to borg.
    :returns: ``0`` on success.
    :raises SystemExit: with the child's exit code if the child failed.
    """
    basename = _archive_basename(snapshot, archive_prefix)

    with TemporaryDirectory(prefix=f'borg-mount_{basename}_', dir=os.environ.get('RUNTIME_DIRECTORY')) as tmpdir:
        child = os.fork()
        if child == 0:
            # The child must always end in os._exit(): letting an exception
            # unwind would run the parent's cleanup handlers (e.g. removal of
            # the temporary directory) a second time, from inside the chroot.
            try:
                _backup_in_namespace(tmpdir=tmpdir, snapshot=snapshot, target=target, archive_prefix=archive_prefix, basename=basename, dry_run=dry_run)
            except SystemExit as exc:
                if exc.code is None:
                    os._exit(0)
                os._exit(exc.code if isinstance(exc.code, int) else 1)
            except BaseException:
                logger.error(format_exc())
                os._exit(1)
            os._exit(0)
        else:
            _, status = os.waitpid(child, 0)
            exit_code = _exit_code_from_status(status)
            if exit_code != 0:
                sys.exit(exit_code)
    return 0


def sigterm(signum, frame):
    """Turn SIGTERM into SystemExit so that cleanup handlers get to run."""
    raise SystemExit(128 + signum)


def main():
    """Parse the command line, configure logging and run the sub-command.

    :returns: the exit code of the selected sub-command.
    """
    signal.signal(signal.SIGTERM, sigterm)

    global logger
    logger = logging.getLogger(__name__)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter( logging.Formatter('[%(levelname)s](%(name)s): %(message)s') )
    if sys.stderr.isatty():
        console_handler.setFormatter( logging.Formatter('%(asctime)s [%(levelname)s](%(name)s): %(message)s') )
    logger.addHandler(console_handler)

    # log uncaught exceptions
    def log_exceptions(type, value, tb):
        global logger

        logger.error(value)
        sys.__excepthook__(type, value, tb) # calls default excepthook

    sys.excepthook = log_exceptions

    parser = argparse.ArgumentParser(prog='borgsnap')
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--target', metavar='REPO', default='yggdrasil.borgbase:repo')
    parser.add_argument('--archive-prefix', metavar='PREFIX', default='yggdrasil.vidhar.')
    # A dest is needed so that a missing sub-command yields a usage error
    # rather than a TypeError on Python versions affected by bpo-29298.
    subparsers = parser.add_subparsers(dest='subcommand')
    subparsers.required = True
    parser.set_defaults(cmd=None)
    check_parser = subparsers.add_parser('check')
    check_parser.add_argument('--cache-file', type=lambda p: Path(p).absolute(), default=None)
    check_parser.add_argument('snapshot')
    check_parser.set_defaults(cmd=check)
    create_parser = subparsers.add_parser('create')
    create_parser.add_argument('--dry-run', '-n', action='store_true', default=False)
    create_parser.add_argument('snapshot')
    create_parser.set_defaults(cmd=create)
    args = parser.parse_args()

    if args.verbose <= 0:
        logger.setLevel(logging.WARNING)
    elif args.verbose <= 1:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.DEBUG)

    # Only forward the options the selected sub-command actually defines.
    parsed = vars(args)
    forwarded = ('target', 'archive_prefix', 'snapshot', 'cache_file', 'dry_run')
    cmd_args = {name: parsed[name] for name in forwarded if name in parsed}

    return args.cmd(**cmd_args)


if __name__ == '__main__':
    sys.exit(main())
--git a/hosts/vidhar/borg/borgsnap/setup.py b/hosts/vidhar/borg/borgsnap/setup.py new file mode 100644 index 00000000..76356bfc --- /dev/null +++ b/hosts/vidhar/borg/borgsnap/setup.py @@ -0,0 +1,10 @@ +from setuptools import setup + +setup(name='borgsnap', + packages=['borgsnap'], + entry_points={ + 'console_scripts': [ + 'borgsnap=borgsnap.__main__:main', + ], + } +) diff --git a/hosts/vidhar/borg/copy.py b/hosts/vidhar/borg/copy.py index 4e9599b8..b9b667f2 100755 --- a/hosts/vidhar/borg/copy.py +++ b/hosts/vidhar/borg/copy.py @@ -71,7 +71,7 @@ def read_repo(path): class ToSync: to_sync = deque() - + def __iter__(self): return self @@ -267,7 +267,7 @@ def sigterm(signum, frame): def main(): signal.signal(signal.SIGTERM, sigterm) - + if "::" in args.source: (src_repo_path, _, src_archive) = args.source.partition("::") entry = None diff --git a/hosts/vidhar/borg/default.nix b/hosts/vidhar/borg/default.nix index 579630a9..650c91ee 100644 --- a/hosts/vidhar/borg/default.nix +++ b/hosts/vidhar/borg/default.nix @@ -1,23 +1,28 @@ -{ config, pkgs, lib, ... }: +{ config, pkgs, lib, flakeInputs, ... 
}: with lib; let + sshConfig = pkgs.writeText "config" '' + Include /etc/ssh/ssh_config + + ControlMaster auto + ControlPath /var/lib/borg/.borgssh-master-%r@%n:%p + ControlPersist yes + + Host yggdrasil.borgbase + HostName nx69hpl8.repo.borgbase.com + User nx69hpl8 + IdentityFile ${config.sops.secrets."append.borgbase".path} + IdentitiesOnly yes + + BatchMode yes + ServerAliveInterval 10 + ServerAliveCountMax 30 + ''; + copyService = { repo, repoEscaped }: let serviceName = "copy-borg@${repoEscaped}"; - sshConfig = pkgs.writeText "config" '' - Include /etc/ssh/ssh_config - - Host yggdrasil.borgbase - HostName nx69hpl8.repo.borgbase.com - User nx69hpl8 - IdentityFile ${config.sops.secrets."append.borgbase".path} - IdentitiesOnly yes - - BatchMode yes - ServerAliveInterval 10 - ServerAliveCountMax 30 - ''; in nameValuePair serviceName { serviceConfig = { Type = "oneshot"; @@ -72,8 +77,63 @@ let --prefix PATH : ${makeBinPath (with pkgs; [utillinux borgbackup])}:${config.security.wrapperDir} ''; }); + + borgsnap = flakeInputs.mach-nix.lib.${config.nixpkgs.system}.buildPythonPackage rec { + pname = "borgsnap"; + src = ./borgsnap; + version = "0.0.0"; + ignoreDataOutdated = true; + + requirements = '' + atomicwrites + pyprctl + python-unshare + xdg + python-dateutil + ''; + postInstall = '' + wrapProgram $out/bin/borgsnap \ + --prefix PATH : ${makeBinPath (with pkgs; [utillinux borgbackup])}:${config.security.wrapperDir} + ''; + + providers.python-unshare = "nixpkgs"; + overridesPre = [ + (self: super: { python-unshare = super.python-unshare.overrideAttrs (oldAttrs: { name = "python-unshare-0.2.1"; version = "0.2.1"; }); }) + ]; + + _.xdg.buildInputs.add = with pkgs."python3Packages"; [ poetry ]; + _.tomli.buildInputs.add = with pkgs."python3Packages"; [ flit-core ]; + }; in { config = { + services.zfssnap.config.exec = { + check = "${borgsnap}/bin/borgsnap -vvv --target yggdrasil.borgbase:repo --archive-prefix yggdrasil.vidhar. 
check --cache-file /run/zfssnap-prune/archives-cache.json"; + cmd = "${borgsnap}/bin/borgsnap -vvv --target yggdrasil.borgbase:repo --archive-prefix yggdrasil.vidhar. create --dry-run"; + + halfweekly = "8"; + monthly = "-1"; + }; + + systemd.services = { + "zfssnap-prune" = { + serviceConfig = { + Environment = [ + "BORG_RSH=\"${pkgs.openssh}/bin/ssh -F ${sshConfig}\"" + "BORG_BASE_DIR=/var/lib/borg" + "BORG_CONFIG_DIR=/var/lib/borg/config" + "BORG_CACHE_DIR=/var/lib/borg/cache" + "BORG_SECURITY_DIR=/var/lib/borg/security" + "BORG_KEYS_DIR=/var/lib/borg/keys" + "BORG_KEY_FILE=${config.sops.secrets."yggdrasil.borgkey".path}" + "BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=yes" + "BORG_HOSTNAME_IS_UNIQUE=yes" + ]; + RuntimeDirectory = "zfssnap-prune"; + }; + }; + } // listToAttrs (map copyService [{ repo = "/srv/backup/borg/jotnar"; repoEscaped = "srv-backup-borg-jotnar"; }]); + + services.borgbackup.repos.jotnar = { path = "/srv/backup/borg/jotnar"; authorizedKeysAppendOnly = let @@ -111,11 +171,9 @@ in { mode = "0400"; }; - systemd.services = listToAttrs (map copyService [{ repo = "/srv/backup/borg/jotnar"; repoEscaped = "srv-backup-borg-jotnar"; }]); - systemd.timers."copy-borg@srv-backup-borg-jotnar" = { wantedBy = ["multi-user.target"]; - + timerConfig = { OnCalendar = "*-*-* 00/4:00:00 Europe/Berlin"; }; diff --git a/hosts/vidhar/default.nix b/hosts/vidhar/default.nix index 121cc9df..3f5d17d5 100644 --- a/hosts/vidhar/default.nix +++ b/hosts/vidhar/default.nix @@ -1,4 +1,7 @@ { hostName, flake, config, pkgs, lib, ... 
}: + +with lib; + { imports = with flake.nixosModules.systemProfiles; [ ./zfs.nix ./network ./samba.nix ./dns ./prometheus ./borg @@ -39,7 +42,7 @@ luks.devices = { nvm0 = { device = "/dev/disk/by-label/${hostName}-nvm0"; bypassWorkqueues = true; }; nvm1 = { device = "/dev/disk/by-label/${hostName}-nvm1"; bypassWorkqueues = true; }; - + hdd0.device = "/dev/disk/by-label/${hostName}-hdd0"; hdd1.device = "/dev/disk/by-label/${hostName}-hdd1"; hdd2.device = "/dev/disk/by-label/${hostName}-hdd2"; @@ -58,7 +61,7 @@ options = [ "mode=0755" ]; }; }; - + services.timesyncd.enable = false; services.chrony = { enable = true; @@ -132,6 +135,7 @@ access_log syslog:server=unix:/dev/log main; error_log syslog:server=unix:/dev/log info; + client_body_buffer_size 16m; client_body_temp_path /run/nginx-client-bodies; ''; upstreams.grafana = { @@ -173,12 +177,12 @@ sopsFile = ./selfsigned.key; }; systemd.services.nginx = { - preStart = lib.mkForce config.services.nginx.preStart; + preStart = mkForce config.services.nginx.preStart; serviceConfig = { - ExecReload = lib.mkForce "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + ExecReload = mkForce "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; LoadCredential = [ "selfsigned.key:${config.sops.secrets."selfsigned.key".path}" ]; - RuntimeDirectory = lib.mkForce [ "nginx" "nginx-client-bodies" ]; + RuntimeDirectory = mkForce [ "nginx" "nginx-client-bodies" ]; RuntimeDirectoryMode = "0750"; }; }; @@ -232,7 +236,7 @@ }; }; systemd.services.loki.preStart = let - rulesYaml = lib.generators.toYAML {} { + rulesYaml = generators.toYAML {} { groups = [ { name = "power-failures"; rules = [ @@ -311,6 +315,29 @@ timers.wants = ["systemd-tmpfiles-clean.timer"]; }; + services.smartd = { + enable = true; + autodetect = false; + defaults.monitored = "-a -o on -s (S/../.././02|L/../../7/04)"; + devices = map (dev: { device = "/dev/disk/by-path/${dev}"; }) [ + "pci-0000:00:1f.2-ata-1" + "pci-0000:00:1f.2-ata-3" + "pci-0000:00:1f.2-ata-4" + "pci-0000:00:1f.2-ata-5" 
+ "pci-0000:00:1f.2-ata-6" + "pci-0000:02:00.0-nvme-1" + "pci-0000:05:00.0-sas-phy0-lun-0" + "pci-0000:05:00.0-sas-phy1-lun-0" + "pci-0000:06:00.0-nvme-1" + ]; + notifications = { + test = false; + mail.enable = false; + x11.enable = false; + wall.enable = false; + }; + }; + environment.systemPackages = with pkgs; [iotop vmtouch]; system.stateVersion = "21.05"; diff --git a/hosts/vidhar/prometheus/default.nix b/hosts/vidhar/prometheus/default.nix index 4c23d8a9..7ac86c30 100644 --- a/hosts/vidhar/prometheus/default.nix +++ b/hosts/vidhar/prometheus/default.nix @@ -34,20 +34,6 @@ in { enable = true; enabledCollectors = []; }; - smartctl = { - enable = true; - devices = map (dev: "/dev/disk/by-path/${dev}") [ - "pci-0000:00:1f.2-ata-1" - "pci-0000:00:1f.2-ata-3" - "pci-0000:00:1f.2-ata-4" - "pci-0000:00:1f.2-ata-5" - "pci-0000:00:1f.2-ata-6" - "pci-0000:02:00.0-nvme-1" - "pci-0000:05:00.0-sas-phy0-lun-0" - "pci-0000:05:00.0-sas-phy1-lun-0" - "pci-0000:06:00.0-nvme-1" - ]; - }; snmp = { enable = true; configurationPath = ./snmp.yml; @@ -124,10 +110,10 @@ in { } { job_name = "smartctl"; static_configs = [ - { targets = ["localhost:${toString config.services.prometheus.exporters.smartctl.port}"]; } + { targets = ["localhost:9633"]; } ]; relabel_configs = relabelHosts; - scrape_interval = "1s"; + scrape_interval = "60s"; } { job_name = "snmp"; static_configs = [ @@ -376,6 +362,30 @@ in { }; }; + systemd.services."prometheus-smartctl-exporter" = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + path = with pkgs; [ smartmontools ]; + serviceConfig = { + Restart = "always"; + + CapabilityBoundingSet = ["CAP_DAC_OVERRIDE" "CAP_SYS_RAWIO" "CAP_SYS_ADMIN"]; + AmbientCapabilities = ["CAP_DAC_OVERRIDE" "CAP_SYS_RAWIO" "CAP_SYS_ADMIN"]; + ProtectSystem = "strict"; + DynamicUser = true; + LockPersonality = true; + MemoryDenyWriteExecute = true; + NoNewPrivileges = true; + PrivateDevices = false; + PrivateTmp = true; + ProcSubset = "pid"; + + Type = 
"simple"; + ExecStart = "${pkgs.smartprom}/bin/smartprom"; + Environment = "SMARTCTL_EXPORTER_PORT=9633"; + }; + }; + systemd.services."prometheus-systemd-exporter" = let cfg = config.services.prometheus.exporters.systemd; in { @@ -385,14 +395,6 @@ in { ''; }; - systemd.services."prometheus-smartctl-exporter" = { - serviceConfig = { - DeviceAllow = lib.mkForce config.services.prometheus.exporters.smartctl.devices; - CapabilityBoundingSet = lib.mkForce ["CAP_SYS_ADMIN"]; - AmbientCapabilities = lib.mkForce ["CAP_SYS_ADMIN"]; - }; - }; - services.nginx = { upstreams.prometheus = { servers = { "localhost:${toString config.services.prometheus.port}" = {}; }; diff --git a/hosts/vidhar/zfs.nix b/hosts/vidhar/zfs.nix index ef285536..52b48aca 100644 --- a/hosts/vidhar/zfs.nix +++ b/hosts/vidhar/zfs.nix @@ -130,7 +130,21 @@ echo "=== ZPOOL IMPORT COMPLETE ===" ''; - services.zfssnap.enable = true; + services.zfssnap = { + enable = true; + config.keep = { + within = "15m"; + "5m" = "48"; + "15m" = "32"; + hourly = "48"; + "4h" = "24"; + "12h" = "12"; + daily = "62"; + halfweekly = "32"; + weekly = "24"; + monthly = "-1"; + }; + }; services.zfs.trim.enable = false; services.zfs.autoScrub = { enable = true; -- cgit v1.2.3