From 02cca501daa04fc1d5434acc33b3464e8b209003 Mon Sep 17 00:00:00 2001
From: Gregor Kleen <gkleen@yggdrasil.li>
Date: Mon, 30 Jan 2023 13:59:18 +0100
Subject: ...

---
 tools/sops-inventory/sops_inventory/__main__.py | 107 ++++++++++++++----------
 1 file changed, 63 insertions(+), 44 deletions(-)

(limited to 'tools/sops-inventory/sops_inventory')

diff --git a/tools/sops-inventory/sops_inventory/__main__.py b/tools/sops-inventory/sops_inventory/__main__.py
index 68f72b60..47100c17 100644
--- a/tools/sops-inventory/sops_inventory/__main__.py
+++ b/tools/sops-inventory/sops_inventory/__main__.py
@@ -5,16 +5,39 @@ from collections import deque, defaultdict
 
 import argparse
 
+import subprocess
+
+from operator import attrgetter, itemgetter
+
 from yaml import load, YAMLError
 try:
     from yaml import CLoader as Loader
 except ImportError:
     from yaml import Loader
 
-
 SOPS_TYPES = frozenset({'kms', 'gcp_kms', 'azure_kv', 'hc_vault', 'age', 'pgp'})
 
 
+def readnull(fh):
+    """Yield each NUL-terminated record read from the binary stream fh.
+
+    A trailing record that lacks its terminator is yielded once at EOF.
+    """
+    buffer = b''
+
+    while True:
+        chunk = fh.read(4096)
+        if not chunk:
+            break
+
+        # The piece after the last NUL is an incomplete record: carry it over.
+        *records, buffer = (buffer + chunk).split(b'\0')
+        yield from records
+
+    if buffer:
+        # EOF without a final NUL: emit the remainder once, then stop.
+        yield buffer
+
 class BooleanAction(argparse.Action):
     def __init__(self, option_strings, dest, nargs=None, **kwargs):
         super(BooleanAction, self).__init__(option_strings, dest, nargs=0, **kwargs)
@@ -24,55 +47,51 @@ class BooleanAction(argparse.Action):
 
 
 def main():
-    default_base = os.getenv('SOPS_INVENTORY_BASE', default=[])
-    if default_base:
-        default_base = Path(default_base)
-
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--list-files', '--no-list-files', action=BooleanAction, default=False, help='Only list sops files')
-    parser.add_argument('path', metavar='PATH', nargs='?' if default_base else None, type=Path, default=default_base, help='Base directory to take inventory of')
+    parser.add_argument('path', metavar='PATH', nargs='?', type=Path, default=Path('.'), help='Base directory to take inventory of')
     args = parser.parse_args()
 
-    inventory = defaultdict(set)
-
-    queue = deque([args.path])
-    while queue:
-        baseDir = queue.popleft()
-        for child in baseDir.iterdir():
-            if child.is_dir():
-                queue.append(child)
-            else:
-                try:
-                    with child.open(mode='r') as fh:
-                        yaml = load(fh, Loader=Loader)
-                        if not yaml:
-                            raise ValueError('Could not parse YAML')
-                        if not isinstance(yaml, dict) or not 'sops' in yaml:
-                            raise ValueError('Did not find "sops" key')
-                        sops = yaml['sops']
-
-                        key_info = set()
-                        for k in SOPS_TYPES:
-                            if k in sops:
-                                v = sops[k]
-                                if not v:
-                                    continue
-
-                                match k:
-                                    case 'pgp':
-                                      for r in v:
-                                          key_info.add(r['fp'])
-                                    case 'age':
-                                      for r in v:
-                                          key_info.add(r['recipient'])
-                                    case _:
-                                      raise NotImplementedError
-                        inventory[frozenset(key_info)].add(child.relative_to(args.path))
-                except (YAMLError, ValueError) as e:
-                    pass
+    inventory = defaultdict(list)
+
+    with subprocess.Popen(['git', '-C', args.path, 'ls-files', '-z'], stdin=subprocess.DEVNULL, stdout=subprocess.PIPE) as proc:
+        files = sorted(map(lambda child: args.path / child.decode('utf-8').strip(), readnull(proc.stdout)), key=attrgetter('parts'))
+        for child in files:
+            try:
+                with child.open(mode='r') as fh:
+                    yaml = load(fh, Loader=Loader)
+                    if not yaml:
+                        raise ValueError('Could not parse YAML')
+                    if not isinstance(yaml, dict) or not 'sops' in yaml:
+                        raise ValueError('Did not find "sops" key')
+                    sops = yaml['sops']
+
+                    key_info = set()
+                    for k in SOPS_TYPES:
+                        if k in sops:
+                            v = sops[k]
+                            if not v:
+                                continue
+
+                            match k:
+                                case 'pgp':
+                                  for r in v:
+                                      key_info.add(r['fp'])
+                                case 'age':
+                                  for r in v:
+                                      key_info.add(r['recipient'])
+                                case _:
+                                  raise NotImplementedError
+                    inventory[frozenset(key_info)].append(child.relative_to(args.path))
+            except (YAMLError, ValueError) as e:
+                pass
+
+        proc.wait(timeout=1)
+        if proc.returncode != 0:
+            raise RuntimeError(f'git ls-files returned with {proc.returncode}')
 
     if not args.list_files:
-        for keys, files in inventory.items():
+        for keys, files in sorted(inventory.items(), key=itemgetter(0)):
             print(','.join(keys) + ':')
             for file in files:
                 print('  - ' + str(file))
-- 
cgit v1.2.3