diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2021-08-03 17:49:13 +0200 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2021-08-03 17:49:13 +0200 |
commit | 59914a02ccdeb88b6370d9a202f40435d5d04feb (patch) | |
tree | 66895f12d3bbe7baa9c1e0311d3f9224904976e9 | |
parent | 4afc0f33fd14959612af14db59231d95035a2556 (diff) | |
download | nixos-59914a02ccdeb88b6370d9a202f40435d5d04feb.tar nixos-59914a02ccdeb88b6370d9a202f40435d5d04feb.tar.gz nixos-59914a02ccdeb88b6370d9a202f40435d5d04feb.tar.bz2 nixos-59914a02ccdeb88b6370d9a202f40435d5d04feb.tar.xz nixos-59914a02ccdeb88b6370d9a202f40435d5d04feb.zip |
stage-1: dereference secrets
-rw-r--r-- | modules/stage-1/default.nix | 669 | ||||
-rw-r--r-- | modules/stage-1/stage-1-init.sh | 638 |
2 files changed, 1307 insertions, 0 deletions
diff --git a/modules/stage-1/default.nix b/modules/stage-1/default.nix new file mode 100644 index 00000000..5a14584e --- /dev/null +++ b/modules/stage-1/default.nix | |||
@@ -0,0 +1,669 @@ | |||
1 | # This module builds the initial ramdisk, which contains an init | ||
2 | # script that performs the first stage of booting the system: it loads | ||
3 | # the modules necessary to mount the root file system, then calls the | ||
4 | # init in the root file system to start the second boot stage. | ||
5 | |||
6 | { config, lib, utils, pkgs, ... }: | ||
7 | |||
8 | with lib; | ||
9 | |||
10 | let | ||
11 | |||
12 | udev = config.systemd.package; | ||
13 | |||
14 | kernel-name = config.boot.kernelPackages.kernel.name or "kernel"; | ||
15 | |||
16 | modulesTree = config.system.modulesTree.override { name = kernel-name + "-modules"; }; | ||
17 | firmware = config.hardware.firmware; | ||
18 | |||
19 | |||
20 | # Determine the set of modules that we need to mount the root FS. | ||
21 | modulesClosure = pkgs.makeModulesClosure { | ||
22 | rootModules = config.boot.initrd.availableKernelModules ++ config.boot.initrd.kernelModules; | ||
23 | kernel = modulesTree; | ||
24 | firmware = firmware; | ||
25 | allowMissing = false; | ||
26 | }; | ||
27 | |||
28 | |||
29 | # The initrd only has to mount `/` or any FS marked as necessary for | ||
30 | # booting (such as the FS containing `/nix/store`, or an FS needed for | ||
31 | # mounting `/`, like `/` on a loopback). | ||
32 | fileSystems = filter utils.fsNeededForBoot config.system.build.fileSystems; | ||
33 | |||
34 | # A utility for enumerating the shared-library dependencies of a program | ||
35 | findLibs = pkgs.buildPackages.writeShellScriptBin "find-libs" '' | ||
36 | set -euo pipefail | ||
37 | |||
38 | declare -A seen | ||
39 | left=() | ||
40 | |||
41 | patchelf="${pkgs.buildPackages.patchelf}/bin/patchelf" | ||
42 | |||
43 | function add_needed { | ||
44 | rpath="$($patchelf --print-rpath $1)" | ||
45 | dir="$(dirname $1)" | ||
46 | for lib in $($patchelf --print-needed $1); do | ||
47 | left+=("$lib" "$rpath" "$dir") | ||
48 | done | ||
49 | } | ||
50 | |||
51 | add_needed "$1" | ||
52 | |||
53 | while [ ''${#left[@]} -ne 0 ]; do | ||
54 | next=''${left[0]} | ||
55 | rpath=''${left[1]} | ||
56 | ORIGIN=''${left[2]} | ||
57 | left=("''${left[@]:3}") | ||
58 | if [ -z ''${seen[$next]+x} ]; then | ||
59 | seen[$next]=1 | ||
60 | |||
61 | # Ignore the dynamic linker which for some reason appears as a DT_NEEDED of glibc but isn't in glibc's RPATH. | ||
62 | case "$next" in | ||
63 | ld*.so.?) continue;; | ||
64 | esac | ||
65 | |||
66 | IFS=: read -ra paths <<< $rpath | ||
67 | res= | ||
68 | for path in "''${paths[@]}"; do | ||
69 | path=$(eval "echo $path") | ||
70 | if [ -f "$path/$next" ]; then | ||
71 | res="$path/$next" | ||
72 | echo "$res" | ||
73 | add_needed "$res" | ||
74 | break | ||
75 | fi | ||
76 | done | ||
77 | if [ -z "$res" ]; then | ||
78 | echo "Couldn't satisfy dependency $next" >&2 | ||
79 | exit 1 | ||
80 | fi | ||
81 | fi | ||
82 | done | ||
83 | ''; | ||
84 | |||
85 | # Some additional utilities needed in stage 1, like mount, lvm, fsck | ||
86 | # etc. We don't want to bring in all of those packages, so we just | ||
87 | # copy what we need. Instead of using statically linked binaries, | ||
88 | # we just copy what we need from Glibc and use patchelf to make it | ||
89 | # work. | ||
90 | extraUtils = pkgs.runCommandCC "extra-utils" | ||
91 | { nativeBuildInputs = [pkgs.buildPackages.nukeReferences]; | ||
92 | allowedReferences = [ "out" ]; # prevent accidents like glibc being included in the initrd | ||
93 | } | ||
94 | '' | ||
95 | set +o pipefail | ||
96 | |||
97 | mkdir -p $out/bin $out/lib | ||
98 | ln -s $out/bin $out/sbin | ||
99 | |||
100 | copy_bin_and_libs () { | ||
101 | [ -f "$out/bin/$(basename $1)" ] && rm "$out/bin/$(basename $1)" | ||
102 | cp -pdv $1 $out/bin | ||
103 | } | ||
104 | |||
105 | # Copy BusyBox. | ||
106 | for BIN in ${pkgs.busybox}/{s,}bin/*; do | ||
107 | copy_bin_and_libs $BIN | ||
108 | done | ||
109 | |||
110 | # Copy some util-linux stuff. | ||
111 | copy_bin_and_libs ${pkgs.util-linux}/sbin/blkid | ||
112 | |||
113 | # Copy dmsetup and lvm. | ||
114 | copy_bin_and_libs ${getBin pkgs.lvm2}/bin/dmsetup | ||
115 | copy_bin_and_libs ${getBin pkgs.lvm2}/bin/lvm | ||
116 | |||
117 | # Add RAID mdadm tool. | ||
118 | copy_bin_and_libs ${pkgs.mdadm}/sbin/mdadm | ||
119 | copy_bin_and_libs ${pkgs.mdadm}/sbin/mdmon | ||
120 | |||
121 | # Copy udev. | ||
122 | copy_bin_and_libs ${udev}/bin/udevadm | ||
123 | copy_bin_and_libs ${udev}/lib/systemd/systemd-sysctl | ||
124 | for BIN in ${udev}/lib/udev/*_id; do | ||
125 | copy_bin_and_libs $BIN | ||
126 | done | ||
127 | # systemd-udevd is only a symlink to udevadm these days | ||
128 | ln -sf udevadm $out/bin/systemd-udevd | ||
129 | |||
130 | # Copy modprobe. | ||
131 | copy_bin_and_libs ${pkgs.kmod}/bin/kmod | ||
132 | ln -sf kmod $out/bin/modprobe | ||
133 | |||
134 | # Copy resize2fs if any ext* filesystems are to be resized | ||
135 | ${optionalString (any (fs: fs.autoResize && (lib.hasPrefix "ext" fs.fsType)) fileSystems) '' | ||
136 | # We need mke2fs in the initrd. | ||
137 | copy_bin_and_libs ${pkgs.e2fsprogs}/sbin/resize2fs | ||
138 | ''} | ||
139 | |||
140 | # Copy secrets if needed. | ||
141 | # | ||
142 | # TODO: move out to a separate script; see #85000. | ||
143 | ${optionalString (!config.boot.loader.supportsInitrdSecrets) | ||
144 | (concatStringsSep "\n" (mapAttrsToList (dest: source: | ||
145 | let source' = if source == null then dest else source; in | ||
146 | '' | ||
147 | mkdir -p $(dirname "$out/secrets/${dest}") | ||
148 | # Some programs (e.g. ssh) doesn't like secrets to be | ||
149 | # symlinks, so we use `cp -L` here to match the | ||
150 | # behaviour when secrets are natively supported. | ||
151 | cp -Lr ${source'} "$out/secrets/${dest}" | ||
152 | '' | ||
153 | ) config.boot.initrd.secrets)) | ||
154 | } | ||
155 | |||
156 | ${config.boot.initrd.extraUtilsCommands} | ||
157 | |||
158 | # Copy ld manually since it isn't detected correctly | ||
159 | cp -pv ${pkgs.stdenv.cc.libc.out}/lib/ld*.so.? $out/lib | ||
160 | |||
161 | # Copy all of the needed libraries | ||
162 | find $out/bin $out/lib -type f | while read BIN; do | ||
163 | echo "Copying libs for executable $BIN" | ||
164 | for LIB in $(${findLibs}/bin/find-libs $BIN); do | ||
165 | TGT="$out/lib/$(basename $LIB)" | ||
166 | if [ ! -f "$TGT" ]; then | ||
167 | SRC="$(readlink -e $LIB)" | ||
168 | cp -pdv "$SRC" "$TGT" | ||
169 | fi | ||
170 | done | ||
171 | done | ||
172 | |||
173 | # Strip binaries further than normal. | ||
174 | chmod -R u+w $out | ||
175 | stripDirs "$STRIP" "lib bin" "-s" | ||
176 | |||
177 | # Run patchelf to make the programs refer to the copied libraries. | ||
178 | find $out/bin $out/lib -type f | while read i; do | ||
179 | if ! test -L $i; then | ||
180 | nuke-refs -e $out $i | ||
181 | fi | ||
182 | done | ||
183 | |||
184 | find $out/bin -type f | while read i; do | ||
185 | if ! test -L $i; then | ||
186 | echo "patching $i..." | ||
187 | patchelf --set-interpreter $out/lib/ld*.so.? --set-rpath $out/lib $i || true | ||
188 | fi | ||
189 | done | ||
190 | |||
191 | if [ -z "${toString (pkgs.stdenv.hostPlatform != pkgs.stdenv.buildPlatform)}" ]; then | ||
192 | # Make sure that the patchelf'ed binaries still work. | ||
193 | echo "testing patched programs..." | ||
194 | $out/bin/ash -c 'echo hello world' | grep "hello world" | ||
195 | export LD_LIBRARY_PATH=$out/lib | ||
196 | $out/bin/mount --help 2>&1 | grep -q "BusyBox" | ||
197 | $out/bin/blkid -V 2>&1 | grep -q 'libblkid' | ||
198 | $out/bin/udevadm --version | ||
199 | $out/bin/dmsetup --version 2>&1 | tee -a log | grep -q "version:" | ||
200 | LVM_SYSTEM_DIR=$out $out/bin/lvm version 2>&1 | tee -a log | grep -q "LVM" | ||
201 | $out/bin/mdadm --version | ||
202 | |||
203 | ${config.boot.initrd.extraUtilsCommandsTest} | ||
204 | fi | ||
205 | ''; # */ | ||
206 | |||
207 | |||
208 | # Networkd link files are used early by udev to set up interfaces early. | ||
209 | # This must be done in stage 1 to avoid race conditions between udev and | ||
210 | # network daemons. | ||
211 | linkUnits = pkgs.runCommand "link-units" { | ||
212 | allowedReferences = [ extraUtils ]; | ||
213 | preferLocalBuild = true; | ||
214 | } ('' | ||
215 | mkdir -p $out | ||
216 | cp -v ${udev}/lib/systemd/network/*.link $out/ | ||
217 | '' + ( | ||
218 | let | ||
219 | links = filterAttrs (n: v: hasSuffix ".link" n) config.systemd.network.units; | ||
220 | files = mapAttrsToList (n: v: "${v.unit}/${n}") links; | ||
221 | in | ||
222 | concatMapStringsSep "\n" (file: "cp -v ${file} $out/") files | ||
223 | )); | ||
224 | |||
225 | udevRules = pkgs.runCommand "udev-rules" { | ||
226 | allowedReferences = [ extraUtils ]; | ||
227 | preferLocalBuild = true; | ||
228 | } '' | ||
229 | mkdir -p $out | ||
230 | |||
231 | echo 'ENV{LD_LIBRARY_PATH}="${extraUtils}/lib"' > $out/00-env.rules | ||
232 | |||
233 | cp -v ${udev}/lib/udev/rules.d/60-cdrom_id.rules $out/ | ||
234 | cp -v ${udev}/lib/udev/rules.d/60-persistent-storage.rules $out/ | ||
235 | cp -v ${udev}/lib/udev/rules.d/75-net-description.rules $out/ | ||
236 | cp -v ${udev}/lib/udev/rules.d/80-drivers.rules $out/ | ||
237 | cp -v ${udev}/lib/udev/rules.d/80-net-setup-link.rules $out/ | ||
238 | cp -v ${pkgs.lvm2}/lib/udev/rules.d/*.rules $out/ | ||
239 | ${config.boot.initrd.extraUdevRulesCommands} | ||
240 | |||
241 | for i in $out/*.rules; do | ||
242 | substituteInPlace $i \ | ||
243 | --replace ata_id ${extraUtils}/bin/ata_id \ | ||
244 | --replace scsi_id ${extraUtils}/bin/scsi_id \ | ||
245 | --replace cdrom_id ${extraUtils}/bin/cdrom_id \ | ||
246 | --replace ${pkgs.coreutils}/bin/basename ${extraUtils}/bin/basename \ | ||
247 | --replace ${pkgs.util-linux}/bin/blkid ${extraUtils}/bin/blkid \ | ||
248 | --replace ${getBin pkgs.lvm2}/bin ${extraUtils}/bin \ | ||
249 | --replace ${pkgs.mdadm}/sbin ${extraUtils}/sbin \ | ||
250 | --replace ${pkgs.bash}/bin/sh ${extraUtils}/bin/sh \ | ||
251 | --replace ${udev} ${extraUtils} | ||
252 | done | ||
253 | |||
254 | # Work around a bug in QEMU, which doesn't implement the "READ | ||
255 | # DISC INFORMATION" SCSI command: | ||
256 | # https://bugzilla.redhat.com/show_bug.cgi?id=609049 | ||
257 | # As a result, `cdrom_id' doesn't print | ||
258 | # ID_CDROM_MEDIA_TRACK_COUNT_DATA, which in turn prevents the | ||
259 | # /dev/disk/by-label symlinks from being created. We need these | ||
260 | # in the NixOS installation CD, so use ID_CDROM_MEDIA in the | ||
261 | # corresponding udev rules for now. This was the behaviour in | ||
262 | # udev <= 154. See also | ||
263 | # http://www.spinics.net/lists/hotplug/msg03935.html | ||
264 | substituteInPlace $out/60-persistent-storage.rules \ | ||
265 | --replace ID_CDROM_MEDIA_TRACK_COUNT_DATA ID_CDROM_MEDIA | ||
266 | ''; # */ | ||
267 | |||
268 | |||
269 | # The init script of boot stage 1 (loading kernel modules for | ||
270 | # mounting the root FS). | ||
271 | bootStage1 = pkgs.substituteAll { | ||
272 | src = ./stage-1-init.sh; | ||
273 | |||
274 | shell = "${extraUtils}/bin/ash"; | ||
275 | |||
276 | isExecutable = true; | ||
277 | |||
278 | postInstall = '' | ||
279 | echo checking syntax | ||
280 | # check both with bash | ||
281 | ${pkgs.buildPackages.bash}/bin/sh -n $target | ||
282 | # and with ash shell, just in case | ||
283 | ${pkgs.buildPackages.busybox}/bin/ash -n $target | ||
284 | ''; | ||
285 | |||
286 | inherit linkUnits udevRules extraUtils modulesClosure; | ||
287 | |||
288 | inherit (config.boot) resumeDevice; | ||
289 | |||
290 | inherit (config.system.build) earlyMountScript; | ||
291 | |||
292 | inherit (config.boot.initrd) checkJournalingFS verbose | ||
293 | preLVMCommands preDeviceCommands postDeviceCommands postMountCommands preFailCommands kernelModules; | ||
294 | |||
295 | resumeDevices = map (sd: if sd ? device then sd.device else "/dev/disk/by-label/${sd.label}") | ||
296 | (filter (sd: hasPrefix "/dev/" sd.device && !sd.randomEncryption.enable | ||
297 | # Don't include zram devices | ||
298 | && !(hasPrefix "/dev/zram" sd.device) | ||
299 | ) config.swapDevices); | ||
300 | |||
301 | fsInfo = | ||
302 | let f = fs: [ fs.mountPoint (if fs.device != null then fs.device else "/dev/disk/by-label/${fs.label}") fs.fsType (builtins.concatStringsSep "," fs.options) ]; | ||
303 | in pkgs.writeText "initrd-fsinfo" (concatStringsSep "\n" (concatMap f fileSystems)); | ||
304 | |||
305 | setHostId = optionalString (config.networking.hostId != null) '' | ||
306 | hi="${config.networking.hostId}" | ||
307 | ${if pkgs.stdenv.isBigEndian then '' | ||
308 | echo -ne "\x''${hi:0:2}\x''${hi:2:2}\x''${hi:4:2}\x''${hi:6:2}" > /etc/hostid | ||
309 | '' else '' | ||
310 | echo -ne "\x''${hi:6:2}\x''${hi:4:2}\x''${hi:2:2}\x''${hi:0:2}" > /etc/hostid | ||
311 | ''} | ||
312 | ''; | ||
313 | }; | ||
314 | |||
315 | |||
316 | # The closure of the init script of boot stage 1 is what we put in | ||
317 | # the initial RAM disk. | ||
318 | initialRamdisk = pkgs.makeInitrd { | ||
319 | name = "initrd-${kernel-name}"; | ||
320 | inherit (config.boot.initrd) compressor compressorArgs prepend; | ||
321 | |||
322 | contents = | ||
323 | [ { object = bootStage1; | ||
324 | symlink = "/init"; | ||
325 | } | ||
326 | { object = pkgs.writeText "mdadm.conf" config.boot.initrd.mdadmConf; | ||
327 | symlink = "/etc/mdadm.conf"; | ||
328 | } | ||
329 | { object = pkgs.runCommand "initrd-kmod-blacklist-ubuntu" { | ||
330 | src = "${pkgs.kmod-blacklist-ubuntu}/modprobe.conf"; | ||
331 | preferLocalBuild = true; | ||
332 | } '' | ||
333 | target=$out | ||
334 | ${pkgs.buildPackages.perl}/bin/perl -0pe 's/## file: iwlwifi.conf(.+?)##/##/s;' $src > $out | ||
335 | ''; | ||
336 | symlink = "/etc/modprobe.d/ubuntu.conf"; | ||
337 | } | ||
338 | { object = pkgs.kmod-debian-aliases; | ||
339 | symlink = "/etc/modprobe.d/debian.conf"; | ||
340 | } | ||
341 | ]; | ||
342 | }; | ||
343 | |||
344 | # Script to add secret files to the initrd at bootloader update time | ||
345 | initialRamdiskSecretAppender = | ||
346 | let | ||
347 | compressorExe = initialRamdisk.compressorExecutableFunction pkgs; | ||
348 | in pkgs.writeScriptBin "append-initrd-secrets" | ||
349 | '' | ||
350 | #!${pkgs.bash}/bin/bash -e | ||
351 | function usage { | ||
352 | echo "USAGE: $0 INITRD_FILE" >&2 | ||
353 | echo "Appends this configuration's secrets to INITRD_FILE" >&2 | ||
354 | } | ||
355 | |||
356 | if [ $# -ne 1 ]; then | ||
357 | usage | ||
358 | exit 1 | ||
359 | fi | ||
360 | |||
361 | if [ "$1"x = "--helpx" ]; then | ||
362 | usage | ||
363 | exit 0 | ||
364 | fi | ||
365 | |||
366 | ${lib.optionalString (config.boot.initrd.secrets == {}) | ||
367 | "exit 0"} | ||
368 | |||
369 | export PATH=${pkgs.coreutils}/bin:${pkgs.cpio}/bin:${pkgs.gzip}/bin:${pkgs.findutils}/bin | ||
370 | |||
371 | function cleanup { | ||
372 | if [ -n "$tmp" -a -d "$tmp" ]; then | ||
373 | rm -fR "$tmp" | ||
374 | fi | ||
375 | } | ||
376 | trap cleanup EXIT | ||
377 | |||
378 | tmp=$(mktemp -d initrd-secrets.XXXXXXXXXX) | ||
379 | |||
380 | ${lib.concatStringsSep "\n" (mapAttrsToList (dest: source: | ||
381 | let source' = if source == null then dest else toString source; in | ||
382 | '' | ||
383 | mkdir -p $(dirname "$tmp/${dest}") | ||
384 | cp -aL ${source'} "$tmp/${dest}" | ||
385 | '' | ||
386 | ) config.boot.initrd.secrets) | ||
387 | } | ||
388 | |||
389 | (cd "$tmp" && find . -print0 | sort -z | cpio --quiet -o -H newc -R +0:+0 --reproducible --null) | \ | ||
390 | ${compressorExe} ${lib.escapeShellArgs initialRamdisk.compressorArgs} >> "$1" | ||
391 | ''; | ||
392 | |||
393 | in | ||
394 | |||
395 | { | ||
396 | disabledModules = [ "system/boot/stage-1.nix" ]; | ||
397 | |||
398 | options = { | ||
399 | |||
400 | boot.resumeDevice = mkOption { | ||
401 | type = types.str; | ||
402 | default = ""; | ||
403 | example = "/dev/sda3"; | ||
404 | description = '' | ||
405 | Device for manual resume attempt during boot. This should be used primarily | ||
406 | if you want to resume from file. If left empty, the swap partitions are used. | ||
407 | Specify here the device where the file resides. | ||
408 | You should also use <varname>boot.kernelParams</varname> to specify | ||
409 | <literal><replaceable>resume_offset</replaceable></literal>. | ||
410 | ''; | ||
411 | }; | ||
412 | |||
413 | boot.initrd.enable = mkOption { | ||
414 | type = types.bool; | ||
415 | default = !config.boot.isContainer; | ||
416 | defaultText = "!config.boot.isContainer"; | ||
417 | description = '' | ||
418 | Whether to enable the NixOS initial RAM disk (initrd). This may be | ||
419 | needed to perform some initialisation tasks (like mounting | ||
420 | network/encrypted file systems) before continuing the boot process. | ||
421 | ''; | ||
422 | }; | ||
423 | |||
424 | boot.initrd.prepend = mkOption { | ||
425 | default = [ ]; | ||
426 | type = types.listOf types.str; | ||
427 | description = '' | ||
428 | Other initrd files to prepend to the final initrd we are building. | ||
429 | ''; | ||
430 | }; | ||
431 | |||
432 | boot.initrd.checkJournalingFS = mkOption { | ||
433 | default = true; | ||
434 | type = types.bool; | ||
435 | description = '' | ||
436 | Whether to run <command>fsck</command> on journaling filesystems such as ext3. | ||
437 | ''; | ||
438 | }; | ||
439 | |||
440 | boot.initrd.mdadmConf = mkOption { | ||
441 | default = ""; | ||
442 | type = types.lines; | ||
443 | description = '' | ||
444 | Contents of <filename>/etc/mdadm.conf</filename> in stage 1. | ||
445 | ''; | ||
446 | }; | ||
447 | |||
448 | boot.initrd.preLVMCommands = mkOption { | ||
449 | default = ""; | ||
450 | type = types.lines; | ||
451 | description = '' | ||
452 | Shell commands to be executed immediately before LVM discovery. | ||
453 | ''; | ||
454 | }; | ||
455 | |||
456 | boot.initrd.preDeviceCommands = mkOption { | ||
457 | default = ""; | ||
458 | type = types.lines; | ||
459 | description = '' | ||
460 | Shell commands to be executed before udev is started to create | ||
461 | device nodes. | ||
462 | ''; | ||
463 | }; | ||
464 | |||
465 | boot.initrd.postDeviceCommands = mkOption { | ||
466 | default = ""; | ||
467 | type = types.lines; | ||
468 | description = '' | ||
469 | Shell commands to be executed immediately after stage 1 of the | ||
470 | boot has loaded kernel modules and created device nodes in | ||
471 | <filename>/dev</filename>. | ||
472 | ''; | ||
473 | }; | ||
474 | |||
475 | boot.initrd.postMountCommands = mkOption { | ||
476 | default = ""; | ||
477 | type = types.lines; | ||
478 | description = '' | ||
479 | Shell commands to be executed immediately after the stage 1 | ||
480 | filesystems have been mounted. | ||
481 | ''; | ||
482 | }; | ||
483 | |||
484 | boot.initrd.preFailCommands = mkOption { | ||
485 | default = ""; | ||
486 | type = types.lines; | ||
487 | description = '' | ||
488 | Shell commands to be executed before the failure prompt is shown. | ||
489 | ''; | ||
490 | }; | ||
491 | |||
492 | boot.initrd.extraUtilsCommands = mkOption { | ||
493 | internal = true; | ||
494 | default = ""; | ||
495 | type = types.lines; | ||
496 | description = '' | ||
497 | Shell commands to be executed in the builder of the | ||
498 | extra-utils derivation. This can be used to provide | ||
499 | additional utilities in the initial ramdisk. | ||
500 | ''; | ||
501 | }; | ||
502 | |||
503 | boot.initrd.extraUtilsCommandsTest = mkOption { | ||
504 | internal = true; | ||
505 | default = ""; | ||
506 | type = types.lines; | ||
507 | description = '' | ||
508 | Shell commands to be executed in the builder of the | ||
509 | extra-utils derivation after patchelf has done its | ||
510 | job. This can be used to test additional utilities | ||
511 | copied in extraUtilsCommands. | ||
512 | ''; | ||
513 | }; | ||
514 | |||
515 | boot.initrd.extraUdevRulesCommands = mkOption { | ||
516 | internal = true; | ||
517 | default = ""; | ||
518 | type = types.lines; | ||
519 | description = '' | ||
520 | Shell commands to be executed in the builder of the | ||
521 | udev-rules derivation. This can be used to add | ||
522 | additional udev rules in the initial ramdisk. | ||
523 | ''; | ||
524 | }; | ||
525 | |||
526 | boot.initrd.compressor = mkOption { | ||
527 | default = ( | ||
528 | if lib.versionAtLeast config.boot.kernelPackages.kernel.version "5.9" | ||
529 | then "zstd" | ||
530 | else "gzip" | ||
531 | ); | ||
532 | defaultText = "zstd if the kernel supports it (5.9+), gzip if not."; | ||
533 | type = types.unspecified; # We don't have a function type... | ||
534 | description = '' | ||
535 | The compressor to use on the initrd image. May be any of: | ||
536 | |||
537 | <itemizedlist> | ||
538 | <listitem><para>The name of one of the predefined compressors, see <filename>pkgs/build-support/kernel/initrd-compressor-meta.nix</filename> for the definitions.</para></listitem> | ||
539 | <listitem><para>A function which, given the nixpkgs package set, returns the path to a compressor tool, e.g. <literal>pkgs: "''${pkgs.pigz}/bin/pigz"</literal></para></listitem> | ||
540 | <listitem><para>(not recommended, because it does not work when cross-compiling) the full path to a compressor tool, e.g. <literal>"''${pkgs.pigz}/bin/pigz"</literal></para></listitem> | ||
541 | </itemizedlist> | ||
542 | |||
543 | The given program should read data from stdin and write it to stdout compressed. | ||
544 | ''; | ||
545 | example = "xz"; | ||
546 | }; | ||
547 | |||
548 | boot.initrd.compressorArgs = mkOption { | ||
549 | default = null; | ||
550 | type = types.nullOr (types.listOf types.str); | ||
551 | description = "Arguments to pass to the compressor for the initrd image, or null to use the compressor's defaults."; | ||
552 | }; | ||
553 | |||
554 | boot.initrd.secrets = mkOption | ||
555 | { default = {}; | ||
556 | type = types.attrsOf (types.nullOr types.path); | ||
557 | description = | ||
558 | '' | ||
559 | Secrets to append to the initrd. The attribute name is the | ||
560 | path the secret should have inside the initrd, the value | ||
561 | is the path it should be copied from (or null for the same | ||
562 | path inside and out). | ||
563 | ''; | ||
564 | example = literalExample | ||
565 | '' | ||
566 | { "/etc/dropbear/dropbear_rsa_host_key" = | ||
567 | ./secret-dropbear-key; | ||
568 | } | ||
569 | ''; | ||
570 | }; | ||
571 | |||
572 | boot.initrd.supportedFilesystems = mkOption { | ||
573 | default = [ ]; | ||
574 | example = [ "btrfs" ]; | ||
575 | type = types.listOf types.str; | ||
576 | description = "Names of supported filesystem types in the initial ramdisk."; | ||
577 | }; | ||
578 | |||
579 | boot.initrd.verbose = mkOption { | ||
580 | default = true; | ||
581 | type = types.bool; | ||
582 | description = | ||
583 | '' | ||
584 | Verbosity of the initrd. Please note that disabling verbosity removes | ||
585 | only the mandatory messages generated by the NixOS scripts. For a | ||
586 | completely silent boot, you might also want to set the two following | ||
587 | configuration options: | ||
588 | |||
589 | <itemizedlist> | ||
590 | <listitem><para><literal>boot.consoleLogLevel = 0;</literal></para></listitem> | ||
591 | <listitem><para><literal>boot.kernelParams = [ "quiet" "udev.log_priority=3" ];</literal></para></listitem> | ||
592 | </itemizedlist> | ||
593 | ''; | ||
594 | }; | ||
595 | |||
596 | boot.loader.supportsInitrdSecrets = mkOption | ||
597 | { internal = true; | ||
598 | default = false; | ||
599 | type = types.bool; | ||
600 | description = | ||
601 | '' | ||
602 | Whether the bootloader setup runs append-initrd-secrets. | ||
603 | If not, any needed secrets must be copied into the initrd | ||
604 | and thus added to the store. | ||
605 | ''; | ||
606 | }; | ||
607 | |||
608 | fileSystems = mkOption { | ||
609 | type = with lib.types; attrsOf (submodule { | ||
610 | options.neededForBoot = mkOption { | ||
611 | default = false; | ||
612 | type = types.bool; | ||
613 | description = '' | ||
614 | If set, this file system will be mounted in the initial ramdisk. | ||
615 | Note that the file system will always be mounted in the initial | ||
616 | ramdisk if its mount point is one of the following: | ||
617 | ${concatStringsSep ", " ( | ||
618 | forEach utils.pathsNeededForBoot (i: "<filename>${i}</filename>") | ||
619 | )}. | ||
620 | ''; | ||
621 | }; | ||
622 | }); | ||
623 | }; | ||
624 | |||
625 | }; | ||
626 | |||
627 | config = mkIf config.boot.initrd.enable { | ||
628 | assertions = [ | ||
629 | { assertion = any (fs: fs.mountPoint == "/") fileSystems; | ||
630 | message = "The ‘fileSystems’ option does not specify your root file system."; | ||
631 | } | ||
632 | { assertion = let inherit (config.boot) resumeDevice; in | ||
633 | resumeDevice == "" || builtins.substring 0 1 resumeDevice == "/"; | ||
634 | message = "boot.resumeDevice has to be an absolute path." | ||
635 | + " Old \"x:y\" style is no longer supported."; | ||
636 | } | ||
637 | # TODO: remove when #85000 is fixed | ||
638 | { assertion = !config.boot.loader.supportsInitrdSecrets -> | ||
639 | all (source: | ||
640 | builtins.isPath source || | ||
641 | (builtins.isString source && hasPrefix builtins.storeDir source)) | ||
642 | (attrValues config.boot.initrd.secrets); | ||
643 | message = '' | ||
644 | boot.loader.initrd.secrets values must be unquoted paths when | ||
645 | using a bootloader that doesn't natively support initrd | ||
646 | secrets, e.g.: | ||
647 | |||
648 | boot.initrd.secrets = { | ||
649 | "/etc/secret" = /path/to/secret; | ||
650 | }; | ||
651 | |||
652 | Note that this will result in all secrets being stored | ||
653 | world-readable in the Nix store! | ||
654 | ''; | ||
655 | } | ||
656 | ]; | ||
657 | |||
658 | system.build = | ||
659 | { inherit bootStage1 initialRamdisk initialRamdiskSecretAppender extraUtils; }; | ||
660 | |||
661 | system.requiredKernelConfig = with config.lib.kernelConfig; [ | ||
662 | (isYes "TMPFS") | ||
663 | (isYes "BLK_DEV_INITRD") | ||
664 | ]; | ||
665 | |||
666 | boot.initrd.supportedFilesystems = map (fs: fs.fsType) fileSystems; | ||
667 | |||
668 | }; | ||
669 | } | ||
diff --git a/modules/stage-1/stage-1-init.sh b/modules/stage-1/stage-1-init.sh new file mode 100644 index 00000000..ddaf9858 --- /dev/null +++ b/modules/stage-1/stage-1-init.sh | |||
@@ -0,0 +1,638 @@ | |||
1 | #! @shell@ | ||
2 | |||
3 | targetRoot=/mnt-root | ||
4 | console=tty1 | ||
5 | verbose="@verbose@" | ||
6 | |||
7 | info() { | ||
8 | if [[ -n "$verbose" ]]; then | ||
9 | echo "$@" | ||
10 | fi | ||
11 | } | ||
12 | |||
13 | extraUtils="@extraUtils@" | ||
14 | export LD_LIBRARY_PATH=@extraUtils@/lib | ||
15 | export PATH=@extraUtils@/bin | ||
16 | ln -s @extraUtils@/bin /bin | ||
17 | |||
18 | # Copy the secrets to their needed location | ||
19 | if [ -d "@extraUtils@/secrets" ]; then | ||
20 | for secret in $(cd "@extraUtils@/secrets"; find . -type f); do | ||
21 | mkdir -p $(dirname "/$secret") | ||
22 | ln -s "@extraUtils@/secrets/$secret" "$secret" | ||
23 | done | ||
24 | fi | ||
25 | |||
26 | # Stop LVM complaining about fd3 | ||
27 | export LVM_SUPPRESS_FD_WARNINGS=true | ||
28 | |||
29 | fail() { | ||
30 | if [ -n "$panicOnFail" ]; then exit 1; fi | ||
31 | |||
32 | @preFailCommands@ | ||
33 | |||
34 | # If starting stage 2 failed, allow the user to repair the problem | ||
35 | # in an interactive shell. | ||
36 | cat <<EOF | ||
37 | |||
38 | An error occurred in stage 1 of the boot process, which must mount the | ||
39 | root filesystem on \`$targetRoot' and then start stage 2. Press one | ||
40 | of the following keys: | ||
41 | |||
42 | EOF | ||
43 | if [ -n "$allowShell" ]; then cat <<EOF | ||
44 | i) to launch an interactive shell | ||
45 | f) to start an interactive shell having pid 1 (needed if you want to | ||
46 | start stage 2's init manually) | ||
47 | EOF | ||
48 | fi | ||
49 | cat <<EOF | ||
50 | r) to reboot immediately | ||
51 | *) to ignore the error and continue | ||
52 | EOF | ||
53 | |||
54 | read -n 1 reply | ||
55 | |||
56 | if [ -n "$allowShell" -a "$reply" = f ]; then | ||
57 | exec setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" | ||
58 | elif [ -n "$allowShell" -a "$reply" = i ]; then | ||
59 | echo "Starting interactive shell..." | ||
60 | setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail | ||
61 | elif [ "$reply" = r ]; then | ||
62 | echo "Rebooting..." | ||
63 | reboot -f | ||
64 | else | ||
65 | info "Continuing..." | ||
66 | fi | ||
67 | } | ||
68 | |||
69 | trap 'fail' 0 | ||
70 | |||
71 | |||
72 | # Print a greeting. | ||
73 | info | ||
74 | info "[1;32m<<< NixOS Stage 1 >>>[0m" | ||
75 | info | ||
76 | |||
77 | # Make several required directories. | ||
78 | mkdir -p /etc/udev | ||
79 | touch /etc/fstab # to shut up mount | ||
80 | ln -s /proc/mounts /etc/mtab # to shut up mke2fs | ||
81 | touch /etc/udev/hwdb.bin # to shut up udev | ||
82 | touch /etc/initrd-release | ||
83 | |||
84 | # Function for waiting a device to appear. | ||
85 | waitDevice() { | ||
86 | local device="$1" | ||
87 | |||
88 | # USB storage devices tend to appear with some delay. It would be | ||
89 | # great if we had a way to synchronously wait for them, but | ||
90 | # alas... So just wait for a few seconds for the device to | ||
91 | # appear. | ||
92 | if test ! -e $device; then | ||
93 | echo -n "waiting for device $device to appear..." | ||
94 | try=20 | ||
95 | while [ $try -gt 0 ]; do | ||
96 | sleep 1 | ||
97 | # also re-try lvm activation now that new block devices might have appeared | ||
98 | lvm vgchange -ay | ||
99 | # and tell udev to create nodes for the new LVs | ||
100 | udevadm trigger --action=add | ||
101 | if test -e $device; then break; fi | ||
102 | echo -n "." | ||
103 | try=$((try - 1)) | ||
104 | done | ||
105 | echo | ||
106 | [ $try -ne 0 ] | ||
107 | fi | ||
108 | } | ||
109 | |||
110 | # Mount special file systems. | ||
111 | specialMount() { | ||
112 | local device="$1" | ||
113 | local mountPoint="$2" | ||
114 | local options="$3" | ||
115 | local fsType="$4" | ||
116 | |||
117 | mkdir -m 0755 -p "$mountPoint" | ||
118 | mount -n -t "$fsType" -o "$options" "$device" "$mountPoint" | ||
119 | } | ||
120 | source @earlyMountScript@ | ||
121 | |||
122 | # Log the script output to /dev/kmsg or /run/log/stage-1-init.log. | ||
123 | mkdir -p /tmp | ||
124 | mkfifo /tmp/stage-1-init.log.fifo | ||
125 | logOutFd=8 && logErrFd=9 | ||
126 | eval "exec $logOutFd>&1 $logErrFd>&2" | ||
127 | if test -w /dev/kmsg; then | ||
128 | tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do | ||
129 | if test -n "$line"; then | ||
130 | echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg | ||
131 | fi | ||
132 | done & | ||
133 | else | ||
134 | mkdir -p /run/log | ||
135 | tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log & | ||
136 | fi | ||
137 | exec > /tmp/stage-1-init.log.fifo 2>&1 | ||
138 | |||
139 | |||
140 | # Process the kernel command line. | ||
141 | export stage2Init=/init | ||
142 | for o in $(cat /proc/cmdline); do | ||
143 | case $o in | ||
144 | console=*) | ||
145 | set -- $(IFS==; echo $o) | ||
146 | params=$2 | ||
147 | set -- $(IFS=,; echo $params) | ||
148 | console=$1 | ||
149 | ;; | ||
150 | init=*) | ||
151 | set -- $(IFS==; echo $o) | ||
152 | stage2Init=$2 | ||
153 | ;; | ||
154 | boot.persistence=*) | ||
155 | set -- $(IFS==; echo $o) | ||
156 | persistence=$2 | ||
157 | ;; | ||
158 | boot.persistence.opt=*) | ||
159 | set -- $(IFS==; echo $o) | ||
160 | persistence_opt=$2 | ||
161 | ;; | ||
162 | boot.trace|debugtrace) | ||
163 | # Show each command. | ||
164 | set -x | ||
165 | ;; | ||
166 | boot.shell_on_fail) | ||
167 | allowShell=1 | ||
168 | ;; | ||
169 | boot.debug1|debug1) # stop right away | ||
170 | allowShell=1 | ||
171 | fail | ||
172 | ;; | ||
173 | boot.debug1devices) # stop after loading modules and creating device nodes | ||
174 | allowShell=1 | ||
175 | debug1devices=1 | ||
176 | ;; | ||
177 | boot.debug1mounts) # stop after mounting file systems | ||
178 | allowShell=1 | ||
179 | debug1mounts=1 | ||
180 | ;; | ||
181 | boot.panic_on_fail|stage1panic=1) | ||
182 | panicOnFail=1 | ||
183 | ;; | ||
184 | root=*) | ||
185 | # If a root device is specified on the kernel command | ||
186 | # line, make it available through the symlink /dev/root. | ||
187 | # Recognise LABEL= and UUID= to support UNetbootin. | ||
188 | set -- $(IFS==; echo $o) | ||
189 | if [ $2 = "LABEL" ]; then | ||
190 | root="/dev/disk/by-label/$3" | ||
191 | elif [ $2 = "UUID" ]; then | ||
192 | root="/dev/disk/by-uuid/$3" | ||
193 | else | ||
194 | root=$2 | ||
195 | fi | ||
196 | ln -s "$root" /dev/root | ||
197 | ;; | ||
198 | copytoram) | ||
199 | copytoram=1 | ||
200 | ;; | ||
201 | findiso=*) | ||
202 | # if an iso name is supplied, try to find the device where | ||
203 | # the iso resides on | ||
204 | set -- $(IFS==; echo $o) | ||
205 | isoPath=$2 | ||
206 | ;; | ||
207 | esac | ||
208 | done | ||
209 | |||
210 | # Set hostid before modules are loaded. | ||
211 | # This is needed by the spl/zfs modules. | ||
212 | @setHostId@ | ||
213 | |||
214 | # Load the required kernel modules. | ||
215 | mkdir -p /lib | ||
216 | ln -s @modulesClosure@/lib/modules /lib/modules | ||
217 | ln -s @modulesClosure@/lib/firmware /lib/firmware | ||
218 | echo @extraUtils@/bin/modprobe > /proc/sys/kernel/modprobe | ||
219 | for i in @kernelModules@; do | ||
220 | info "loading module $(basename $i)..." | ||
221 | modprobe $i | ||
222 | done | ||
223 | |||
224 | |||
225 | # Create device nodes in /dev. | ||
226 | @preDeviceCommands@ | ||
227 | info "running udev..." | ||
228 | ln -sfn /proc/self/fd /dev/fd | ||
229 | ln -sfn /proc/self/fd/0 /dev/stdin | ||
230 | ln -sfn /proc/self/fd/1 /dev/stdout | ||
231 | ln -sfn /proc/self/fd/2 /dev/stderr | ||
232 | mkdir -p /etc/systemd | ||
233 | ln -sfn @linkUnits@ /etc/systemd/network | ||
234 | mkdir -p /etc/udev | ||
235 | ln -sfn @udevRules@ /etc/udev/rules.d | ||
236 | mkdir -p /dev/.mdadm | ||
237 | systemd-udevd --daemon | ||
238 | udevadm trigger --action=add | ||
239 | udevadm settle | ||
240 | |||
241 | |||
242 | # XXX: Use case usb->lvm will still fail, usb->luks->lvm is covered | ||
243 | @preLVMCommands@ | ||
244 | |||
245 | info "starting device mapper and LVM..." | ||
246 | lvm vgchange -ay | ||
247 | |||
248 | if test -n "$debug1devices"; then fail; fi | ||
249 | |||
250 | |||
251 | @postDeviceCommands@ | ||
252 | |||
253 | |||
254 | # Return true if the machine is on AC power, or if we can't determine | ||
255 | # whether it's on AC power. | ||
256 | onACPower() { | ||
257 | ! test -d "/proc/acpi/battery" || | ||
258 | ! ls /proc/acpi/battery/BAT[0-9]* > /dev/null 2>&1 || | ||
259 | ! cat /proc/acpi/battery/BAT*/state | grep "^charging state" | grep -q "discharg" | ||
260 | } | ||
261 | |||
262 | |||
263 | # Check the specified file system, if appropriate. | ||
264 | checkFS() { | ||
265 | local device="$1" | ||
266 | local fsType="$2" | ||
267 | |||
268 | # Only check block devices. | ||
269 | if [ ! -b "$device" ]; then return 0; fi | ||
270 | |||
271 | # Don't check ROM filesystems. | ||
272 | if [ "$fsType" = iso9660 -o "$fsType" = udf ]; then return 0; fi | ||
273 | |||
274 | # Don't check resilient COWs as they validate the fs structures at mount time | ||
275 | if [ "$fsType" = btrfs -o "$fsType" = zfs -o "$fsType" = bcachefs ]; then return 0; fi | ||
276 | |||
277 | # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem. | ||
278 | if [ "$fsType" = nilfs2 ]; then return 0; fi | ||
279 | |||
280 | # Skip fsck for inherently readonly filesystems. | ||
281 | if [ "$fsType" = squashfs ]; then return 0; fi | ||
282 | |||
283 | # If we couldn't figure out the FS type, then skip fsck. | ||
284 | if [ "$fsType" = auto ]; then | ||
285 | echo 'cannot check filesystem with type "auto"!' | ||
286 | return 0 | ||
287 | fi | ||
288 | |||
289 | # Device might be already mounted manually | ||
290 | # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs | ||
291 | if mount | grep -q "^$device on "; then | ||
292 | echo "skip checking already mounted $device" | ||
293 | return 0 | ||
294 | fi | ||
295 | |||
296 | # Optionally, skip fsck on journaling filesystems. This option is | ||
297 | # a hack - it's mostly because e2fsck on ext3 takes much longer to | ||
298 | # recover the journal than the ext3 implementation in the kernel | ||
299 | # does (minutes versus seconds). | ||
300 | if test -z "@checkJournalingFS@" -a \ | ||
301 | \( "$fsType" = ext3 -o "$fsType" = ext4 -o "$fsType" = reiserfs \ | ||
302 | -o "$fsType" = xfs -o "$fsType" = jfs -o "$fsType" = f2fs \) | ||
303 | then | ||
304 | return 0 | ||
305 | fi | ||
306 | |||
307 | # Don't run `fsck' if the machine is on battery power. !!! Is | ||
308 | # this a good idea? | ||
309 | if ! onACPower; then | ||
310 | echo "on battery power, so no \`fsck' will be performed on \`$device'" | ||
311 | return 0 | ||
312 | fi | ||
313 | |||
314 | echo "checking $device..." | ||
315 | |||
316 | fsckFlags= | ||
317 | if test "$fsType" != "btrfs"; then | ||
318 | fsckFlags="-V -a" | ||
319 | fi | ||
320 | fsck $fsckFlags "$device" | ||
321 | fsckResult=$? | ||
322 | |||
323 | if test $(($fsckResult | 2)) = $fsckResult; then | ||
324 | echo "fsck finished, rebooting..." | ||
325 | sleep 3 | ||
326 | reboot -f | ||
327 | fi | ||
328 | |||
329 | if test $(($fsckResult | 4)) = $fsckResult; then | ||
330 | echo "$device has unrepaired errors, please fix them manually." | ||
331 | fail | ||
332 | fi | ||
333 | |||
334 | if test $fsckResult -ge 8; then | ||
335 | echo "fsck on $device failed." | ||
336 | fail | ||
337 | fi | ||
338 | |||
339 | return 0 | ||
340 | } | ||
341 | |||
342 | |||
343 | # Function for mounting a file system. | ||
344 | mountFS() { | ||
345 | local device="$1" | ||
346 | local mountPoint="$2" | ||
347 | local options="$3" | ||
348 | local fsType="$4" | ||
349 | |||
350 | if [ "$fsType" = auto ]; then | ||
351 | fsType=$(blkid -o value -s TYPE "$device") | ||
352 | if [ -z "$fsType" ]; then fsType=auto; fi | ||
353 | fi | ||
354 | |||
355 | # Filter out x- options, which busybox doesn't do yet. | ||
356 | local optionsFiltered="$(IFS=,; for i in $options; do if [ "${i:0:2}" != "x-" ]; then echo -n $i,; fi; done)" | ||
357 | # Prefix (lower|upper|work)dir with /mnt-root (overlayfs) | ||
358 | local optionsPrefixed="$( echo "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g' )" | ||
359 | |||
360 | echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab | ||
361 | |||
362 | checkFS "$device" "$fsType" | ||
363 | |||
364 | # Optionally resize the filesystem. | ||
365 | case $options in | ||
366 | *x-nixos.autoresize*) | ||
367 | if [ "$fsType" = ext2 -o "$fsType" = ext3 -o "$fsType" = ext4 ]; then | ||
368 | modprobe "$fsType" | ||
369 | echo "resizing $device..." | ||
370 | e2fsck -fp "$device" | ||
371 | resize2fs "$device" | ||
372 | elif [ "$fsType" = f2fs ]; then | ||
373 | echo "resizing $device..." | ||
374 | fsck.f2fs -fp "$device" | ||
375 | resize.f2fs "$device" | ||
376 | fi | ||
377 | ;; | ||
378 | esac | ||
379 | |||
380 | # Create backing directories for overlayfs | ||
381 | if [ "$fsType" = overlay ]; then | ||
382 | for i in upper work; do | ||
383 | dir="$( echo "$optionsPrefixed" | grep -o "${i}dir=[^,]*" )" | ||
384 | mkdir -m 0700 -p "${dir##*=}" | ||
385 | done | ||
386 | fi | ||
387 | |||
388 | info "mounting $device on $mountPoint..." | ||
389 | |||
390 | mkdir -p "/mnt-root$mountPoint" | ||
391 | |||
392 | # For ZFS and CIFS mounts, retry a few times before giving up. | ||
393 | # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383. | ||
394 | local n=0 | ||
395 | while true; do | ||
396 | mount "/mnt-root$mountPoint" && break | ||
397 | if [ \( "$fsType" != cifs -a "$fsType" != zfs \) -o "$n" -ge 10 ]; then fail; break; fi | ||
398 | echo "retrying..." | ||
399 | sleep 1 | ||
400 | n=$((n + 1)) | ||
401 | done | ||
402 | |||
403 | [ "$mountPoint" == "/" ] && | ||
404 | [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ] && | ||
405 | lustrateRoot "/mnt-root" | ||
406 | |||
407 | true | ||
408 | } | ||
409 | |||
410 | lustrateRoot () { | ||
411 | local root="$1" | ||
412 | |||
413 | echo | ||
414 | echo -e "\e[1;33m<<< NixOS is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m" | ||
415 | echo | ||
416 | |||
417 | mkdir -m 0755 -p "$root/old-root.tmp" | ||
418 | |||
419 | echo | ||
420 | echo "Moving impurities out of the way:" | ||
421 | for d in "$root"/* | ||
422 | do | ||
423 | [ "$d" == "$root/nix" ] && continue | ||
424 | [ "$d" == "$root/boot" ] && continue # Don't render the system unbootable | ||
425 | [ "$d" == "$root/old-root.tmp" ] && continue | ||
426 | |||
427 | mv -v "$d" "$root/old-root.tmp" | ||
428 | done | ||
429 | |||
430 | # Use .tmp to make sure subsequent invokations don't clash | ||
431 | mv -v "$root/old-root.tmp" "$root/old-root" | ||
432 | |||
433 | mkdir -m 0755 -p "$root/etc" | ||
434 | touch "$root/etc/NIXOS" | ||
435 | |||
436 | exec 4< "$root/old-root/etc/NIXOS_LUSTRATE" | ||
437 | |||
438 | echo | ||
439 | echo "Restoring selected impurities:" | ||
440 | while read -u 4 keeper; do | ||
441 | dirname="$(dirname "$keeper")" | ||
442 | mkdir -m 0755 -p "$root/$dirname" | ||
443 | cp -av "$root/old-root/$keeper" "$root/$keeper" | ||
444 | done | ||
445 | |||
446 | exec 4>&- | ||
447 | } | ||
448 | |||
449 | |||
450 | |||
451 | if test -e /sys/power/resume -a -e /sys/power/disk; then | ||
452 | if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then | ||
453 | resumeDev="@resumeDevice@" | ||
454 | resumeInfo="$(udevadm info -q property "$resumeDev" )" | ||
455 | else | ||
456 | for sd in @resumeDevices@; do | ||
457 | # Try to detect resume device. According to Ubuntu bug: | ||
458 | # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1 | ||
459 | # when there are multiple swap devices, we can't know where the hibernate | ||
460 | # image will reside. We can check all of them for swsuspend blkid. | ||
461 | if waitDevice "$sd"; then | ||
462 | resumeInfo="$(udevadm info -q property "$sd")" | ||
463 | if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then | ||
464 | resumeDev="$sd" | ||
465 | break | ||
466 | fi | ||
467 | fi | ||
468 | done | ||
469 | fi | ||
470 | if test -n "$resumeDev"; then | ||
471 | resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')" | ||
472 | resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')" | ||
473 | echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..." | ||
474 | fi | ||
475 | fi | ||
476 | |||
477 | # If we have a path to an iso file, find the iso and link it to /dev/root | ||
478 | if [ -n "$isoPath" ]; then | ||
479 | mkdir -p /findiso | ||
480 | |||
481 | for delay in 5 10; do | ||
482 | blkid | while read -r line; do | ||
483 | device=$(echo "$line" | sed 's/:.*//') | ||
484 | type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/') | ||
485 | |||
486 | mount -t "$type" "$device" /findiso | ||
487 | if [ -e "/findiso$isoPath" ]; then | ||
488 | ln -sf "/findiso$isoPath" /dev/root | ||
489 | break 2 | ||
490 | else | ||
491 | umount /findiso | ||
492 | fi | ||
493 | done | ||
494 | |||
495 | sleep "$delay" | ||
496 | done | ||
497 | fi | ||
498 | |||
499 | # Try to find and mount the root device. | ||
500 | mkdir -p $targetRoot | ||
501 | |||
502 | exec 3< @fsInfo@ | ||
503 | |||
504 | while read -u 3 mountPoint; do | ||
505 | read -u 3 device | ||
506 | read -u 3 fsType | ||
507 | read -u 3 options | ||
508 | |||
509 | # !!! Really quick hack to support bind mounts, i.e., where the | ||
510 | # "device" should be taken relative to /mnt-root, not /. Assume | ||
511 | # that every device that starts with / but doesn't start with /dev | ||
512 | # is a bind mount. | ||
513 | pseudoDevice= | ||
514 | case $device in | ||
515 | /dev/*) | ||
516 | ;; | ||
517 | //*) | ||
518 | # Don't touch SMB/CIFS paths. | ||
519 | pseudoDevice=1 | ||
520 | ;; | ||
521 | /*) | ||
522 | device=/mnt-root$device | ||
523 | ;; | ||
524 | *) | ||
525 | # Not an absolute path; assume that it's a pseudo-device | ||
526 | # like an NFS path (e.g. "server:/path"). | ||
527 | pseudoDevice=1 | ||
528 | ;; | ||
529 | esac | ||
530 | |||
531 | if test -z "$pseudoDevice" && ! waitDevice "$device"; then | ||
532 | # If it doesn't appear, try to mount it anyway (and | ||
533 | # probably fail). This is a fallback for non-device "devices" | ||
534 | # that we don't properly recognise. | ||
535 | echo "Timed out waiting for device $device, trying to mount anyway." | ||
536 | fi | ||
537 | |||
538 | # Wait once more for the udev queue to empty, just in case it's | ||
539 | # doing something with $device right now. | ||
540 | udevadm settle | ||
541 | |||
542 | # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs. | ||
543 | if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then | ||
544 | fsType=$(blkid -o value -s TYPE "$device") | ||
545 | fsSize=$(blockdev --getsize64 "$device") | ||
546 | |||
547 | mkdir -p /tmp-iso | ||
548 | mount -t "$fsType" /dev/root /tmp-iso | ||
549 | mountFS tmpfs /iso size="$fsSize" tmpfs | ||
550 | |||
551 | cp -r /tmp-iso/* /mnt-root/iso/ | ||
552 | |||
553 | umount /tmp-iso | ||
554 | rmdir /tmp-iso | ||
555 | continue | ||
556 | fi | ||
557 | |||
558 | if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ ! -z "$persistence" ]; then | ||
559 | echo persistence... | ||
560 | waitDevice "$persistence" | ||
561 | echo enabling persistence... | ||
562 | mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto" | ||
563 | continue | ||
564 | fi | ||
565 | |||
566 | mountFS "$device" "$mountPoint" "$options" "$fsType" | ||
567 | done | ||
568 | |||
569 | exec 3>&- | ||
570 | |||
571 | |||
572 | @postMountCommands@ | ||
573 | |||
574 | |||
575 | # Emit a udev rule for /dev/root to prevent systemd from complaining. | ||
576 | if [ -e /mnt-root/iso ]; then | ||
577 | eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=/mnt-root/iso) | ||
578 | else | ||
579 | eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=$targetRoot) | ||
580 | fi | ||
581 | if [ "$ROOT_MAJOR" -a "$ROOT_MINOR" -a "$ROOT_MAJOR" != 0 ]; then | ||
582 | mkdir -p /run/udev/rules.d | ||
583 | echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'$ROOT_MAJOR'", ENV{MINOR}=="'$ROOT_MINOR'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules | ||
584 | fi | ||
585 | |||
586 | |||
587 | # Stop udevd. | ||
588 | udevadm control --exit | ||
589 | |||
590 | # Reset the logging file descriptors. | ||
591 | # Do this just before pkill, which will kill the tee process. | ||
592 | exec 1>&$logOutFd 2>&$logErrFd | ||
593 | eval "exec $logOutFd>&- $logErrFd>&-" | ||
594 | |||
595 | # Kill any remaining processes, just to be sure we're not taking any | ||
596 | # with us into stage 2. But keep storage daemons like unionfs-fuse. | ||
597 | # | ||
598 | # Storage daemons are distinguished by an @ in front of their command line: | ||
599 | # https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/ | ||
600 | for pid in $(pgrep -v -f '^@'); do | ||
601 | # Make sure we don't kill kernel processes, see #15226 and: | ||
602 | # http://stackoverflow.com/questions/12213445/identifying-kernel-threads | ||
603 | readlink "/proc/$pid/exe" &> /dev/null || continue | ||
604 | # Try to avoid killing ourselves. | ||
605 | [ $pid -eq $$ ] && continue | ||
606 | kill -9 "$pid" | ||
607 | done | ||
608 | |||
609 | if test -n "$debug1mounts"; then fail; fi | ||
610 | |||
611 | |||
612 | # Restore /proc/sys/kernel/modprobe to its original value. | ||
613 | echo /sbin/modprobe > /proc/sys/kernel/modprobe | ||
614 | |||
615 | |||
616 | # Start stage 2. `switch_root' deletes all files in the ramfs on the | ||
617 | # current root. The path has to be valid in the chroot not outside. | ||
618 | if [ ! -e "$targetRoot/$stage2Init" ]; then | ||
619 | stage2Check=${stage2Init} | ||
620 | while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do | ||
621 | stage2Check=${stage2Check%/*} | ||
622 | done | ||
623 | if [ ! -L "$targetRoot/$stage2Check" ]; then | ||
624 | echo "stage 2 init script ($targetRoot/$stage2Init) not found" | ||
625 | fail | ||
626 | fi | ||
627 | fi | ||
628 | |||
629 | mkdir -m 0755 -p $targetRoot/proc $targetRoot/sys $targetRoot/dev $targetRoot/run | ||
630 | |||
631 | mount --move /proc $targetRoot/proc | ||
632 | mount --move /sys $targetRoot/sys | ||
633 | mount --move /dev $targetRoot/dev | ||
634 | mount --move /run $targetRoot/run | ||
635 | |||
636 | exec env -i $(type -P switch_root) "$targetRoot" "$stage2Init" | ||
637 | |||
638 | fail # should never be reached | ||