summaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r--modules/stage-1/default.nix669
-rw-r--r--modules/stage-1/stage-1-init.sh638
2 files changed, 1307 insertions, 0 deletions
diff --git a/modules/stage-1/default.nix b/modules/stage-1/default.nix
new file mode 100644
index 00000000..5a14584e
--- /dev/null
+++ b/modules/stage-1/default.nix
@@ -0,0 +1,669 @@
1# This module builds the initial ramdisk, which contains an init
2# script that performs the first stage of booting the system: it loads
3# the modules necessary to mount the root file system, then calls the
4# init in the root file system to start the second boot stage.
5
6{ config, lib, utils, pkgs, ... }:
7
8with lib;
9
10let
11
12 udev = config.systemd.package;
13
14 kernel-name = config.boot.kernelPackages.kernel.name or "kernel";
15
16 modulesTree = config.system.modulesTree.override { name = kernel-name + "-modules"; };
17 firmware = config.hardware.firmware;
18
19
20 # Determine the set of modules that we need to mount the root FS.
21 modulesClosure = pkgs.makeModulesClosure {
22 rootModules = config.boot.initrd.availableKernelModules ++ config.boot.initrd.kernelModules;
23 kernel = modulesTree;
24 firmware = firmware;
25 allowMissing = false;
26 };
27
28
29 # The initrd only has to mount `/` or any FS marked as necessary for
30 # booting (such as the FS containing `/nix/store`, or an FS needed for
31 # mounting `/`, like `/` on a loopback).
32 fileSystems = filter utils.fsNeededForBoot config.system.build.fileSystems;
33
34 # A utility for enumerating the shared-library dependencies of a program
35 findLibs = pkgs.buildPackages.writeShellScriptBin "find-libs" ''
36 set -euo pipefail
37
38 declare -A seen
39 left=()
40
41 patchelf="${pkgs.buildPackages.patchelf}/bin/patchelf"
42
43 function add_needed {
44 rpath="$($patchelf --print-rpath $1)"
45 dir="$(dirname $1)"
46 for lib in $($patchelf --print-needed $1); do
47 left+=("$lib" "$rpath" "$dir")
48 done
49 }
50
51 add_needed "$1"
52
53 while [ ''${#left[@]} -ne 0 ]; do
54 next=''${left[0]}
55 rpath=''${left[1]}
56 ORIGIN=''${left[2]}
57 left=("''${left[@]:3}")
58 if [ -z ''${seen[$next]+x} ]; then
59 seen[$next]=1
60
61 # Ignore the dynamic linker which for some reason appears as a DT_NEEDED of glibc but isn't in glibc's RPATH.
62 case "$next" in
63 ld*.so.?) continue;;
64 esac
65
66 IFS=: read -ra paths <<< $rpath
67 res=
68 for path in "''${paths[@]}"; do
69 path=$(eval "echo $path")
70 if [ -f "$path/$next" ]; then
71 res="$path/$next"
72 echo "$res"
73 add_needed "$res"
74 break
75 fi
76 done
77 if [ -z "$res" ]; then
78 echo "Couldn't satisfy dependency $next" >&2
79 exit 1
80 fi
81 fi
82 done
83 '';
84
85 # Some additional utilities needed in stage 1, like mount, lvm, fsck
86 # etc. We don't want to bring in all of those packages, so we just
87 # copy what we need. Instead of using statically linked binaries,
88 # we just copy what we need from Glibc and use patchelf to make it
89 # work.
90 extraUtils = pkgs.runCommandCC "extra-utils"
91 { nativeBuildInputs = [pkgs.buildPackages.nukeReferences];
92 allowedReferences = [ "out" ]; # prevent accidents like glibc being included in the initrd
93 }
94 ''
95 set +o pipefail
96
97 mkdir -p $out/bin $out/lib
98 ln -s $out/bin $out/sbin
99
100 copy_bin_and_libs () {
101 [ -f "$out/bin/$(basename $1)" ] && rm "$out/bin/$(basename $1)"
102 cp -pdv $1 $out/bin
103 }
104
105 # Copy BusyBox.
106 for BIN in ${pkgs.busybox}/{s,}bin/*; do
107 copy_bin_and_libs $BIN
108 done
109
110 # Copy some util-linux stuff.
111 copy_bin_and_libs ${pkgs.util-linux}/sbin/blkid
112
113 # Copy dmsetup and lvm.
114 copy_bin_and_libs ${getBin pkgs.lvm2}/bin/dmsetup
115 copy_bin_and_libs ${getBin pkgs.lvm2}/bin/lvm
116
117 # Add RAID mdadm tool.
118 copy_bin_and_libs ${pkgs.mdadm}/sbin/mdadm
119 copy_bin_and_libs ${pkgs.mdadm}/sbin/mdmon
120
121 # Copy udev.
122 copy_bin_and_libs ${udev}/bin/udevadm
123 copy_bin_and_libs ${udev}/lib/systemd/systemd-sysctl
124 for BIN in ${udev}/lib/udev/*_id; do
125 copy_bin_and_libs $BIN
126 done
127 # systemd-udevd is only a symlink to udevadm these days
128 ln -sf udevadm $out/bin/systemd-udevd
129
130 # Copy modprobe.
131 copy_bin_and_libs ${pkgs.kmod}/bin/kmod
132 ln -sf kmod $out/bin/modprobe
133
134 # Copy resize2fs if any ext* filesystems are to be resized
135 ${optionalString (any (fs: fs.autoResize && (lib.hasPrefix "ext" fs.fsType)) fileSystems) ''
136 # We need mke2fs in the initrd.
137 copy_bin_and_libs ${pkgs.e2fsprogs}/sbin/resize2fs
138 ''}
139
140 # Copy secrets if needed.
141 #
142 # TODO: move out to a separate script; see #85000.
143 ${optionalString (!config.boot.loader.supportsInitrdSecrets)
144 (concatStringsSep "\n" (mapAttrsToList (dest: source:
145 let source' = if source == null then dest else source; in
146 ''
147 mkdir -p $(dirname "$out/secrets/${dest}")
148 # Some programs (e.g. ssh) doesn't like secrets to be
149 # symlinks, so we use `cp -L` here to match the
150 # behaviour when secrets are natively supported.
151 cp -Lr ${source'} "$out/secrets/${dest}"
152 ''
153 ) config.boot.initrd.secrets))
154 }
155
156 ${config.boot.initrd.extraUtilsCommands}
157
158 # Copy ld manually since it isn't detected correctly
159 cp -pv ${pkgs.stdenv.cc.libc.out}/lib/ld*.so.? $out/lib
160
161 # Copy all of the needed libraries
162 find $out/bin $out/lib -type f | while read BIN; do
163 echo "Copying libs for executable $BIN"
164 for LIB in $(${findLibs}/bin/find-libs $BIN); do
165 TGT="$out/lib/$(basename $LIB)"
166 if [ ! -f "$TGT" ]; then
167 SRC="$(readlink -e $LIB)"
168 cp -pdv "$SRC" "$TGT"
169 fi
170 done
171 done
172
173 # Strip binaries further than normal.
174 chmod -R u+w $out
175 stripDirs "$STRIP" "lib bin" "-s"
176
177 # Run patchelf to make the programs refer to the copied libraries.
178 find $out/bin $out/lib -type f | while read i; do
179 if ! test -L $i; then
180 nuke-refs -e $out $i
181 fi
182 done
183
184 find $out/bin -type f | while read i; do
185 if ! test -L $i; then
186 echo "patching $i..."
187 patchelf --set-interpreter $out/lib/ld*.so.? --set-rpath $out/lib $i || true
188 fi
189 done
190
191 if [ -z "${toString (pkgs.stdenv.hostPlatform != pkgs.stdenv.buildPlatform)}" ]; then
192 # Make sure that the patchelf'ed binaries still work.
193 echo "testing patched programs..."
194 $out/bin/ash -c 'echo hello world' | grep "hello world"
195 export LD_LIBRARY_PATH=$out/lib
196 $out/bin/mount --help 2>&1 | grep -q "BusyBox"
197 $out/bin/blkid -V 2>&1 | grep -q 'libblkid'
198 $out/bin/udevadm --version
199 $out/bin/dmsetup --version 2>&1 | tee -a log | grep -q "version:"
200 LVM_SYSTEM_DIR=$out $out/bin/lvm version 2>&1 | tee -a log | grep -q "LVM"
201 $out/bin/mdadm --version
202
203 ${config.boot.initrd.extraUtilsCommandsTest}
204 fi
205 ''; # */
206
207
208 # Networkd link files are used early by udev to set up interfaces early.
209 # This must be done in stage 1 to avoid race conditions between udev and
210 # network daemons.
211 linkUnits = pkgs.runCommand "link-units" {
212 allowedReferences = [ extraUtils ];
213 preferLocalBuild = true;
214 } (''
215 mkdir -p $out
216 cp -v ${udev}/lib/systemd/network/*.link $out/
217 '' + (
218 let
219 links = filterAttrs (n: v: hasSuffix ".link" n) config.systemd.network.units;
220 files = mapAttrsToList (n: v: "${v.unit}/${n}") links;
221 in
222 concatMapStringsSep "\n" (file: "cp -v ${file} $out/") files
223 ));
224
225 udevRules = pkgs.runCommand "udev-rules" {
226 allowedReferences = [ extraUtils ];
227 preferLocalBuild = true;
228 } ''
229 mkdir -p $out
230
231 echo 'ENV{LD_LIBRARY_PATH}="${extraUtils}/lib"' > $out/00-env.rules
232
233 cp -v ${udev}/lib/udev/rules.d/60-cdrom_id.rules $out/
234 cp -v ${udev}/lib/udev/rules.d/60-persistent-storage.rules $out/
235 cp -v ${udev}/lib/udev/rules.d/75-net-description.rules $out/
236 cp -v ${udev}/lib/udev/rules.d/80-drivers.rules $out/
237 cp -v ${udev}/lib/udev/rules.d/80-net-setup-link.rules $out/
238 cp -v ${pkgs.lvm2}/lib/udev/rules.d/*.rules $out/
239 ${config.boot.initrd.extraUdevRulesCommands}
240
241 for i in $out/*.rules; do
242 substituteInPlace $i \
243 --replace ata_id ${extraUtils}/bin/ata_id \
244 --replace scsi_id ${extraUtils}/bin/scsi_id \
245 --replace cdrom_id ${extraUtils}/bin/cdrom_id \
246 --replace ${pkgs.coreutils}/bin/basename ${extraUtils}/bin/basename \
247 --replace ${pkgs.util-linux}/bin/blkid ${extraUtils}/bin/blkid \
248 --replace ${getBin pkgs.lvm2}/bin ${extraUtils}/bin \
249 --replace ${pkgs.mdadm}/sbin ${extraUtils}/sbin \
250 --replace ${pkgs.bash}/bin/sh ${extraUtils}/bin/sh \
251 --replace ${udev} ${extraUtils}
252 done
253
254 # Work around a bug in QEMU, which doesn't implement the "READ
255 # DISC INFORMATION" SCSI command:
256 # https://bugzilla.redhat.com/show_bug.cgi?id=609049
257 # As a result, `cdrom_id' doesn't print
258 # ID_CDROM_MEDIA_TRACK_COUNT_DATA, which in turn prevents the
259 # /dev/disk/by-label symlinks from being created. We need these
260 # in the NixOS installation CD, so use ID_CDROM_MEDIA in the
261 # corresponding udev rules for now. This was the behaviour in
262 # udev <= 154. See also
263 # http://www.spinics.net/lists/hotplug/msg03935.html
264 substituteInPlace $out/60-persistent-storage.rules \
265 --replace ID_CDROM_MEDIA_TRACK_COUNT_DATA ID_CDROM_MEDIA
266 ''; # */
267
268
269 # The init script of boot stage 1 (loading kernel modules for
270 # mounting the root FS).
271 bootStage1 = pkgs.substituteAll {
272 src = ./stage-1-init.sh;
273
274 shell = "${extraUtils}/bin/ash";
275
276 isExecutable = true;
277
278 postInstall = ''
279 echo checking syntax
280 # check both with bash
281 ${pkgs.buildPackages.bash}/bin/sh -n $target
282 # and with ash shell, just in case
283 ${pkgs.buildPackages.busybox}/bin/ash -n $target
284 '';
285
286 inherit linkUnits udevRules extraUtils modulesClosure;
287
288 inherit (config.boot) resumeDevice;
289
290 inherit (config.system.build) earlyMountScript;
291
292 inherit (config.boot.initrd) checkJournalingFS verbose
293 preLVMCommands preDeviceCommands postDeviceCommands postMountCommands preFailCommands kernelModules;
294
295 resumeDevices = map (sd: if sd ? device then sd.device else "/dev/disk/by-label/${sd.label}")
296 (filter (sd: hasPrefix "/dev/" sd.device && !sd.randomEncryption.enable
297 # Don't include zram devices
298 && !(hasPrefix "/dev/zram" sd.device)
299 ) config.swapDevices);
300
301 fsInfo =
302 let f = fs: [ fs.mountPoint (if fs.device != null then fs.device else "/dev/disk/by-label/${fs.label}") fs.fsType (builtins.concatStringsSep "," fs.options) ];
303 in pkgs.writeText "initrd-fsinfo" (concatStringsSep "\n" (concatMap f fileSystems));
304
305 setHostId = optionalString (config.networking.hostId != null) ''
306 hi="${config.networking.hostId}"
307 ${if pkgs.stdenv.isBigEndian then ''
308 echo -ne "\x''${hi:0:2}\x''${hi:2:2}\x''${hi:4:2}\x''${hi:6:2}" > /etc/hostid
309 '' else ''
310 echo -ne "\x''${hi:6:2}\x''${hi:4:2}\x''${hi:2:2}\x''${hi:0:2}" > /etc/hostid
311 ''}
312 '';
313 };
314
315
316 # The closure of the init script of boot stage 1 is what we put in
317 # the initial RAM disk.
318 initialRamdisk = pkgs.makeInitrd {
319 name = "initrd-${kernel-name}";
320 inherit (config.boot.initrd) compressor compressorArgs prepend;
321
322 contents =
323 [ { object = bootStage1;
324 symlink = "/init";
325 }
326 { object = pkgs.writeText "mdadm.conf" config.boot.initrd.mdadmConf;
327 symlink = "/etc/mdadm.conf";
328 }
329 { object = pkgs.runCommand "initrd-kmod-blacklist-ubuntu" {
330 src = "${pkgs.kmod-blacklist-ubuntu}/modprobe.conf";
331 preferLocalBuild = true;
332 } ''
333 target=$out
334 ${pkgs.buildPackages.perl}/bin/perl -0pe 's/## file: iwlwifi.conf(.+?)##/##/s;' $src > $out
335 '';
336 symlink = "/etc/modprobe.d/ubuntu.conf";
337 }
338 { object = pkgs.kmod-debian-aliases;
339 symlink = "/etc/modprobe.d/debian.conf";
340 }
341 ];
342 };
343
344 # Script to add secret files to the initrd at bootloader update time
345 initialRamdiskSecretAppender =
346 let
347 compressorExe = initialRamdisk.compressorExecutableFunction pkgs;
348 in pkgs.writeScriptBin "append-initrd-secrets"
349 ''
350 #!${pkgs.bash}/bin/bash -e
351 function usage {
352 echo "USAGE: $0 INITRD_FILE" >&2
353 echo "Appends this configuration's secrets to INITRD_FILE" >&2
354 }
355
356 if [ $# -ne 1 ]; then
357 usage
358 exit 1
359 fi
360
361 if [ "$1"x = "--helpx" ]; then
362 usage
363 exit 0
364 fi
365
366 ${lib.optionalString (config.boot.initrd.secrets == {})
367 "exit 0"}
368
369 export PATH=${pkgs.coreutils}/bin:${pkgs.cpio}/bin:${pkgs.gzip}/bin:${pkgs.findutils}/bin
370
371 function cleanup {
372 if [ -n "$tmp" -a -d "$tmp" ]; then
373 rm -fR "$tmp"
374 fi
375 }
376 trap cleanup EXIT
377
378 tmp=$(mktemp -d initrd-secrets.XXXXXXXXXX)
379
380 ${lib.concatStringsSep "\n" (mapAttrsToList (dest: source:
381 let source' = if source == null then dest else toString source; in
382 ''
383 mkdir -p $(dirname "$tmp/${dest}")
384 cp -aL ${source'} "$tmp/${dest}"
385 ''
386 ) config.boot.initrd.secrets)
387 }
388
389 (cd "$tmp" && find . -print0 | sort -z | cpio --quiet -o -H newc -R +0:+0 --reproducible --null) | \
390 ${compressorExe} ${lib.escapeShellArgs initialRamdisk.compressorArgs} >> "$1"
391 '';
392
393in
394
395{
396 disabledModules = [ "system/boot/stage-1.nix" ];
397
398 options = {
399
400 boot.resumeDevice = mkOption {
401 type = types.str;
402 default = "";
403 example = "/dev/sda3";
404 description = ''
405 Device for manual resume attempt during boot. This should be used primarily
406 if you want to resume from file. If left empty, the swap partitions are used.
407 Specify here the device where the file resides.
408 You should also use <varname>boot.kernelParams</varname> to specify
409 <literal><replaceable>resume_offset</replaceable></literal>.
410 '';
411 };
412
413 boot.initrd.enable = mkOption {
414 type = types.bool;
415 default = !config.boot.isContainer;
416 defaultText = "!config.boot.isContainer";
417 description = ''
418 Whether to enable the NixOS initial RAM disk (initrd). This may be
419 needed to perform some initialisation tasks (like mounting
420 network/encrypted file systems) before continuing the boot process.
421 '';
422 };
423
424 boot.initrd.prepend = mkOption {
425 default = [ ];
426 type = types.listOf types.str;
427 description = ''
428 Other initrd files to prepend to the final initrd we are building.
429 '';
430 };
431
432 boot.initrd.checkJournalingFS = mkOption {
433 default = true;
434 type = types.bool;
435 description = ''
436 Whether to run <command>fsck</command> on journaling filesystems such as ext3.
437 '';
438 };
439
440 boot.initrd.mdadmConf = mkOption {
441 default = "";
442 type = types.lines;
443 description = ''
444 Contents of <filename>/etc/mdadm.conf</filename> in stage 1.
445 '';
446 };
447
448 boot.initrd.preLVMCommands = mkOption {
449 default = "";
450 type = types.lines;
451 description = ''
452 Shell commands to be executed immediately before LVM discovery.
453 '';
454 };
455
456 boot.initrd.preDeviceCommands = mkOption {
457 default = "";
458 type = types.lines;
459 description = ''
460 Shell commands to be executed before udev is started to create
461 device nodes.
462 '';
463 };
464
465 boot.initrd.postDeviceCommands = mkOption {
466 default = "";
467 type = types.lines;
468 description = ''
469 Shell commands to be executed immediately after stage 1 of the
470 boot has loaded kernel modules and created device nodes in
471 <filename>/dev</filename>.
472 '';
473 };
474
475 boot.initrd.postMountCommands = mkOption {
476 default = "";
477 type = types.lines;
478 description = ''
479 Shell commands to be executed immediately after the stage 1
480 filesystems have been mounted.
481 '';
482 };
483
484 boot.initrd.preFailCommands = mkOption {
485 default = "";
486 type = types.lines;
487 description = ''
488 Shell commands to be executed before the failure prompt is shown.
489 '';
490 };
491
492 boot.initrd.extraUtilsCommands = mkOption {
493 internal = true;
494 default = "";
495 type = types.lines;
496 description = ''
497 Shell commands to be executed in the builder of the
498 extra-utils derivation. This can be used to provide
499 additional utilities in the initial ramdisk.
500 '';
501 };
502
503 boot.initrd.extraUtilsCommandsTest = mkOption {
504 internal = true;
505 default = "";
506 type = types.lines;
507 description = ''
508 Shell commands to be executed in the builder of the
509 extra-utils derivation after patchelf has done its
510 job. This can be used to test additional utilities
511 copied in extraUtilsCommands.
512 '';
513 };
514
515 boot.initrd.extraUdevRulesCommands = mkOption {
516 internal = true;
517 default = "";
518 type = types.lines;
519 description = ''
520 Shell commands to be executed in the builder of the
521 udev-rules derivation. This can be used to add
522 additional udev rules in the initial ramdisk.
523 '';
524 };
525
526 boot.initrd.compressor = mkOption {
527 default = (
528 if lib.versionAtLeast config.boot.kernelPackages.kernel.version "5.9"
529 then "zstd"
530 else "gzip"
531 );
532 defaultText = "zstd if the kernel supports it (5.9+), gzip if not.";
533 type = types.unspecified; # We don't have a function type...
534 description = ''
535 The compressor to use on the initrd image. May be any of:
536
537 <itemizedlist>
538 <listitem><para>The name of one of the predefined compressors, see <filename>pkgs/build-support/kernel/initrd-compressor-meta.nix</filename> for the definitions.</para></listitem>
539 <listitem><para>A function which, given the nixpkgs package set, returns the path to a compressor tool, e.g. <literal>pkgs: "''${pkgs.pigz}/bin/pigz"</literal></para></listitem>
540 <listitem><para>(not recommended, because it does not work when cross-compiling) the full path to a compressor tool, e.g. <literal>"''${pkgs.pigz}/bin/pigz"</literal></para></listitem>
541 </itemizedlist>
542
543 The given program should read data from stdin and write it to stdout compressed.
544 '';
545 example = "xz";
546 };
547
548 boot.initrd.compressorArgs = mkOption {
549 default = null;
550 type = types.nullOr (types.listOf types.str);
551 description = "Arguments to pass to the compressor for the initrd image, or null to use the compressor's defaults.";
552 };
553
554 boot.initrd.secrets = mkOption
555 { default = {};
556 type = types.attrsOf (types.nullOr types.path);
557 description =
558 ''
559 Secrets to append to the initrd. The attribute name is the
560 path the secret should have inside the initrd, the value
561 is the path it should be copied from (or null for the same
562 path inside and out).
563 '';
564 example = literalExample
565 ''
566 { "/etc/dropbear/dropbear_rsa_host_key" =
567 ./secret-dropbear-key;
568 }
569 '';
570 };
571
572 boot.initrd.supportedFilesystems = mkOption {
573 default = [ ];
574 example = [ "btrfs" ];
575 type = types.listOf types.str;
576 description = "Names of supported filesystem types in the initial ramdisk.";
577 };
578
579 boot.initrd.verbose = mkOption {
580 default = true;
581 type = types.bool;
582 description =
583 ''
584 Verbosity of the initrd. Please note that disabling verbosity removes
585 only the mandatory messages generated by the NixOS scripts. For a
586 completely silent boot, you might also want to set the two following
587 configuration options:
588
589 <itemizedlist>
590 <listitem><para><literal>boot.consoleLogLevel = 0;</literal></para></listitem>
591 <listitem><para><literal>boot.kernelParams = [ "quiet" "udev.log_priority=3" ];</literal></para></listitem>
592 </itemizedlist>
593 '';
594 };
595
596 boot.loader.supportsInitrdSecrets = mkOption
597 { internal = true;
598 default = false;
599 type = types.bool;
600 description =
601 ''
602 Whether the bootloader setup runs append-initrd-secrets.
603 If not, any needed secrets must be copied into the initrd
604 and thus added to the store.
605 '';
606 };
607
608 fileSystems = mkOption {
609 type = with lib.types; attrsOf (submodule {
610 options.neededForBoot = mkOption {
611 default = false;
612 type = types.bool;
613 description = ''
614 If set, this file system will be mounted in the initial ramdisk.
615 Note that the file system will always be mounted in the initial
616 ramdisk if its mount point is one of the following:
617 ${concatStringsSep ", " (
618 forEach utils.pathsNeededForBoot (i: "<filename>${i}</filename>")
619 )}.
620 '';
621 };
622 });
623 };
624
625 };
626
627 config = mkIf config.boot.initrd.enable {
628 assertions = [
629 { assertion = any (fs: fs.mountPoint == "/") fileSystems;
630 message = "The ‘fileSystems’ option does not specify your root file system.";
631 }
632 { assertion = let inherit (config.boot) resumeDevice; in
633 resumeDevice == "" || builtins.substring 0 1 resumeDevice == "/";
634 message = "boot.resumeDevice has to be an absolute path."
635 + " Old \"x:y\" style is no longer supported.";
636 }
637 # TODO: remove when #85000 is fixed
638 { assertion = !config.boot.loader.supportsInitrdSecrets ->
639 all (source:
640 builtins.isPath source ||
641 (builtins.isString source && hasPrefix builtins.storeDir source))
642 (attrValues config.boot.initrd.secrets);
643 message = ''
644 boot.loader.initrd.secrets values must be unquoted paths when
645 using a bootloader that doesn't natively support initrd
646 secrets, e.g.:
647
648 boot.initrd.secrets = {
649 "/etc/secret" = /path/to/secret;
650 };
651
652 Note that this will result in all secrets being stored
653 world-readable in the Nix store!
654 '';
655 }
656 ];
657
658 system.build =
659 { inherit bootStage1 initialRamdisk initialRamdiskSecretAppender extraUtils; };
660
661 system.requiredKernelConfig = with config.lib.kernelConfig; [
662 (isYes "TMPFS")
663 (isYes "BLK_DEV_INITRD")
664 ];
665
666 boot.initrd.supportedFilesystems = map (fs: fs.fsType) fileSystems;
667
668 };
669}
diff --git a/modules/stage-1/stage-1-init.sh b/modules/stage-1/stage-1-init.sh
new file mode 100644
index 00000000..ddaf9858
--- /dev/null
+++ b/modules/stage-1/stage-1-init.sh
@@ -0,0 +1,638 @@
1#! @shell@
2
3targetRoot=/mnt-root
4console=tty1
5verbose="@verbose@"
6
7info() {
8 if [[ -n "$verbose" ]]; then
9 echo "$@"
10 fi
11}
12
13extraUtils="@extraUtils@"
14export LD_LIBRARY_PATH=@extraUtils@/lib
15export PATH=@extraUtils@/bin
16ln -s @extraUtils@/bin /bin
17
18# Copy the secrets to their needed location
19if [ -d "@extraUtils@/secrets" ]; then
20 for secret in $(cd "@extraUtils@/secrets"; find . -type f); do
21 mkdir -p $(dirname "/$secret")
22 ln -s "@extraUtils@/secrets/$secret" "$secret"
23 done
24fi
25
26# Stop LVM complaining about fd3
27export LVM_SUPPRESS_FD_WARNINGS=true
28
29fail() {
30 if [ -n "$panicOnFail" ]; then exit 1; fi
31
32 @preFailCommands@
33
34 # If starting stage 2 failed, allow the user to repair the problem
35 # in an interactive shell.
36 cat <<EOF
37
38An error occurred in stage 1 of the boot process, which must mount the
39root filesystem on \`$targetRoot' and then start stage 2. Press one
40of the following keys:
41
42EOF
43 if [ -n "$allowShell" ]; then cat <<EOF
44 i) to launch an interactive shell
45 f) to start an interactive shell having pid 1 (needed if you want to
46 start stage 2's init manually)
47EOF
48 fi
49 cat <<EOF
50 r) to reboot immediately
51 *) to ignore the error and continue
52EOF
53
54 read -n 1 reply
55
56 if [ -n "$allowShell" -a "$reply" = f ]; then
57 exec setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console"
58 elif [ -n "$allowShell" -a "$reply" = i ]; then
59 echo "Starting interactive shell..."
60 setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail
61 elif [ "$reply" = r ]; then
62 echo "Rebooting..."
63 reboot -f
64 else
65 info "Continuing..."
66 fi
67}
68
69trap 'fail' 0
70
71
72# Print a greeting.
73info
74info "<<< NixOS Stage 1 >>>"
75info
76
77# Make several required directories.
78mkdir -p /etc/udev
79touch /etc/fstab # to shut up mount
80ln -s /proc/mounts /etc/mtab # to shut up mke2fs
81touch /etc/udev/hwdb.bin # to shut up udev
82touch /etc/initrd-release
83
84# Function for waiting a device to appear.
85waitDevice() {
86 local device="$1"
87
88 # USB storage devices tend to appear with some delay. It would be
89 # great if we had a way to synchronously wait for them, but
90 # alas... So just wait for a few seconds for the device to
91 # appear.
92 if test ! -e $device; then
93 echo -n "waiting for device $device to appear..."
94 try=20
95 while [ $try -gt 0 ]; do
96 sleep 1
97 # also re-try lvm activation now that new block devices might have appeared
98 lvm vgchange -ay
99 # and tell udev to create nodes for the new LVs
100 udevadm trigger --action=add
101 if test -e $device; then break; fi
102 echo -n "."
103 try=$((try - 1))
104 done
105 echo
106 [ $try -ne 0 ]
107 fi
108}
109
110# Mount special file systems.
111specialMount() {
112 local device="$1"
113 local mountPoint="$2"
114 local options="$3"
115 local fsType="$4"
116
117 mkdir -m 0755 -p "$mountPoint"
118 mount -n -t "$fsType" -o "$options" "$device" "$mountPoint"
119}
120source @earlyMountScript@
121
122# Log the script output to /dev/kmsg or /run/log/stage-1-init.log.
123mkdir -p /tmp
124mkfifo /tmp/stage-1-init.log.fifo
125logOutFd=8 && logErrFd=9
126eval "exec $logOutFd>&1 $logErrFd>&2"
127if test -w /dev/kmsg; then
128 tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do
129 if test -n "$line"; then
130 echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg
131 fi
132 done &
133else
134 mkdir -p /run/log
135 tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log &
136fi
137exec > /tmp/stage-1-init.log.fifo 2>&1
138
139
140# Process the kernel command line.
141export stage2Init=/init
142for o in $(cat /proc/cmdline); do
143 case $o in
144 console=*)
145 set -- $(IFS==; echo $o)
146 params=$2
147 set -- $(IFS=,; echo $params)
148 console=$1
149 ;;
150 init=*)
151 set -- $(IFS==; echo $o)
152 stage2Init=$2
153 ;;
154 boot.persistence=*)
155 set -- $(IFS==; echo $o)
156 persistence=$2
157 ;;
158 boot.persistence.opt=*)
159 set -- $(IFS==; echo $o)
160 persistence_opt=$2
161 ;;
162 boot.trace|debugtrace)
163 # Show each command.
164 set -x
165 ;;
166 boot.shell_on_fail)
167 allowShell=1
168 ;;
169 boot.debug1|debug1) # stop right away
170 allowShell=1
171 fail
172 ;;
173 boot.debug1devices) # stop after loading modules and creating device nodes
174 allowShell=1
175 debug1devices=1
176 ;;
177 boot.debug1mounts) # stop after mounting file systems
178 allowShell=1
179 debug1mounts=1
180 ;;
181 boot.panic_on_fail|stage1panic=1)
182 panicOnFail=1
183 ;;
184 root=*)
185 # If a root device is specified on the kernel command
186 # line, make it available through the symlink /dev/root.
187 # Recognise LABEL= and UUID= to support UNetbootin.
188 set -- $(IFS==; echo $o)
189 if [ $2 = "LABEL" ]; then
190 root="/dev/disk/by-label/$3"
191 elif [ $2 = "UUID" ]; then
192 root="/dev/disk/by-uuid/$3"
193 else
194 root=$2
195 fi
196 ln -s "$root" /dev/root
197 ;;
198 copytoram)
199 copytoram=1
200 ;;
201 findiso=*)
202 # if an iso name is supplied, try to find the device where
203 # the iso resides on
204 set -- $(IFS==; echo $o)
205 isoPath=$2
206 ;;
207 esac
208done
209
210# Set hostid before modules are loaded.
211# This is needed by the spl/zfs modules.
212@setHostId@
213
214# Load the required kernel modules.
215mkdir -p /lib
216ln -s @modulesClosure@/lib/modules /lib/modules
217ln -s @modulesClosure@/lib/firmware /lib/firmware
218echo @extraUtils@/bin/modprobe > /proc/sys/kernel/modprobe
219for i in @kernelModules@; do
220 info "loading module $(basename $i)..."
221 modprobe $i
222done
223
224
225# Create device nodes in /dev.
226@preDeviceCommands@
227info "running udev..."
228ln -sfn /proc/self/fd /dev/fd
229ln -sfn /proc/self/fd/0 /dev/stdin
230ln -sfn /proc/self/fd/1 /dev/stdout
231ln -sfn /proc/self/fd/2 /dev/stderr
232mkdir -p /etc/systemd
233ln -sfn @linkUnits@ /etc/systemd/network
234mkdir -p /etc/udev
235ln -sfn @udevRules@ /etc/udev/rules.d
236mkdir -p /dev/.mdadm
237systemd-udevd --daemon
238udevadm trigger --action=add
239udevadm settle
240
241
242# XXX: Use case usb->lvm will still fail, usb->luks->lvm is covered
243@preLVMCommands@
244
245info "starting device mapper and LVM..."
246lvm vgchange -ay
247
248if test -n "$debug1devices"; then fail; fi
249
250
251@postDeviceCommands@
252
253
254# Return true if the machine is on AC power, or if we can't determine
255# whether it's on AC power.
256onACPower() {
257 ! test -d "/proc/acpi/battery" ||
258 ! ls /proc/acpi/battery/BAT[0-9]* > /dev/null 2>&1 ||
259 ! cat /proc/acpi/battery/BAT*/state | grep "^charging state" | grep -q "discharg"
260}
261
262
263# Check the specified file system, if appropriate.
264checkFS() {
265 local device="$1"
266 local fsType="$2"
267
268 # Only check block devices.
269 if [ ! -b "$device" ]; then return 0; fi
270
271 # Don't check ROM filesystems.
272 if [ "$fsType" = iso9660 -o "$fsType" = udf ]; then return 0; fi
273
274 # Don't check resilient COWs as they validate the fs structures at mount time
275 if [ "$fsType" = btrfs -o "$fsType" = zfs -o "$fsType" = bcachefs ]; then return 0; fi
276
277 # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem.
278 if [ "$fsType" = nilfs2 ]; then return 0; fi
279
280 # Skip fsck for inherently readonly filesystems.
281 if [ "$fsType" = squashfs ]; then return 0; fi
282
283 # If we couldn't figure out the FS type, then skip fsck.
284 if [ "$fsType" = auto ]; then
285 echo 'cannot check filesystem with type "auto"!'
286 return 0
287 fi
288
289 # Device might be already mounted manually
290 # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs
291 if mount | grep -q "^$device on "; then
292 echo "skip checking already mounted $device"
293 return 0
294 fi
295
296 # Optionally, skip fsck on journaling filesystems. This option is
297 # a hack - it's mostly because e2fsck on ext3 takes much longer to
298 # recover the journal than the ext3 implementation in the kernel
299 # does (minutes versus seconds).
300 if test -z "@checkJournalingFS@" -a \
301 \( "$fsType" = ext3 -o "$fsType" = ext4 -o "$fsType" = reiserfs \
302 -o "$fsType" = xfs -o "$fsType" = jfs -o "$fsType" = f2fs \)
303 then
304 return 0
305 fi
306
307 # Don't run `fsck' if the machine is on battery power. !!! Is
308 # this a good idea?
309 if ! onACPower; then
310 echo "on battery power, so no \`fsck' will be performed on \`$device'"
311 return 0
312 fi
313
314 echo "checking $device..."
315
316 fsckFlags=
317 if test "$fsType" != "btrfs"; then
318 fsckFlags="-V -a"
319 fi
320 fsck $fsckFlags "$device"
321 fsckResult=$?
322
323 if test $(($fsckResult | 2)) = $fsckResult; then
324 echo "fsck finished, rebooting..."
325 sleep 3
326 reboot -f
327 fi
328
329 if test $(($fsckResult | 4)) = $fsckResult; then
330 echo "$device has unrepaired errors, please fix them manually."
331 fail
332 fi
333
334 if test $fsckResult -ge 8; then
335 echo "fsck on $device failed."
336 fail
337 fi
338
339 return 0
340}
341
342
343# Function for mounting a file system.
344mountFS() {
345 local device="$1"
346 local mountPoint="$2"
347 local options="$3"
348 local fsType="$4"
349
350 if [ "$fsType" = auto ]; then
351 fsType=$(blkid -o value -s TYPE "$device")
352 if [ -z "$fsType" ]; then fsType=auto; fi
353 fi
354
355 # Filter out x- options, which busybox doesn't do yet.
356 local optionsFiltered="$(IFS=,; for i in $options; do if [ "${i:0:2}" != "x-" ]; then echo -n $i,; fi; done)"
357 # Prefix (lower|upper|work)dir with /mnt-root (overlayfs)
358 local optionsPrefixed="$( echo "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g' )"
359
360 echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab
361
362 checkFS "$device" "$fsType"
363
364 # Optionally resize the filesystem.
365 case $options in
366 *x-nixos.autoresize*)
367 if [ "$fsType" = ext2 -o "$fsType" = ext3 -o "$fsType" = ext4 ]; then
368 modprobe "$fsType"
369 echo "resizing $device..."
370 e2fsck -fp "$device"
371 resize2fs "$device"
372 elif [ "$fsType" = f2fs ]; then
373 echo "resizing $device..."
374 fsck.f2fs -fp "$device"
375 resize.f2fs "$device"
376 fi
377 ;;
378 esac
379
380 # Create backing directories for overlayfs
381 if [ "$fsType" = overlay ]; then
382 for i in upper work; do
383 dir="$( echo "$optionsPrefixed" | grep -o "${i}dir=[^,]*" )"
384 mkdir -m 0700 -p "${dir##*=}"
385 done
386 fi
387
388 info "mounting $device on $mountPoint..."
389
390 mkdir -p "/mnt-root$mountPoint"
391
392 # For ZFS and CIFS mounts, retry a few times before giving up.
393 # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383.
394 local n=0
395 while true; do
396 mount "/mnt-root$mountPoint" && break
397 if [ \( "$fsType" != cifs -a "$fsType" != zfs \) -o "$n" -ge 10 ]; then fail; break; fi
398 echo "retrying..."
399 sleep 1
400 n=$((n + 1))
401 done
402
403 [ "$mountPoint" == "/" ] &&
404 [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ] &&
405 lustrateRoot "/mnt-root"
406
407 true
408}
409
410lustrateRoot () {
411 local root="$1"
412
413 echo
414 echo -e "\e[1;33m<<< NixOS is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m"
415 echo
416
417 mkdir -m 0755 -p "$root/old-root.tmp"
418
419 echo
420 echo "Moving impurities out of the way:"
421 for d in "$root"/*
422 do
423 [ "$d" == "$root/nix" ] && continue
424 [ "$d" == "$root/boot" ] && continue # Don't render the system unbootable
425 [ "$d" == "$root/old-root.tmp" ] && continue
426
427 mv -v "$d" "$root/old-root.tmp"
428 done
429
430 # Use .tmp to make sure subsequent invokations don't clash
431 mv -v "$root/old-root.tmp" "$root/old-root"
432
433 mkdir -m 0755 -p "$root/etc"
434 touch "$root/etc/NIXOS"
435
436 exec 4< "$root/old-root/etc/NIXOS_LUSTRATE"
437
438 echo
439 echo "Restoring selected impurities:"
440 while read -u 4 keeper; do
441 dirname="$(dirname "$keeper")"
442 mkdir -m 0755 -p "$root/$dirname"
443 cp -av "$root/old-root/$keeper" "$root/$keeper"
444 done
445
446 exec 4>&-
447}
448
449
450
451if test -e /sys/power/resume -a -e /sys/power/disk; then
452 if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then
453 resumeDev="@resumeDevice@"
454 resumeInfo="$(udevadm info -q property "$resumeDev" )"
455 else
456 for sd in @resumeDevices@; do
457 # Try to detect resume device. According to Ubuntu bug:
458 # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1
459 # when there are multiple swap devices, we can't know where the hibernate
460 # image will reside. We can check all of them for swsuspend blkid.
461 if waitDevice "$sd"; then
462 resumeInfo="$(udevadm info -q property "$sd")"
463 if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then
464 resumeDev="$sd"
465 break
466 fi
467 fi
468 done
469 fi
470 if test -n "$resumeDev"; then
471 resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')"
472 resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')"
473 echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..."
474 fi
475fi
476
477# If we have a path to an iso file, find the iso and link it to /dev/root
478if [ -n "$isoPath" ]; then
479 mkdir -p /findiso
480
481 for delay in 5 10; do
482 blkid | while read -r line; do
483 device=$(echo "$line" | sed 's/:.*//')
484 type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/')
485
486 mount -t "$type" "$device" /findiso
487 if [ -e "/findiso$isoPath" ]; then
488 ln -sf "/findiso$isoPath" /dev/root
489 break 2
490 else
491 umount /findiso
492 fi
493 done
494
495 sleep "$delay"
496 done
497fi
498
499# Try to find and mount the root device.
500mkdir -p $targetRoot
501
502exec 3< @fsInfo@
503
504while read -u 3 mountPoint; do
505 read -u 3 device
506 read -u 3 fsType
507 read -u 3 options
508
509 # !!! Really quick hack to support bind mounts, i.e., where the
510 # "device" should be taken relative to /mnt-root, not /. Assume
511 # that every device that starts with / but doesn't start with /dev
512 # is a bind mount.
513 pseudoDevice=
514 case $device in
515 /dev/*)
516 ;;
517 //*)
518 # Don't touch SMB/CIFS paths.
519 pseudoDevice=1
520 ;;
521 /*)
522 device=/mnt-root$device
523 ;;
524 *)
525 # Not an absolute path; assume that it's a pseudo-device
526 # like an NFS path (e.g. "server:/path").
527 pseudoDevice=1
528 ;;
529 esac
530
531 if test -z "$pseudoDevice" && ! waitDevice "$device"; then
532 # If it doesn't appear, try to mount it anyway (and
533 # probably fail). This is a fallback for non-device "devices"
534 # that we don't properly recognise.
535 echo "Timed out waiting for device $device, trying to mount anyway."
536 fi
537
538 # Wait once more for the udev queue to empty, just in case it's
539 # doing something with $device right now.
540 udevadm settle
541
542 # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs.
543 if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then
544 fsType=$(blkid -o value -s TYPE "$device")
545 fsSize=$(blockdev --getsize64 "$device")
546
547 mkdir -p /tmp-iso
548 mount -t "$fsType" /dev/root /tmp-iso
549 mountFS tmpfs /iso size="$fsSize" tmpfs
550
551 cp -r /tmp-iso/* /mnt-root/iso/
552
553 umount /tmp-iso
554 rmdir /tmp-iso
555 continue
556 fi
557
558 if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ ! -z "$persistence" ]; then
559 echo persistence...
560 waitDevice "$persistence"
561 echo enabling persistence...
562 mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto"
563 continue
564 fi
565
566 mountFS "$device" "$mountPoint" "$options" "$fsType"
567done
568
569exec 3>&-
570
571
572@postMountCommands@
573
574
575# Emit a udev rule for /dev/root to prevent systemd from complaining.
576if [ -e /mnt-root/iso ]; then
577 eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=/mnt-root/iso)
578else
579 eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=$targetRoot)
580fi
581if [ "$ROOT_MAJOR" -a "$ROOT_MINOR" -a "$ROOT_MAJOR" != 0 ]; then
582 mkdir -p /run/udev/rules.d
583 echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'$ROOT_MAJOR'", ENV{MINOR}=="'$ROOT_MINOR'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules
584fi
585
586
587# Stop udevd.
588udevadm control --exit
589
590# Reset the logging file descriptors.
591# Do this just before pkill, which will kill the tee process.
592exec 1>&$logOutFd 2>&$logErrFd
593eval "exec $logOutFd>&- $logErrFd>&-"
594
595# Kill any remaining processes, just to be sure we're not taking any
596# with us into stage 2. But keep storage daemons like unionfs-fuse.
597#
598# Storage daemons are distinguished by an @ in front of their command line:
599# https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/
600for pid in $(pgrep -v -f '^@'); do
601 # Make sure we don't kill kernel processes, see #15226 and:
602 # http://stackoverflow.com/questions/12213445/identifying-kernel-threads
603 readlink "/proc/$pid/exe" &> /dev/null || continue
604 # Try to avoid killing ourselves.
605 [ $pid -eq $$ ] && continue
606 kill -9 "$pid"
607done
608
609if test -n "$debug1mounts"; then fail; fi
610
611
612# Restore /proc/sys/kernel/modprobe to its original value.
613echo /sbin/modprobe > /proc/sys/kernel/modprobe
614
615
616# Start stage 2. `switch_root' deletes all files in the ramfs on the
617# current root. The path has to be valid in the chroot not outside.
618if [ ! -e "$targetRoot/$stage2Init" ]; then
619 stage2Check=${stage2Init}
620 while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do
621 stage2Check=${stage2Check%/*}
622 done
623 if [ ! -L "$targetRoot/$stage2Check" ]; then
624 echo "stage 2 init script ($targetRoot/$stage2Init) not found"
625 fail
626 fi
627fi
628
629mkdir -m 0755 -p $targetRoot/proc $targetRoot/sys $targetRoot/dev $targetRoot/run
630
631mount --move /proc $targetRoot/proc
632mount --move /sys $targetRoot/sys
633mount --move /dev $targetRoot/dev
634mount --move /run $targetRoot/run
635
636exec env -i $(type -P switch_root) "$targetRoot" "$stage2Init"
637
638fail # should never be reached