diff options
| -rw-r--r-- | modules/stage-1/default.nix | 669 | ||||
| -rw-r--r-- | modules/stage-1/stage-1-init.sh | 638 |
2 files changed, 1307 insertions, 0 deletions
diff --git a/modules/stage-1/default.nix b/modules/stage-1/default.nix new file mode 100644 index 00000000..5a14584e --- /dev/null +++ b/modules/stage-1/default.nix | |||
| @@ -0,0 +1,669 @@ | |||
| 1 | # This module builds the initial ramdisk, which contains an init | ||
| 2 | # script that performs the first stage of booting the system: it loads | ||
| 3 | # the modules necessary to mount the root file system, then calls the | ||
| 4 | # init in the root file system to start the second boot stage. | ||
| 5 | |||
| 6 | { config, lib, utils, pkgs, ... }: | ||
| 7 | |||
| 8 | with lib; | ||
| 9 | |||
| 10 | let | ||
| 11 | |||
| 12 | udev = config.systemd.package; | ||
| 13 | |||
| 14 | kernel-name = config.boot.kernelPackages.kernel.name or "kernel"; | ||
| 15 | |||
| 16 | modulesTree = config.system.modulesTree.override { name = kernel-name + "-modules"; }; | ||
| 17 | firmware = config.hardware.firmware; | ||
| 18 | |||
| 19 | |||
| 20 | # Determine the set of modules that we need to mount the root FS. | ||
| 21 | modulesClosure = pkgs.makeModulesClosure { | ||
| 22 | rootModules = config.boot.initrd.availableKernelModules ++ config.boot.initrd.kernelModules; | ||
| 23 | kernel = modulesTree; | ||
| 24 | firmware = firmware; | ||
| 25 | allowMissing = false; | ||
| 26 | }; | ||
| 27 | |||
| 28 | |||
| 29 | # The initrd only has to mount `/` or any FS marked as necessary for | ||
| 30 | # booting (such as the FS containing `/nix/store`, or an FS needed for | ||
| 31 | # mounting `/`, like `/` on a loopback). | ||
| 32 | fileSystems = filter utils.fsNeededForBoot config.system.build.fileSystems; | ||
| 33 | |||
| 34 | # A utility for enumerating the shared-library dependencies of a program | ||
| 35 | findLibs = pkgs.buildPackages.writeShellScriptBin "find-libs" '' | ||
| 36 | set -euo pipefail | ||
| 37 | |||
| 38 | declare -A seen | ||
| 39 | left=() | ||
| 40 | |||
| 41 | patchelf="${pkgs.buildPackages.patchelf}/bin/patchelf" | ||
| 42 | |||
| 43 | function add_needed { | ||
| 44 | rpath="$($patchelf --print-rpath $1)" | ||
| 45 | dir="$(dirname $1)" | ||
| 46 | for lib in $($patchelf --print-needed $1); do | ||
| 47 | left+=("$lib" "$rpath" "$dir") | ||
| 48 | done | ||
| 49 | } | ||
| 50 | |||
| 51 | add_needed "$1" | ||
| 52 | |||
| 53 | while [ ''${#left[@]} -ne 0 ]; do | ||
| 54 | next=''${left[0]} | ||
| 55 | rpath=''${left[1]} | ||
| 56 | ORIGIN=''${left[2]} | ||
| 57 | left=("''${left[@]:3}") | ||
| 58 | if [ -z ''${seen[$next]+x} ]; then | ||
| 59 | seen[$next]=1 | ||
| 60 | |||
| 61 | # Ignore the dynamic linker which for some reason appears as a DT_NEEDED of glibc but isn't in glibc's RPATH. | ||
| 62 | case "$next" in | ||
| 63 | ld*.so.?) continue;; | ||
| 64 | esac | ||
| 65 | |||
| 66 | IFS=: read -ra paths <<< $rpath | ||
| 67 | res= | ||
| 68 | for path in "''${paths[@]}"; do | ||
| 69 | path=$(eval "echo $path") | ||
| 70 | if [ -f "$path/$next" ]; then | ||
| 71 | res="$path/$next" | ||
| 72 | echo "$res" | ||
| 73 | add_needed "$res" | ||
| 74 | break | ||
| 75 | fi | ||
| 76 | done | ||
| 77 | if [ -z "$res" ]; then | ||
| 78 | echo "Couldn't satisfy dependency $next" >&2 | ||
| 79 | exit 1 | ||
| 80 | fi | ||
| 81 | fi | ||
| 82 | done | ||
| 83 | ''; | ||
| 84 | |||
| 85 | # Some additional utilities needed in stage 1, like mount, lvm, fsck | ||
| 86 | # etc. We don't want to bring in all of those packages, so we just | ||
| 87 | # copy what we need. Instead of using statically linked binaries, | ||
| 88 | # we just copy what we need from Glibc and use patchelf to make it | ||
| 89 | # work. | ||
| 90 | extraUtils = pkgs.runCommandCC "extra-utils" | ||
| 91 | { nativeBuildInputs = [pkgs.buildPackages.nukeReferences]; | ||
| 92 | allowedReferences = [ "out" ]; # prevent accidents like glibc being included in the initrd | ||
| 93 | } | ||
| 94 | '' | ||
| 95 | set +o pipefail | ||
| 96 | |||
| 97 | mkdir -p $out/bin $out/lib | ||
| 98 | ln -s $out/bin $out/sbin | ||
| 99 | |||
| 100 | copy_bin_and_libs () { | ||
| 101 | [ -f "$out/bin/$(basename $1)" ] && rm "$out/bin/$(basename $1)" | ||
| 102 | cp -pdv $1 $out/bin | ||
| 103 | } | ||
| 104 | |||
| 105 | # Copy BusyBox. | ||
| 106 | for BIN in ${pkgs.busybox}/{s,}bin/*; do | ||
| 107 | copy_bin_and_libs $BIN | ||
| 108 | done | ||
| 109 | |||
| 110 | # Copy some util-linux stuff. | ||
| 111 | copy_bin_and_libs ${pkgs.util-linux}/sbin/blkid | ||
| 112 | |||
| 113 | # Copy dmsetup and lvm. | ||
| 114 | copy_bin_and_libs ${getBin pkgs.lvm2}/bin/dmsetup | ||
| 115 | copy_bin_and_libs ${getBin pkgs.lvm2}/bin/lvm | ||
| 116 | |||
| 117 | # Add RAID mdadm tool. | ||
| 118 | copy_bin_and_libs ${pkgs.mdadm}/sbin/mdadm | ||
| 119 | copy_bin_and_libs ${pkgs.mdadm}/sbin/mdmon | ||
| 120 | |||
| 121 | # Copy udev. | ||
| 122 | copy_bin_and_libs ${udev}/bin/udevadm | ||
| 123 | copy_bin_and_libs ${udev}/lib/systemd/systemd-sysctl | ||
| 124 | for BIN in ${udev}/lib/udev/*_id; do | ||
| 125 | copy_bin_and_libs $BIN | ||
| 126 | done | ||
| 127 | # systemd-udevd is only a symlink to udevadm these days | ||
| 128 | ln -sf udevadm $out/bin/systemd-udevd | ||
| 129 | |||
| 130 | # Copy modprobe. | ||
| 131 | copy_bin_and_libs ${pkgs.kmod}/bin/kmod | ||
| 132 | ln -sf kmod $out/bin/modprobe | ||
| 133 | |||
| 134 | # Copy resize2fs if any ext* filesystems are to be resized | ||
| 135 | ${optionalString (any (fs: fs.autoResize && (lib.hasPrefix "ext" fs.fsType)) fileSystems) '' | ||
| 136 | # We need mke2fs in the initrd. | ||
| 137 | copy_bin_and_libs ${pkgs.e2fsprogs}/sbin/resize2fs | ||
| 138 | ''} | ||
| 139 | |||
| 140 | # Copy secrets if needed. | ||
| 141 | # | ||
| 142 | # TODO: move out to a separate script; see #85000. | ||
| 143 | ${optionalString (!config.boot.loader.supportsInitrdSecrets) | ||
| 144 | (concatStringsSep "\n" (mapAttrsToList (dest: source: | ||
| 145 | let source' = if source == null then dest else source; in | ||
| 146 | '' | ||
| 147 | mkdir -p $(dirname "$out/secrets/${dest}") | ||
| 148 | # Some programs (e.g. ssh) doesn't like secrets to be | ||
| 149 | # symlinks, so we use `cp -L` here to match the | ||
| 150 | # behaviour when secrets are natively supported. | ||
| 151 | cp -Lr ${source'} "$out/secrets/${dest}" | ||
| 152 | '' | ||
| 153 | ) config.boot.initrd.secrets)) | ||
| 154 | } | ||
| 155 | |||
| 156 | ${config.boot.initrd.extraUtilsCommands} | ||
| 157 | |||
| 158 | # Copy ld manually since it isn't detected correctly | ||
| 159 | cp -pv ${pkgs.stdenv.cc.libc.out}/lib/ld*.so.? $out/lib | ||
| 160 | |||
| 161 | # Copy all of the needed libraries | ||
| 162 | find $out/bin $out/lib -type f | while read BIN; do | ||
| 163 | echo "Copying libs for executable $BIN" | ||
| 164 | for LIB in $(${findLibs}/bin/find-libs $BIN); do | ||
| 165 | TGT="$out/lib/$(basename $LIB)" | ||
| 166 | if [ ! -f "$TGT" ]; then | ||
| 167 | SRC="$(readlink -e $LIB)" | ||
| 168 | cp -pdv "$SRC" "$TGT" | ||
| 169 | fi | ||
| 170 | done | ||
| 171 | done | ||
| 172 | |||
| 173 | # Strip binaries further than normal. | ||
| 174 | chmod -R u+w $out | ||
| 175 | stripDirs "$STRIP" "lib bin" "-s" | ||
| 176 | |||
| 177 | # Run patchelf to make the programs refer to the copied libraries. | ||
| 178 | find $out/bin $out/lib -type f | while read i; do | ||
| 179 | if ! test -L $i; then | ||
| 180 | nuke-refs -e $out $i | ||
| 181 | fi | ||
| 182 | done | ||
| 183 | |||
| 184 | find $out/bin -type f | while read i; do | ||
| 185 | if ! test -L $i; then | ||
| 186 | echo "patching $i..." | ||
| 187 | patchelf --set-interpreter $out/lib/ld*.so.? --set-rpath $out/lib $i || true | ||
| 188 | fi | ||
| 189 | done | ||
| 190 | |||
| 191 | if [ -z "${toString (pkgs.stdenv.hostPlatform != pkgs.stdenv.buildPlatform)}" ]; then | ||
| 192 | # Make sure that the patchelf'ed binaries still work. | ||
| 193 | echo "testing patched programs..." | ||
| 194 | $out/bin/ash -c 'echo hello world' | grep "hello world" | ||
| 195 | export LD_LIBRARY_PATH=$out/lib | ||
| 196 | $out/bin/mount --help 2>&1 | grep -q "BusyBox" | ||
| 197 | $out/bin/blkid -V 2>&1 | grep -q 'libblkid' | ||
| 198 | $out/bin/udevadm --version | ||
| 199 | $out/bin/dmsetup --version 2>&1 | tee -a log | grep -q "version:" | ||
| 200 | LVM_SYSTEM_DIR=$out $out/bin/lvm version 2>&1 | tee -a log | grep -q "LVM" | ||
| 201 | $out/bin/mdadm --version | ||
| 202 | |||
| 203 | ${config.boot.initrd.extraUtilsCommandsTest} | ||
| 204 | fi | ||
| 205 | ''; # */ | ||
| 206 | |||
| 207 | |||
| 208 | # Networkd link files are used early by udev to set up interfaces early. | ||
| 209 | # This must be done in stage 1 to avoid race conditions between udev and | ||
| 210 | # network daemons. | ||
| 211 | linkUnits = pkgs.runCommand "link-units" { | ||
| 212 | allowedReferences = [ extraUtils ]; | ||
| 213 | preferLocalBuild = true; | ||
| 214 | } ('' | ||
| 215 | mkdir -p $out | ||
| 216 | cp -v ${udev}/lib/systemd/network/*.link $out/ | ||
| 217 | '' + ( | ||
| 218 | let | ||
| 219 | links = filterAttrs (n: v: hasSuffix ".link" n) config.systemd.network.units; | ||
| 220 | files = mapAttrsToList (n: v: "${v.unit}/${n}") links; | ||
| 221 | in | ||
| 222 | concatMapStringsSep "\n" (file: "cp -v ${file} $out/") files | ||
| 223 | )); | ||
| 224 | |||
| 225 | udevRules = pkgs.runCommand "udev-rules" { | ||
| 226 | allowedReferences = [ extraUtils ]; | ||
| 227 | preferLocalBuild = true; | ||
| 228 | } '' | ||
| 229 | mkdir -p $out | ||
| 230 | |||
| 231 | echo 'ENV{LD_LIBRARY_PATH}="${extraUtils}/lib"' > $out/00-env.rules | ||
| 232 | |||
| 233 | cp -v ${udev}/lib/udev/rules.d/60-cdrom_id.rules $out/ | ||
| 234 | cp -v ${udev}/lib/udev/rules.d/60-persistent-storage.rules $out/ | ||
| 235 | cp -v ${udev}/lib/udev/rules.d/75-net-description.rules $out/ | ||
| 236 | cp -v ${udev}/lib/udev/rules.d/80-drivers.rules $out/ | ||
| 237 | cp -v ${udev}/lib/udev/rules.d/80-net-setup-link.rules $out/ | ||
| 238 | cp -v ${pkgs.lvm2}/lib/udev/rules.d/*.rules $out/ | ||
| 239 | ${config.boot.initrd.extraUdevRulesCommands} | ||
| 240 | |||
| 241 | for i in $out/*.rules; do | ||
| 242 | substituteInPlace $i \ | ||
| 243 | --replace ata_id ${extraUtils}/bin/ata_id \ | ||
| 244 | --replace scsi_id ${extraUtils}/bin/scsi_id \ | ||
| 245 | --replace cdrom_id ${extraUtils}/bin/cdrom_id \ | ||
| 246 | --replace ${pkgs.coreutils}/bin/basename ${extraUtils}/bin/basename \ | ||
| 247 | --replace ${pkgs.util-linux}/bin/blkid ${extraUtils}/bin/blkid \ | ||
| 248 | --replace ${getBin pkgs.lvm2}/bin ${extraUtils}/bin \ | ||
| 249 | --replace ${pkgs.mdadm}/sbin ${extraUtils}/sbin \ | ||
| 250 | --replace ${pkgs.bash}/bin/sh ${extraUtils}/bin/sh \ | ||
| 251 | --replace ${udev} ${extraUtils} | ||
| 252 | done | ||
| 253 | |||
| 254 | # Work around a bug in QEMU, which doesn't implement the "READ | ||
| 255 | # DISC INFORMATION" SCSI command: | ||
| 256 | # https://bugzilla.redhat.com/show_bug.cgi?id=609049 | ||
| 257 | # As a result, `cdrom_id' doesn't print | ||
| 258 | # ID_CDROM_MEDIA_TRACK_COUNT_DATA, which in turn prevents the | ||
| 259 | # /dev/disk/by-label symlinks from being created. We need these | ||
| 260 | # in the NixOS installation CD, so use ID_CDROM_MEDIA in the | ||
| 261 | # corresponding udev rules for now. This was the behaviour in | ||
| 262 | # udev <= 154. See also | ||
| 263 | # http://www.spinics.net/lists/hotplug/msg03935.html | ||
| 264 | substituteInPlace $out/60-persistent-storage.rules \ | ||
| 265 | --replace ID_CDROM_MEDIA_TRACK_COUNT_DATA ID_CDROM_MEDIA | ||
| 266 | ''; # */ | ||
| 267 | |||
| 268 | |||
| 269 | # The init script of boot stage 1 (loading kernel modules for | ||
| 270 | # mounting the root FS). | ||
| 271 | bootStage1 = pkgs.substituteAll { | ||
| 272 | src = ./stage-1-init.sh; | ||
| 273 | |||
| 274 | shell = "${extraUtils}/bin/ash"; | ||
| 275 | |||
| 276 | isExecutable = true; | ||
| 277 | |||
| 278 | postInstall = '' | ||
| 279 | echo checking syntax | ||
| 280 | # check both with bash | ||
| 281 | ${pkgs.buildPackages.bash}/bin/sh -n $target | ||
| 282 | # and with ash shell, just in case | ||
| 283 | ${pkgs.buildPackages.busybox}/bin/ash -n $target | ||
| 284 | ''; | ||
| 285 | |||
| 286 | inherit linkUnits udevRules extraUtils modulesClosure; | ||
| 287 | |||
| 288 | inherit (config.boot) resumeDevice; | ||
| 289 | |||
| 290 | inherit (config.system.build) earlyMountScript; | ||
| 291 | |||
| 292 | inherit (config.boot.initrd) checkJournalingFS verbose | ||
| 293 | preLVMCommands preDeviceCommands postDeviceCommands postMountCommands preFailCommands kernelModules; | ||
| 294 | |||
| 295 | resumeDevices = map (sd: if sd ? device then sd.device else "/dev/disk/by-label/${sd.label}") | ||
| 296 | (filter (sd: hasPrefix "/dev/" sd.device && !sd.randomEncryption.enable | ||
| 297 | # Don't include zram devices | ||
| 298 | && !(hasPrefix "/dev/zram" sd.device) | ||
| 299 | ) config.swapDevices); | ||
| 300 | |||
| 301 | fsInfo = | ||
| 302 | let f = fs: [ fs.mountPoint (if fs.device != null then fs.device else "/dev/disk/by-label/${fs.label}") fs.fsType (builtins.concatStringsSep "," fs.options) ]; | ||
| 303 | in pkgs.writeText "initrd-fsinfo" (concatStringsSep "\n" (concatMap f fileSystems)); | ||
| 304 | |||
| 305 | setHostId = optionalString (config.networking.hostId != null) '' | ||
| 306 | hi="${config.networking.hostId}" | ||
| 307 | ${if pkgs.stdenv.isBigEndian then '' | ||
| 308 | echo -ne "\x''${hi:0:2}\x''${hi:2:2}\x''${hi:4:2}\x''${hi:6:2}" > /etc/hostid | ||
| 309 | '' else '' | ||
| 310 | echo -ne "\x''${hi:6:2}\x''${hi:4:2}\x''${hi:2:2}\x''${hi:0:2}" > /etc/hostid | ||
| 311 | ''} | ||
| 312 | ''; | ||
| 313 | }; | ||
| 314 | |||
| 315 | |||
| 316 | # The closure of the init script of boot stage 1 is what we put in | ||
| 317 | # the initial RAM disk. | ||
| 318 | initialRamdisk = pkgs.makeInitrd { | ||
| 319 | name = "initrd-${kernel-name}"; | ||
| 320 | inherit (config.boot.initrd) compressor compressorArgs prepend; | ||
| 321 | |||
| 322 | contents = | ||
| 323 | [ { object = bootStage1; | ||
| 324 | symlink = "/init"; | ||
| 325 | } | ||
| 326 | { object = pkgs.writeText "mdadm.conf" config.boot.initrd.mdadmConf; | ||
| 327 | symlink = "/etc/mdadm.conf"; | ||
| 328 | } | ||
| 329 | { object = pkgs.runCommand "initrd-kmod-blacklist-ubuntu" { | ||
| 330 | src = "${pkgs.kmod-blacklist-ubuntu}/modprobe.conf"; | ||
| 331 | preferLocalBuild = true; | ||
| 332 | } '' | ||
| 333 | target=$out | ||
| 334 | ${pkgs.buildPackages.perl}/bin/perl -0pe 's/## file: iwlwifi.conf(.+?)##/##/s;' $src > $out | ||
| 335 | ''; | ||
| 336 | symlink = "/etc/modprobe.d/ubuntu.conf"; | ||
| 337 | } | ||
| 338 | { object = pkgs.kmod-debian-aliases; | ||
| 339 | symlink = "/etc/modprobe.d/debian.conf"; | ||
| 340 | } | ||
| 341 | ]; | ||
| 342 | }; | ||
| 343 | |||
| 344 | # Script to add secret files to the initrd at bootloader update time | ||
| 345 | initialRamdiskSecretAppender = | ||
| 346 | let | ||
| 347 | compressorExe = initialRamdisk.compressorExecutableFunction pkgs; | ||
| 348 | in pkgs.writeScriptBin "append-initrd-secrets" | ||
| 349 | '' | ||
| 350 | #!${pkgs.bash}/bin/bash -e | ||
| 351 | function usage { | ||
| 352 | echo "USAGE: $0 INITRD_FILE" >&2 | ||
| 353 | echo "Appends this configuration's secrets to INITRD_FILE" >&2 | ||
| 354 | } | ||
| 355 | |||
| 356 | if [ $# -ne 1 ]; then | ||
| 357 | usage | ||
| 358 | exit 1 | ||
| 359 | fi | ||
| 360 | |||
| 361 | if [ "$1"x = "--helpx" ]; then | ||
| 362 | usage | ||
| 363 | exit 0 | ||
| 364 | fi | ||
| 365 | |||
| 366 | ${lib.optionalString (config.boot.initrd.secrets == {}) | ||
| 367 | "exit 0"} | ||
| 368 | |||
| 369 | export PATH=${pkgs.coreutils}/bin:${pkgs.cpio}/bin:${pkgs.gzip}/bin:${pkgs.findutils}/bin | ||
| 370 | |||
| 371 | function cleanup { | ||
| 372 | if [ -n "$tmp" -a -d "$tmp" ]; then | ||
| 373 | rm -fR "$tmp" | ||
| 374 | fi | ||
| 375 | } | ||
| 376 | trap cleanup EXIT | ||
| 377 | |||
| 378 | tmp=$(mktemp -d initrd-secrets.XXXXXXXXXX) | ||
| 379 | |||
| 380 | ${lib.concatStringsSep "\n" (mapAttrsToList (dest: source: | ||
| 381 | let source' = if source == null then dest else toString source; in | ||
| 382 | '' | ||
| 383 | mkdir -p $(dirname "$tmp/${dest}") | ||
| 384 | cp -aL ${source'} "$tmp/${dest}" | ||
| 385 | '' | ||
| 386 | ) config.boot.initrd.secrets) | ||
| 387 | } | ||
| 388 | |||
| 389 | (cd "$tmp" && find . -print0 | sort -z | cpio --quiet -o -H newc -R +0:+0 --reproducible --null) | \ | ||
| 390 | ${compressorExe} ${lib.escapeShellArgs initialRamdisk.compressorArgs} >> "$1" | ||
| 391 | ''; | ||
| 392 | |||
| 393 | in | ||
| 394 | |||
| 395 | { | ||
| 396 | disabledModules = [ "system/boot/stage-1.nix" ]; | ||
| 397 | |||
| 398 | options = { | ||
| 399 | |||
| 400 | boot.resumeDevice = mkOption { | ||
| 401 | type = types.str; | ||
| 402 | default = ""; | ||
| 403 | example = "/dev/sda3"; | ||
| 404 | description = '' | ||
| 405 | Device for manual resume attempt during boot. This should be used primarily | ||
| 406 | if you want to resume from file. If left empty, the swap partitions are used. | ||
| 407 | Specify here the device where the file resides. | ||
| 408 | You should also use <varname>boot.kernelParams</varname> to specify | ||
| 409 | <literal><replaceable>resume_offset</replaceable></literal>. | ||
| 410 | ''; | ||
| 411 | }; | ||
| 412 | |||
| 413 | boot.initrd.enable = mkOption { | ||
| 414 | type = types.bool; | ||
| 415 | default = !config.boot.isContainer; | ||
| 416 | defaultText = "!config.boot.isContainer"; | ||
| 417 | description = '' | ||
| 418 | Whether to enable the NixOS initial RAM disk (initrd). This may be | ||
| 419 | needed to perform some initialisation tasks (like mounting | ||
| 420 | network/encrypted file systems) before continuing the boot process. | ||
| 421 | ''; | ||
| 422 | }; | ||
| 423 | |||
| 424 | boot.initrd.prepend = mkOption { | ||
| 425 | default = [ ]; | ||
| 426 | type = types.listOf types.str; | ||
| 427 | description = '' | ||
| 428 | Other initrd files to prepend to the final initrd we are building. | ||
| 429 | ''; | ||
| 430 | }; | ||
| 431 | |||
| 432 | boot.initrd.checkJournalingFS = mkOption { | ||
| 433 | default = true; | ||
| 434 | type = types.bool; | ||
| 435 | description = '' | ||
| 436 | Whether to run <command>fsck</command> on journaling filesystems such as ext3. | ||
| 437 | ''; | ||
| 438 | }; | ||
| 439 | |||
| 440 | boot.initrd.mdadmConf = mkOption { | ||
| 441 | default = ""; | ||
| 442 | type = types.lines; | ||
| 443 | description = '' | ||
| 444 | Contents of <filename>/etc/mdadm.conf</filename> in stage 1. | ||
| 445 | ''; | ||
| 446 | }; | ||
| 447 | |||
| 448 | boot.initrd.preLVMCommands = mkOption { | ||
| 449 | default = ""; | ||
| 450 | type = types.lines; | ||
| 451 | description = '' | ||
| 452 | Shell commands to be executed immediately before LVM discovery. | ||
| 453 | ''; | ||
| 454 | }; | ||
| 455 | |||
| 456 | boot.initrd.preDeviceCommands = mkOption { | ||
| 457 | default = ""; | ||
| 458 | type = types.lines; | ||
| 459 | description = '' | ||
| 460 | Shell commands to be executed before udev is started to create | ||
| 461 | device nodes. | ||
| 462 | ''; | ||
| 463 | }; | ||
| 464 | |||
| 465 | boot.initrd.postDeviceCommands = mkOption { | ||
| 466 | default = ""; | ||
| 467 | type = types.lines; | ||
| 468 | description = '' | ||
| 469 | Shell commands to be executed immediately after stage 1 of the | ||
| 470 | boot has loaded kernel modules and created device nodes in | ||
| 471 | <filename>/dev</filename>. | ||
| 472 | ''; | ||
| 473 | }; | ||
| 474 | |||
| 475 | boot.initrd.postMountCommands = mkOption { | ||
| 476 | default = ""; | ||
| 477 | type = types.lines; | ||
| 478 | description = '' | ||
| 479 | Shell commands to be executed immediately after the stage 1 | ||
| 480 | filesystems have been mounted. | ||
| 481 | ''; | ||
| 482 | }; | ||
| 483 | |||
| 484 | boot.initrd.preFailCommands = mkOption { | ||
| 485 | default = ""; | ||
| 486 | type = types.lines; | ||
| 487 | description = '' | ||
| 488 | Shell commands to be executed before the failure prompt is shown. | ||
| 489 | ''; | ||
| 490 | }; | ||
| 491 | |||
| 492 | boot.initrd.extraUtilsCommands = mkOption { | ||
| 493 | internal = true; | ||
| 494 | default = ""; | ||
| 495 | type = types.lines; | ||
| 496 | description = '' | ||
| 497 | Shell commands to be executed in the builder of the | ||
| 498 | extra-utils derivation. This can be used to provide | ||
| 499 | additional utilities in the initial ramdisk. | ||
| 500 | ''; | ||
| 501 | }; | ||
| 502 | |||
| 503 | boot.initrd.extraUtilsCommandsTest = mkOption { | ||
| 504 | internal = true; | ||
| 505 | default = ""; | ||
| 506 | type = types.lines; | ||
| 507 | description = '' | ||
| 508 | Shell commands to be executed in the builder of the | ||
| 509 | extra-utils derivation after patchelf has done its | ||
| 510 | job. This can be used to test additional utilities | ||
| 511 | copied in extraUtilsCommands. | ||
| 512 | ''; | ||
| 513 | }; | ||
| 514 | |||
| 515 | boot.initrd.extraUdevRulesCommands = mkOption { | ||
| 516 | internal = true; | ||
| 517 | default = ""; | ||
| 518 | type = types.lines; | ||
| 519 | description = '' | ||
| 520 | Shell commands to be executed in the builder of the | ||
| 521 | udev-rules derivation. This can be used to add | ||
| 522 | additional udev rules in the initial ramdisk. | ||
| 523 | ''; | ||
| 524 | }; | ||
| 525 | |||
| 526 | boot.initrd.compressor = mkOption { | ||
| 527 | default = ( | ||
| 528 | if lib.versionAtLeast config.boot.kernelPackages.kernel.version "5.9" | ||
| 529 | then "zstd" | ||
| 530 | else "gzip" | ||
| 531 | ); | ||
| 532 | defaultText = "zstd if the kernel supports it (5.9+), gzip if not."; | ||
| 533 | type = types.unspecified; # We don't have a function type... | ||
| 534 | description = '' | ||
| 535 | The compressor to use on the initrd image. May be any of: | ||
| 536 | |||
| 537 | <itemizedlist> | ||
| 538 | <listitem><para>The name of one of the predefined compressors, see <filename>pkgs/build-support/kernel/initrd-compressor-meta.nix</filename> for the definitions.</para></listitem> | ||
| 539 | <listitem><para>A function which, given the nixpkgs package set, returns the path to a compressor tool, e.g. <literal>pkgs: "''${pkgs.pigz}/bin/pigz"</literal></para></listitem> | ||
| 540 | <listitem><para>(not recommended, because it does not work when cross-compiling) the full path to a compressor tool, e.g. <literal>"''${pkgs.pigz}/bin/pigz"</literal></para></listitem> | ||
| 541 | </itemizedlist> | ||
| 542 | |||
| 543 | The given program should read data from stdin and write it to stdout compressed. | ||
| 544 | ''; | ||
| 545 | example = "xz"; | ||
| 546 | }; | ||
| 547 | |||
| 548 | boot.initrd.compressorArgs = mkOption { | ||
| 549 | default = null; | ||
| 550 | type = types.nullOr (types.listOf types.str); | ||
| 551 | description = "Arguments to pass to the compressor for the initrd image, or null to use the compressor's defaults."; | ||
| 552 | }; | ||
| 553 | |||
| 554 | boot.initrd.secrets = mkOption | ||
| 555 | { default = {}; | ||
| 556 | type = types.attrsOf (types.nullOr types.path); | ||
| 557 | description = | ||
| 558 | '' | ||
| 559 | Secrets to append to the initrd. The attribute name is the | ||
| 560 | path the secret should have inside the initrd, the value | ||
| 561 | is the path it should be copied from (or null for the same | ||
| 562 | path inside and out). | ||
| 563 | ''; | ||
| 564 | example = literalExample | ||
| 565 | '' | ||
| 566 | { "/etc/dropbear/dropbear_rsa_host_key" = | ||
| 567 | ./secret-dropbear-key; | ||
| 568 | } | ||
| 569 | ''; | ||
| 570 | }; | ||
| 571 | |||
| 572 | boot.initrd.supportedFilesystems = mkOption { | ||
| 573 | default = [ ]; | ||
| 574 | example = [ "btrfs" ]; | ||
| 575 | type = types.listOf types.str; | ||
| 576 | description = "Names of supported filesystem types in the initial ramdisk."; | ||
| 577 | }; | ||
| 578 | |||
| 579 | boot.initrd.verbose = mkOption { | ||
| 580 | default = true; | ||
| 581 | type = types.bool; | ||
| 582 | description = | ||
| 583 | '' | ||
| 584 | Verbosity of the initrd. Please note that disabling verbosity removes | ||
| 585 | only the mandatory messages generated by the NixOS scripts. For a | ||
| 586 | completely silent boot, you might also want to set the two following | ||
| 587 | configuration options: | ||
| 588 | |||
| 589 | <itemizedlist> | ||
| 590 | <listitem><para><literal>boot.consoleLogLevel = 0;</literal></para></listitem> | ||
| 591 | <listitem><para><literal>boot.kernelParams = [ "quiet" "udev.log_priority=3" ];</literal></para></listitem> | ||
| 592 | </itemizedlist> | ||
| 593 | ''; | ||
| 594 | }; | ||
| 595 | |||
| 596 | boot.loader.supportsInitrdSecrets = mkOption | ||
| 597 | { internal = true; | ||
| 598 | default = false; | ||
| 599 | type = types.bool; | ||
| 600 | description = | ||
| 601 | '' | ||
| 602 | Whether the bootloader setup runs append-initrd-secrets. | ||
| 603 | If not, any needed secrets must be copied into the initrd | ||
| 604 | and thus added to the store. | ||
| 605 | ''; | ||
| 606 | }; | ||
| 607 | |||
| 608 | fileSystems = mkOption { | ||
| 609 | type = with lib.types; attrsOf (submodule { | ||
| 610 | options.neededForBoot = mkOption { | ||
| 611 | default = false; | ||
| 612 | type = types.bool; | ||
| 613 | description = '' | ||
| 614 | If set, this file system will be mounted in the initial ramdisk. | ||
| 615 | Note that the file system will always be mounted in the initial | ||
| 616 | ramdisk if its mount point is one of the following: | ||
| 617 | ${concatStringsSep ", " ( | ||
| 618 | forEach utils.pathsNeededForBoot (i: "<filename>${i}</filename>") | ||
| 619 | )}. | ||
| 620 | ''; | ||
| 621 | }; | ||
| 622 | }); | ||
| 623 | }; | ||
| 624 | |||
| 625 | }; | ||
| 626 | |||
| 627 | config = mkIf config.boot.initrd.enable { | ||
| 628 | assertions = [ | ||
| 629 | { assertion = any (fs: fs.mountPoint == "/") fileSystems; | ||
| 630 | message = "The ‘fileSystems’ option does not specify your root file system."; | ||
| 631 | } | ||
| 632 | { assertion = let inherit (config.boot) resumeDevice; in | ||
| 633 | resumeDevice == "" || builtins.substring 0 1 resumeDevice == "/"; | ||
| 634 | message = "boot.resumeDevice has to be an absolute path." | ||
| 635 | + " Old \"x:y\" style is no longer supported."; | ||
| 636 | } | ||
| 637 | # TODO: remove when #85000 is fixed | ||
| 638 | { assertion = !config.boot.loader.supportsInitrdSecrets -> | ||
| 639 | all (source: | ||
| 640 | builtins.isPath source || | ||
| 641 | (builtins.isString source && hasPrefix builtins.storeDir source)) | ||
| 642 | (attrValues config.boot.initrd.secrets); | ||
| 643 | message = '' | ||
| 644 | boot.loader.initrd.secrets values must be unquoted paths when | ||
| 645 | using a bootloader that doesn't natively support initrd | ||
| 646 | secrets, e.g.: | ||
| 647 | |||
| 648 | boot.initrd.secrets = { | ||
| 649 | "/etc/secret" = /path/to/secret; | ||
| 650 | }; | ||
| 651 | |||
| 652 | Note that this will result in all secrets being stored | ||
| 653 | world-readable in the Nix store! | ||
| 654 | ''; | ||
| 655 | } | ||
| 656 | ]; | ||
| 657 | |||
| 658 | system.build = | ||
| 659 | { inherit bootStage1 initialRamdisk initialRamdiskSecretAppender extraUtils; }; | ||
| 660 | |||
| 661 | system.requiredKernelConfig = with config.lib.kernelConfig; [ | ||
| 662 | (isYes "TMPFS") | ||
| 663 | (isYes "BLK_DEV_INITRD") | ||
| 664 | ]; | ||
| 665 | |||
| 666 | boot.initrd.supportedFilesystems = map (fs: fs.fsType) fileSystems; | ||
| 667 | |||
| 668 | }; | ||
| 669 | } | ||
diff --git a/modules/stage-1/stage-1-init.sh b/modules/stage-1/stage-1-init.sh new file mode 100644 index 00000000..ddaf9858 --- /dev/null +++ b/modules/stage-1/stage-1-init.sh | |||
| @@ -0,0 +1,638 @@ | |||
| 1 | #! @shell@ | ||
| 2 | |||
| 3 | targetRoot=/mnt-root | ||
| 4 | console=tty1 | ||
| 5 | verbose="@verbose@" | ||
| 6 | |||
| 7 | info() { | ||
| 8 | if [[ -n "$verbose" ]]; then | ||
| 9 | echo "$@" | ||
| 10 | fi | ||
| 11 | } | ||
| 12 | |||
| 13 | extraUtils="@extraUtils@" | ||
| 14 | export LD_LIBRARY_PATH=@extraUtils@/lib | ||
| 15 | export PATH=@extraUtils@/bin | ||
| 16 | ln -s @extraUtils@/bin /bin | ||
| 17 | |||
| 18 | # Copy the secrets to their needed location | ||
| 19 | if [ -d "@extraUtils@/secrets" ]; then | ||
| 20 | for secret in $(cd "@extraUtils@/secrets"; find . -type f); do | ||
| 21 | mkdir -p $(dirname "/$secret") | ||
| 22 | ln -s "@extraUtils@/secrets/$secret" "$secret" | ||
| 23 | done | ||
| 24 | fi | ||
| 25 | |||
| 26 | # Stop LVM complaining about fd3 | ||
| 27 | export LVM_SUPPRESS_FD_WARNINGS=true | ||
| 28 | |||
| 29 | fail() { | ||
| 30 | if [ -n "$panicOnFail" ]; then exit 1; fi | ||
| 31 | |||
| 32 | @preFailCommands@ | ||
| 33 | |||
| 34 | # If starting stage 2 failed, allow the user to repair the problem | ||
| 35 | # in an interactive shell. | ||
| 36 | cat <<EOF | ||
| 37 | |||
| 38 | An error occurred in stage 1 of the boot process, which must mount the | ||
| 39 | root filesystem on \`$targetRoot' and then start stage 2. Press one | ||
| 40 | of the following keys: | ||
| 41 | |||
| 42 | EOF | ||
| 43 | if [ -n "$allowShell" ]; then cat <<EOF | ||
| 44 | i) to launch an interactive shell | ||
| 45 | f) to start an interactive shell having pid 1 (needed if you want to | ||
| 46 | start stage 2's init manually) | ||
| 47 | EOF | ||
| 48 | fi | ||
| 49 | cat <<EOF | ||
| 50 | r) to reboot immediately | ||
| 51 | *) to ignore the error and continue | ||
| 52 | EOF | ||
| 53 | |||
| 54 | read -n 1 reply | ||
| 55 | |||
| 56 | if [ -n "$allowShell" -a "$reply" = f ]; then | ||
| 57 | exec setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" | ||
| 58 | elif [ -n "$allowShell" -a "$reply" = i ]; then | ||
| 59 | echo "Starting interactive shell..." | ||
| 60 | setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail | ||
| 61 | elif [ "$reply" = r ]; then | ||
| 62 | echo "Rebooting..." | ||
| 63 | reboot -f | ||
| 64 | else | ||
| 65 | info "Continuing..." | ||
| 66 | fi | ||
| 67 | } | ||
| 68 | |||
| 69 | trap 'fail' 0 | ||
| 70 | |||
| 71 | |||
| 72 | # Print a greeting. | ||
| 73 | info | ||
| 74 | info "[1;32m<<< NixOS Stage 1 >>>[0m" | ||
| 75 | info | ||
| 76 | |||
| 77 | # Make several required directories. | ||
| 78 | mkdir -p /etc/udev | ||
| 79 | touch /etc/fstab # to shut up mount | ||
| 80 | ln -s /proc/mounts /etc/mtab # to shut up mke2fs | ||
| 81 | touch /etc/udev/hwdb.bin # to shut up udev | ||
| 82 | touch /etc/initrd-release | ||
| 83 | |||
| 84 | # Function for waiting a device to appear. | ||
| 85 | waitDevice() { | ||
| 86 | local device="$1" | ||
| 87 | |||
| 88 | # USB storage devices tend to appear with some delay. It would be | ||
| 89 | # great if we had a way to synchronously wait for them, but | ||
| 90 | # alas... So just wait for a few seconds for the device to | ||
| 91 | # appear. | ||
| 92 | if test ! -e $device; then | ||
| 93 | echo -n "waiting for device $device to appear..." | ||
| 94 | try=20 | ||
| 95 | while [ $try -gt 0 ]; do | ||
| 96 | sleep 1 | ||
| 97 | # also re-try lvm activation now that new block devices might have appeared | ||
| 98 | lvm vgchange -ay | ||
| 99 | # and tell udev to create nodes for the new LVs | ||
| 100 | udevadm trigger --action=add | ||
| 101 | if test -e $device; then break; fi | ||
| 102 | echo -n "." | ||
| 103 | try=$((try - 1)) | ||
| 104 | done | ||
| 105 | echo | ||
| 106 | [ $try -ne 0 ] | ||
| 107 | fi | ||
| 108 | } | ||
| 109 | |||
| 110 | # Mount special file systems. | ||
| 111 | specialMount() { | ||
| 112 | local device="$1" | ||
| 113 | local mountPoint="$2" | ||
| 114 | local options="$3" | ||
| 115 | local fsType="$4" | ||
| 116 | |||
| 117 | mkdir -m 0755 -p "$mountPoint" | ||
| 118 | mount -n -t "$fsType" -o "$options" "$device" "$mountPoint" | ||
| 119 | } | ||
| 120 | source @earlyMountScript@ | ||
| 121 | |||
| 122 | # Log the script output to /dev/kmsg or /run/log/stage-1-init.log. | ||
| 123 | mkdir -p /tmp | ||
| 124 | mkfifo /tmp/stage-1-init.log.fifo | ||
| 125 | logOutFd=8 && logErrFd=9 | ||
| 126 | eval "exec $logOutFd>&1 $logErrFd>&2" | ||
| 127 | if test -w /dev/kmsg; then | ||
| 128 | tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do | ||
| 129 | if test -n "$line"; then | ||
| 130 | echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg | ||
| 131 | fi | ||
| 132 | done & | ||
| 133 | else | ||
| 134 | mkdir -p /run/log | ||
| 135 | tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log & | ||
| 136 | fi | ||
| 137 | exec > /tmp/stage-1-init.log.fifo 2>&1 | ||
| 138 | |||
| 139 | |||
| 140 | # Process the kernel command line. | ||
| 141 | export stage2Init=/init | ||
| 142 | for o in $(cat /proc/cmdline); do | ||
| 143 | case $o in | ||
| 144 | console=*) | ||
| 145 | set -- $(IFS==; echo $o) | ||
| 146 | params=$2 | ||
| 147 | set -- $(IFS=,; echo $params) | ||
| 148 | console=$1 | ||
| 149 | ;; | ||
| 150 | init=*) | ||
| 151 | set -- $(IFS==; echo $o) | ||
| 152 | stage2Init=$2 | ||
| 153 | ;; | ||
| 154 | boot.persistence=*) | ||
| 155 | set -- $(IFS==; echo $o) | ||
| 156 | persistence=$2 | ||
| 157 | ;; | ||
| 158 | boot.persistence.opt=*) | ||
| 159 | set -- $(IFS==; echo $o) | ||
| 160 | persistence_opt=$2 | ||
| 161 | ;; | ||
| 162 | boot.trace|debugtrace) | ||
| 163 | # Show each command. | ||
| 164 | set -x | ||
| 165 | ;; | ||
| 166 | boot.shell_on_fail) | ||
| 167 | allowShell=1 | ||
| 168 | ;; | ||
| 169 | boot.debug1|debug1) # stop right away | ||
| 170 | allowShell=1 | ||
| 171 | fail | ||
| 172 | ;; | ||
| 173 | boot.debug1devices) # stop after loading modules and creating device nodes | ||
| 174 | allowShell=1 | ||
| 175 | debug1devices=1 | ||
| 176 | ;; | ||
| 177 | boot.debug1mounts) # stop after mounting file systems | ||
| 178 | allowShell=1 | ||
| 179 | debug1mounts=1 | ||
| 180 | ;; | ||
| 181 | boot.panic_on_fail|stage1panic=1) | ||
| 182 | panicOnFail=1 | ||
| 183 | ;; | ||
| 184 | root=*) | ||
| 185 | # If a root device is specified on the kernel command | ||
| 186 | # line, make it available through the symlink /dev/root. | ||
| 187 | # Recognise LABEL= and UUID= to support UNetbootin. | ||
| 188 | set -- $(IFS==; echo $o) | ||
| 189 | if [ $2 = "LABEL" ]; then | ||
| 190 | root="/dev/disk/by-label/$3" | ||
| 191 | elif [ $2 = "UUID" ]; then | ||
| 192 | root="/dev/disk/by-uuid/$3" | ||
| 193 | else | ||
| 194 | root=$2 | ||
| 195 | fi | ||
| 196 | ln -s "$root" /dev/root | ||
| 197 | ;; | ||
| 198 | copytoram) | ||
| 199 | copytoram=1 | ||
| 200 | ;; | ||
| 201 | findiso=*) | ||
| 202 | # if an iso name is supplied, try to find the device where | ||
| 203 | # the iso resides on | ||
| 204 | set -- $(IFS==; echo $o) | ||
| 205 | isoPath=$2 | ||
| 206 | ;; | ||
| 207 | esac | ||
| 208 | done | ||
| 209 | |||
| 210 | # Set hostid before modules are loaded. | ||
| 211 | # This is needed by the spl/zfs modules. | ||
| 212 | @setHostId@ | ||
| 213 | |||
| 214 | # Load the required kernel modules. | ||
| 215 | mkdir -p /lib | ||
| 216 | ln -s @modulesClosure@/lib/modules /lib/modules | ||
| 217 | ln -s @modulesClosure@/lib/firmware /lib/firmware | ||
| 218 | echo @extraUtils@/bin/modprobe > /proc/sys/kernel/modprobe | ||
| 219 | for i in @kernelModules@; do | ||
| 220 | info "loading module $(basename $i)..." | ||
| 221 | modprobe $i | ||
| 222 | done | ||
| 223 | |||
| 224 | |||
| 225 | # Create device nodes in /dev. | ||
| 226 | @preDeviceCommands@ | ||
| 227 | info "running udev..." | ||
| 228 | ln -sfn /proc/self/fd /dev/fd | ||
| 229 | ln -sfn /proc/self/fd/0 /dev/stdin | ||
| 230 | ln -sfn /proc/self/fd/1 /dev/stdout | ||
| 231 | ln -sfn /proc/self/fd/2 /dev/stderr | ||
| 232 | mkdir -p /etc/systemd | ||
| 233 | ln -sfn @linkUnits@ /etc/systemd/network | ||
| 234 | mkdir -p /etc/udev | ||
| 235 | ln -sfn @udevRules@ /etc/udev/rules.d | ||
| 236 | mkdir -p /dev/.mdadm | ||
| 237 | systemd-udevd --daemon | ||
| 238 | udevadm trigger --action=add | ||
| 239 | udevadm settle | ||
| 240 | |||
| 241 | |||
| 242 | # XXX: Use case usb->lvm will still fail, usb->luks->lvm is covered | ||
| 243 | @preLVMCommands@ | ||
| 244 | |||
| 245 | info "starting device mapper and LVM..." | ||
| 246 | lvm vgchange -ay | ||
| 247 | |||
| 248 | if test -n "$debug1devices"; then fail; fi | ||
| 249 | |||
| 250 | |||
| 251 | @postDeviceCommands@ | ||
| 252 | |||
| 253 | |||
| 254 | # Return true if the machine is on AC power, or if we can't determine | ||
| 255 | # whether it's on AC power. | ||
| 256 | onACPower() { | ||
| 257 | ! test -d "/proc/acpi/battery" || | ||
| 258 | ! ls /proc/acpi/battery/BAT[0-9]* > /dev/null 2>&1 || | ||
| 259 | ! cat /proc/acpi/battery/BAT*/state | grep "^charging state" | grep -q "discharg" | ||
| 260 | } | ||
| 261 | |||
| 262 | |||
| 263 | # Check the specified file system, if appropriate. | ||
| 264 | checkFS() { | ||
| 265 | local device="$1" | ||
| 266 | local fsType="$2" | ||
| 267 | |||
| 268 | # Only check block devices. | ||
| 269 | if [ ! -b "$device" ]; then return 0; fi | ||
| 270 | |||
| 271 | # Don't check ROM filesystems. | ||
| 272 | if [ "$fsType" = iso9660 -o "$fsType" = udf ]; then return 0; fi | ||
| 273 | |||
| 274 | # Don't check resilient COWs as they validate the fs structures at mount time | ||
| 275 | if [ "$fsType" = btrfs -o "$fsType" = zfs -o "$fsType" = bcachefs ]; then return 0; fi | ||
| 276 | |||
| 277 | # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem. | ||
| 278 | if [ "$fsType" = nilfs2 ]; then return 0; fi | ||
| 279 | |||
| 280 | # Skip fsck for inherently readonly filesystems. | ||
| 281 | if [ "$fsType" = squashfs ]; then return 0; fi | ||
| 282 | |||
| 283 | # If we couldn't figure out the FS type, then skip fsck. | ||
| 284 | if [ "$fsType" = auto ]; then | ||
| 285 | echo 'cannot check filesystem with type "auto"!' | ||
| 286 | return 0 | ||
| 287 | fi | ||
| 288 | |||
| 289 | # Device might be already mounted manually | ||
| 290 | # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs | ||
| 291 | if mount | grep -q "^$device on "; then | ||
| 292 | echo "skip checking already mounted $device" | ||
| 293 | return 0 | ||
| 294 | fi | ||
| 295 | |||
| 296 | # Optionally, skip fsck on journaling filesystems. This option is | ||
| 297 | # a hack - it's mostly because e2fsck on ext3 takes much longer to | ||
| 298 | # recover the journal than the ext3 implementation in the kernel | ||
| 299 | # does (minutes versus seconds). | ||
| 300 | if test -z "@checkJournalingFS@" -a \ | ||
| 301 | \( "$fsType" = ext3 -o "$fsType" = ext4 -o "$fsType" = reiserfs \ | ||
| 302 | -o "$fsType" = xfs -o "$fsType" = jfs -o "$fsType" = f2fs \) | ||
| 303 | then | ||
| 304 | return 0 | ||
| 305 | fi | ||
| 306 | |||
| 307 | # Don't run `fsck' if the machine is on battery power. !!! Is | ||
| 308 | # this a good idea? | ||
| 309 | if ! onACPower; then | ||
| 310 | echo "on battery power, so no \`fsck' will be performed on \`$device'" | ||
| 311 | return 0 | ||
| 312 | fi | ||
| 313 | |||
| 314 | echo "checking $device..." | ||
| 315 | |||
| 316 | fsckFlags= | ||
| 317 | if test "$fsType" != "btrfs"; then | ||
| 318 | fsckFlags="-V -a" | ||
| 319 | fi | ||
| 320 | fsck $fsckFlags "$device" | ||
| 321 | fsckResult=$? | ||
| 322 | |||
| 323 | if test $(($fsckResult | 2)) = $fsckResult; then | ||
| 324 | echo "fsck finished, rebooting..." | ||
| 325 | sleep 3 | ||
| 326 | reboot -f | ||
| 327 | fi | ||
| 328 | |||
| 329 | if test $(($fsckResult | 4)) = $fsckResult; then | ||
| 330 | echo "$device has unrepaired errors, please fix them manually." | ||
| 331 | fail | ||
| 332 | fi | ||
| 333 | |||
| 334 | if test $fsckResult -ge 8; then | ||
| 335 | echo "fsck on $device failed." | ||
| 336 | fail | ||
| 337 | fi | ||
| 338 | |||
| 339 | return 0 | ||
| 340 | } | ||
| 341 | |||
| 342 | |||
| 343 | # Function for mounting a file system. | ||
| 344 | mountFS() { | ||
| 345 | local device="$1" | ||
| 346 | local mountPoint="$2" | ||
| 347 | local options="$3" | ||
| 348 | local fsType="$4" | ||
| 349 | |||
| 350 | if [ "$fsType" = auto ]; then | ||
| 351 | fsType=$(blkid -o value -s TYPE "$device") | ||
| 352 | if [ -z "$fsType" ]; then fsType=auto; fi | ||
| 353 | fi | ||
| 354 | |||
| 355 | # Filter out x- options, which busybox doesn't do yet. | ||
| 356 | local optionsFiltered="$(IFS=,; for i in $options; do if [ "${i:0:2}" != "x-" ]; then echo -n $i,; fi; done)" | ||
| 357 | # Prefix (lower|upper|work)dir with /mnt-root (overlayfs) | ||
| 358 | local optionsPrefixed="$( echo "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g' )" | ||
| 359 | |||
| 360 | echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab | ||
| 361 | |||
| 362 | checkFS "$device" "$fsType" | ||
| 363 | |||
| 364 | # Optionally resize the filesystem. | ||
| 365 | case $options in | ||
| 366 | *x-nixos.autoresize*) | ||
| 367 | if [ "$fsType" = ext2 -o "$fsType" = ext3 -o "$fsType" = ext4 ]; then | ||
| 368 | modprobe "$fsType" | ||
| 369 | echo "resizing $device..." | ||
| 370 | e2fsck -fp "$device" | ||
| 371 | resize2fs "$device" | ||
| 372 | elif [ "$fsType" = f2fs ]; then | ||
| 373 | echo "resizing $device..." | ||
| 374 | fsck.f2fs -fp "$device" | ||
| 375 | resize.f2fs "$device" | ||
| 376 | fi | ||
| 377 | ;; | ||
| 378 | esac | ||
| 379 | |||
| 380 | # Create backing directories for overlayfs | ||
| 381 | if [ "$fsType" = overlay ]; then | ||
| 382 | for i in upper work; do | ||
| 383 | dir="$( echo "$optionsPrefixed" | grep -o "${i}dir=[^,]*" )" | ||
| 384 | mkdir -m 0700 -p "${dir##*=}" | ||
| 385 | done | ||
| 386 | fi | ||
| 387 | |||
| 388 | info "mounting $device on $mountPoint..." | ||
| 389 | |||
| 390 | mkdir -p "/mnt-root$mountPoint" | ||
| 391 | |||
| 392 | # For ZFS and CIFS mounts, retry a few times before giving up. | ||
| 393 | # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383. | ||
| 394 | local n=0 | ||
| 395 | while true; do | ||
| 396 | mount "/mnt-root$mountPoint" && break | ||
| 397 | if [ \( "$fsType" != cifs -a "$fsType" != zfs \) -o "$n" -ge 10 ]; then fail; break; fi | ||
| 398 | echo "retrying..." | ||
| 399 | sleep 1 | ||
| 400 | n=$((n + 1)) | ||
| 401 | done | ||
| 402 | |||
| 403 | [ "$mountPoint" == "/" ] && | ||
| 404 | [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ] && | ||
| 405 | lustrateRoot "/mnt-root" | ||
| 406 | |||
| 407 | true | ||
| 408 | } | ||
| 409 | |||
| 410 | lustrateRoot () { | ||
| 411 | local root="$1" | ||
| 412 | |||
| 413 | echo | ||
| 414 | echo -e "\e[1;33m<<< NixOS is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m" | ||
| 415 | echo | ||
| 416 | |||
| 417 | mkdir -m 0755 -p "$root/old-root.tmp" | ||
| 418 | |||
| 419 | echo | ||
| 420 | echo "Moving impurities out of the way:" | ||
| 421 | for d in "$root"/* | ||
| 422 | do | ||
| 423 | [ "$d" == "$root/nix" ] && continue | ||
| 424 | [ "$d" == "$root/boot" ] && continue # Don't render the system unbootable | ||
| 425 | [ "$d" == "$root/old-root.tmp" ] && continue | ||
| 426 | |||
| 427 | mv -v "$d" "$root/old-root.tmp" | ||
| 428 | done | ||
| 429 | |||
| 430 | # Use .tmp to make sure subsequent invokations don't clash | ||
| 431 | mv -v "$root/old-root.tmp" "$root/old-root" | ||
| 432 | |||
| 433 | mkdir -m 0755 -p "$root/etc" | ||
| 434 | touch "$root/etc/NIXOS" | ||
| 435 | |||
| 436 | exec 4< "$root/old-root/etc/NIXOS_LUSTRATE" | ||
| 437 | |||
| 438 | echo | ||
| 439 | echo "Restoring selected impurities:" | ||
| 440 | while read -u 4 keeper; do | ||
| 441 | dirname="$(dirname "$keeper")" | ||
| 442 | mkdir -m 0755 -p "$root/$dirname" | ||
| 443 | cp -av "$root/old-root/$keeper" "$root/$keeper" | ||
| 444 | done | ||
| 445 | |||
| 446 | exec 4>&- | ||
| 447 | } | ||
| 448 | |||
| 449 | |||
| 450 | |||
| 451 | if test -e /sys/power/resume -a -e /sys/power/disk; then | ||
| 452 | if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then | ||
| 453 | resumeDev="@resumeDevice@" | ||
| 454 | resumeInfo="$(udevadm info -q property "$resumeDev" )" | ||
| 455 | else | ||
| 456 | for sd in @resumeDevices@; do | ||
| 457 | # Try to detect resume device. According to Ubuntu bug: | ||
| 458 | # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1 | ||
| 459 | # when there are multiple swap devices, we can't know where the hibernate | ||
| 460 | # image will reside. We can check all of them for swsuspend blkid. | ||
| 461 | if waitDevice "$sd"; then | ||
| 462 | resumeInfo="$(udevadm info -q property "$sd")" | ||
| 463 | if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then | ||
| 464 | resumeDev="$sd" | ||
| 465 | break | ||
| 466 | fi | ||
| 467 | fi | ||
| 468 | done | ||
| 469 | fi | ||
| 470 | if test -n "$resumeDev"; then | ||
| 471 | resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')" | ||
| 472 | resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')" | ||
| 473 | echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..." | ||
| 474 | fi | ||
| 475 | fi | ||
| 476 | |||
| 477 | # If we have a path to an iso file, find the iso and link it to /dev/root | ||
| 478 | if [ -n "$isoPath" ]; then | ||
| 479 | mkdir -p /findiso | ||
| 480 | |||
| 481 | for delay in 5 10; do | ||
| 482 | blkid | while read -r line; do | ||
| 483 | device=$(echo "$line" | sed 's/:.*//') | ||
| 484 | type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/') | ||
| 485 | |||
| 486 | mount -t "$type" "$device" /findiso | ||
| 487 | if [ -e "/findiso$isoPath" ]; then | ||
| 488 | ln -sf "/findiso$isoPath" /dev/root | ||
| 489 | break 2 | ||
| 490 | else | ||
| 491 | umount /findiso | ||
| 492 | fi | ||
| 493 | done | ||
| 494 | |||
| 495 | sleep "$delay" | ||
| 496 | done | ||
| 497 | fi | ||
| 498 | |||
| 499 | # Try to find and mount the root device. | ||
| 500 | mkdir -p $targetRoot | ||
| 501 | |||
| 502 | exec 3< @fsInfo@ | ||
| 503 | |||
| 504 | while read -u 3 mountPoint; do | ||
| 505 | read -u 3 device | ||
| 506 | read -u 3 fsType | ||
| 507 | read -u 3 options | ||
| 508 | |||
| 509 | # !!! Really quick hack to support bind mounts, i.e., where the | ||
| 510 | # "device" should be taken relative to /mnt-root, not /. Assume | ||
| 511 | # that every device that starts with / but doesn't start with /dev | ||
| 512 | # is a bind mount. | ||
| 513 | pseudoDevice= | ||
| 514 | case $device in | ||
| 515 | /dev/*) | ||
| 516 | ;; | ||
| 517 | //*) | ||
| 518 | # Don't touch SMB/CIFS paths. | ||
| 519 | pseudoDevice=1 | ||
| 520 | ;; | ||
| 521 | /*) | ||
| 522 | device=/mnt-root$device | ||
| 523 | ;; | ||
| 524 | *) | ||
| 525 | # Not an absolute path; assume that it's a pseudo-device | ||
| 526 | # like an NFS path (e.g. "server:/path"). | ||
| 527 | pseudoDevice=1 | ||
| 528 | ;; | ||
| 529 | esac | ||
| 530 | |||
| 531 | if test -z "$pseudoDevice" && ! waitDevice "$device"; then | ||
| 532 | # If it doesn't appear, try to mount it anyway (and | ||
| 533 | # probably fail). This is a fallback for non-device "devices" | ||
| 534 | # that we don't properly recognise. | ||
| 535 | echo "Timed out waiting for device $device, trying to mount anyway." | ||
| 536 | fi | ||
| 537 | |||
| 538 | # Wait once more for the udev queue to empty, just in case it's | ||
| 539 | # doing something with $device right now. | ||
| 540 | udevadm settle | ||
| 541 | |||
| 542 | # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs. | ||
| 543 | if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then | ||
| 544 | fsType=$(blkid -o value -s TYPE "$device") | ||
| 545 | fsSize=$(blockdev --getsize64 "$device") | ||
| 546 | |||
| 547 | mkdir -p /tmp-iso | ||
| 548 | mount -t "$fsType" /dev/root /tmp-iso | ||
| 549 | mountFS tmpfs /iso size="$fsSize" tmpfs | ||
| 550 | |||
| 551 | cp -r /tmp-iso/* /mnt-root/iso/ | ||
| 552 | |||
| 553 | umount /tmp-iso | ||
| 554 | rmdir /tmp-iso | ||
| 555 | continue | ||
| 556 | fi | ||
| 557 | |||
| 558 | if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ ! -z "$persistence" ]; then | ||
| 559 | echo persistence... | ||
| 560 | waitDevice "$persistence" | ||
| 561 | echo enabling persistence... | ||
| 562 | mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto" | ||
| 563 | continue | ||
| 564 | fi | ||
| 565 | |||
| 566 | mountFS "$device" "$mountPoint" "$options" "$fsType" | ||
| 567 | done | ||
| 568 | |||
| 569 | exec 3>&- | ||
| 570 | |||
| 571 | |||
| 572 | @postMountCommands@ | ||
| 573 | |||
| 574 | |||
| 575 | # Emit a udev rule for /dev/root to prevent systemd from complaining. | ||
| 576 | if [ -e /mnt-root/iso ]; then | ||
| 577 | eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=/mnt-root/iso) | ||
| 578 | else | ||
| 579 | eval $(udevadm info --export --export-prefix=ROOT_ --device-id-of-file=$targetRoot) | ||
| 580 | fi | ||
| 581 | if [ "$ROOT_MAJOR" -a "$ROOT_MINOR" -a "$ROOT_MAJOR" != 0 ]; then | ||
| 582 | mkdir -p /run/udev/rules.d | ||
| 583 | echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'$ROOT_MAJOR'", ENV{MINOR}=="'$ROOT_MINOR'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules | ||
| 584 | fi | ||
| 585 | |||
| 586 | |||
| 587 | # Stop udevd. | ||
| 588 | udevadm control --exit | ||
| 589 | |||
| 590 | # Reset the logging file descriptors. | ||
| 591 | # Do this just before pkill, which will kill the tee process. | ||
| 592 | exec 1>&$logOutFd 2>&$logErrFd | ||
| 593 | eval "exec $logOutFd>&- $logErrFd>&-" | ||
| 594 | |||
| 595 | # Kill any remaining processes, just to be sure we're not taking any | ||
| 596 | # with us into stage 2. But keep storage daemons like unionfs-fuse. | ||
| 597 | # | ||
| 598 | # Storage daemons are distinguished by an @ in front of their command line: | ||
| 599 | # https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/ | ||
| 600 | for pid in $(pgrep -v -f '^@'); do | ||
| 601 | # Make sure we don't kill kernel processes, see #15226 and: | ||
| 602 | # http://stackoverflow.com/questions/12213445/identifying-kernel-threads | ||
| 603 | readlink "/proc/$pid/exe" &> /dev/null || continue | ||
| 604 | # Try to avoid killing ourselves. | ||
| 605 | [ $pid -eq $$ ] && continue | ||
| 606 | kill -9 "$pid" | ||
| 607 | done | ||
| 608 | |||
| 609 | if test -n "$debug1mounts"; then fail; fi | ||
| 610 | |||
| 611 | |||
| 612 | # Restore /proc/sys/kernel/modprobe to its original value. | ||
| 613 | echo /sbin/modprobe > /proc/sys/kernel/modprobe | ||
| 614 | |||
| 615 | |||
| 616 | # Start stage 2. `switch_root' deletes all files in the ramfs on the | ||
| 617 | # current root. The path has to be valid in the chroot not outside. | ||
| 618 | if [ ! -e "$targetRoot/$stage2Init" ]; then | ||
| 619 | stage2Check=${stage2Init} | ||
| 620 | while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do | ||
| 621 | stage2Check=${stage2Check%/*} | ||
| 622 | done | ||
| 623 | if [ ! -L "$targetRoot/$stage2Check" ]; then | ||
| 624 | echo "stage 2 init script ($targetRoot/$stage2Init) not found" | ||
| 625 | fail | ||
| 626 | fi | ||
| 627 | fi | ||
| 628 | |||
| 629 | mkdir -m 0755 -p $targetRoot/proc $targetRoot/sys $targetRoot/dev $targetRoot/run | ||
| 630 | |||
| 631 | mount --move /proc $targetRoot/proc | ||
| 632 | mount --move /sys $targetRoot/sys | ||
| 633 | mount --move /dev $targetRoot/dev | ||
| 634 | mount --move /run $targetRoot/run | ||
| 635 | |||
| 636 | exec env -i $(type -P switch_root) "$targetRoot" "$stage2Init" | ||
| 637 | |||
| 638 | fail # should never be reached | ||
