Created
October 30, 2025 16:47
-
-
Save flipphillips/a1b291d8cf65409388f0d4c36f7df5ec to your computer and use it in GitHub Desktop.
Ugh. Rocky. NVidia Drivers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Kernel update verification and recovery script
# Adds checks for free space on /boot and an optional automatic cleanup.
#
# Usage: <script> [--auto|-a] [--keep N]
#   --auto, -a   non-interactive: free space in /boot automatically
#   --keep N     stop removing kernel packages once N or fewer are installed
#                (default 2)
#   --help, -h   print usage and exit
#
# Enhancements:
# - When run with --auto, will attempt additional heuristics to free space in /boot:
#   1) remove old kernel packages (existing behavior)
#   2) remove orphaned files in /boot that don't match any installed kernel
#   3) as a last resort, archive the oldest non-active boot files to
#      /var/tmp/boot-archive-<timestamp>.tar.xz
# Note: Archiving moves files out of /boot but keeps them on the same
# filesystem if /var/tmp is on the rootfs.
set -euo pipefail

BOOT_DIR="/boot"

# Option defaults; overwritten by parse_args.
AUTO_REMOVE=false
KEEP_COUNT=2

#######################################
# Parse command-line options.
# Globals:   AUTO_REMOVE (written), KEEP_COUNT (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on --help; an error on stderr for a bad --keep value
# Returns:   0 on success; exits 0 on --help, exits 2 on an invalid --keep
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --auto|-a)
        AUTO_REMOVE=true
        shift
        ;;
      --keep)
        # Without this check a trailing "--keep" made "shift 2" fail and
        # set -e terminated the script without any message; a non-numeric
        # value would only blow up much later inside a numeric test.
        if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "ERROR: --keep requires a non-negative integer argument" >&2
          exit 2
        fi
        # Force base 10 so values such as "08" are not treated as octal.
        KEEP_COUNT=$(( 10#$2 ))
        shift 2
        ;;
      --help|-h)
        echo "Usage: $0 [--auto|-a] [--keep N]"
        exit 0
        ;;
      *)
        # Unknown arguments are ignored.
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Pick the first entry of "rpm -q kernel --last", strip "kernel-" from the
# package name, and derive the module directory and initramfs path from it.
LATEST_KERNEL=$(
  rpm -q kernel --last \
    | head -n 1 \
    | awk '{ sub(/kernel-/, "", $1); print $1 }'
)
MODULE_DIR="/lib/modules/${LATEST_KERNEL}"
INITRAMFS="${BOOT_DIR}/initramfs-${LATEST_KERNEL}.img"

echo "Checking kernel: $LATEST_KERNEL"
# Print the space available on the filesystem holding BOOT_DIR, in 1K blocks.
# Globals: BOOT_DIR (read)
# Outputs: the last line of df's "avail" column with all spaces removed
get_boot_avail_kb() {
  # Long options only (--block-size rather than -k) so the units are explicit.
  # sed drops every line except the last, then strips spaces from it.
  df --block-size=1K --output=avail "$BOOT_DIR" \
    | sed -e '$!d' -e 's/ //g'
}
# Estimate how much room a new initramfs needs: the size of the largest
# existing initramfs image in BOOT_DIR (rounded up to whole KB) plus a buffer.
max_existing_init_kb=0
for f in "$BOOT_DIR"/initramfs-*.img; do
  # An unmatched glob stays literal; skip anything that is not a regular file.
  [ -f "$f" ] || continue
  img_bytes=$(stat -c%s "$f")
  img_kb=$(( (img_bytes + 1023) / 1024 ))
  if (( img_kb > max_existing_init_kb )); then
    max_existing_init_kb=$img_kb
  fi
done

# No initramfs found: fall back to a default estimate of 200 MB.
if (( max_existing_init_kb == 0 )); then
  max_existing_init_kb=$(( 200 * 1024 ))
fi

# Safety buffer of 50 MB on top of the estimate.
NEEDED_KB=$(( max_existing_init_kb + 50 * 1024 ))
BOOT_AVAIL_KB=$(get_boot_avail_kb)

echo "Available on $BOOT_DIR: $BOOT_AVAIL_KB KB; estimated needed: $NEEDED_KB KB"
# Verify kernel modules: when the module directory of the target kernel is
# absent, reinstall that kernel's kernel-core and kernel-modules packages.
[ -d "$MODULE_DIR" ] || {
  echo "ERROR: Missing kernel modules for $LATEST_KERNEL"
  echo "Reinstalling kernel packages..."
  dnf -y reinstall "kernel-core-$LATEST_KERNEL" "kernel-modules-$LATEST_KERNEL"
}
# Helper: print the first field of every "rpm -q kernel --last" line with the
# leading 'kernel-' removed, one entry per line.
# Always returns 0: a failing query is deliberately tolerated.
installed_kernels() {
  rpm -q kernel --last \
    | awk '{ sub(/^kernel-/, "", $1); print $1 }' \
    || true
}
#######################################
# Helper: list kernel artifacts in BOOT_DIR that do not belong to any
# installed kernel.
#
# Recognised artifacts: vmlinuz-*, initramfs-*.img, System.map-*, config-*,
# abi-*. The version is whatever follows the prefix, minus a trailing ".img"
# and a trailing "kdump" (kdump initramfs images are named
# initramfs-<version>kdump.img).
#
# Never reported, regardless of the package database:
#   - rescue images (version starting with "0-rescue-")
#   - artifacts of the currently running kernel (uname -r)
#
# Globals:   BOOT_DIR (read)
# Calls:     installed_kernels
# Outputs:   one path per line on stdout
# Returns:   0
#######################################
find_orphaned_boot_files() {
  local -a prefixes=("vmlinuz-" "initramfs-" "System.map-" "config-" "abi-")
  local -a installed=()
  local path base prefix ver k keep running

  mapfile -t installed < <(installed_kernels)
  running=$(uname -r 2>/dev/null) || running=""

  for path in "$BOOT_DIR"/*; do
    # Only regular files are candidates; callers delete them with rm -f.
    [[ -f "$path" ]] || continue
    base=${path##*/}

    for prefix in "${prefixes[@]}"; do
      [[ "$base" == "$prefix"* ]] || continue

      ver=${base#"$prefix"}
      # Strip only the known suffixes. Cutting at the first dot (as
      # ${ver%%.*} does) reduces "5.14.0-362.el9.x86_64.img" to "5", which
      # matches nothing and marks every kernel file as orphaned.
      ver=${ver%.img}
      ver=${ver%kdump}

      keep=false
      if [[ "$ver" == 0-rescue-* ]]; then
        keep=true
      elif [[ -n "$running" && "$ver" == "$running" ]]; then
        keep=true
      else
        for k in "${installed[@]}"; do
          if [[ "$k" == "$ver" ]]; then
            keep=true
            break
          fi
        done
      fi

      if [[ "$keep" == false ]]; then
        printf '%s\n' "$path"
      fi
      # A file name can match at most one prefix.
      break
    done
  done
  return 0
}
# If the target initramfs is missing, make sure /boot has room for it and
# then rebuild it with dracut.
#
# When space is short and --auto was given, space is reclaimed in escalating
# steps, each one stopping as soon as BOOT_AVAIL_KB >= NEEDED_KB:
#   1) remove old kernel packages, oldest first (never the running or the
#      target kernel; stops once KEEP_COUNT or fewer kernels are installed)
#   2) delete files reported by find_orphaned_boot_files
#   3) archive up to 10 of the oldest remaining files in BOOT_DIR to
#      /var/tmp/boot-archive-<timestamp>.tar.xz and delete them from BOOT_DIR
# Without --auto the script prints diagnostics and exits 1.
if [ ! -f "$INITRAMFS" ]; then
  echo "WARNING: Missing initramfs for $LATEST_KERNEL"
  echo "Preparing to rebuild initramfs..."

  if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
    echo "Not enough free space on $BOOT_DIR to build initramfs."
    echo "Free: $BOOT_AVAIL_KB KB, required: $NEEDED_KB KB."

    # List candidate files/kernels to remove.
    echo "Current files in $BOOT_DIR (largest first):"
    # head closes the pipe after 30 lines; with pipefail the resulting SIGPIPE
    # on ls would abort the script, so this listing is explicitly best-effort.
    ls -lhS "$BOOT_DIR" | head -n 30 || true

    # Show installed kernels.
    echo
    echo "Installed kernels (newest first):"
    rpm -q kernel --last | awk '{print $1}' | sed 's/kernel-//'

    if [ "$AUTO_REMOVE" = true ]; then
      echo "Auto-remove enabled: attempting to remove oldest kernels until enough space is available."
      running_kernel=$(uname -r)

      # --- Step 1: remove old kernel packages, oldest first ---------------
      mapfile -t kernels < <(rpm -q kernel --last | awk '{print $1}' | sed 's/kernel-//' | tac)
      removed_any=false
      for ver in "${kernels[@]}"; do
        # Never remove the running kernel or the target kernel.
        if [[ "$ver" == "$running_kernel" ]] || [[ "$ver" == "$LATEST_KERNEL" ]]; then
          continue
        fi

        # Re-count on every pass: earlier removals change the total.
        total_installed=$(rpm -q kernel | wc -l)
        if [ "$total_installed" -le "$KEEP_COUNT" ]; then
          echo "Reached keep limit ($KEEP_COUNT); stopping removals."
          break
        fi

        echo "Attempting to remove kernel packages for $ver"
        # Try kernel-core and kernel-modules for that version; fall back to
        # kernel-<ver>.
        if dnf remove -y "kernel-core-$ver" "kernel-modules-$ver" >/dev/null 2>&1; then
          removed_any=true
        elif dnf remove -y "kernel-$ver" >/dev/null 2>&1; then
          removed_any=true
        else
          echo "Failed to remove kernel packages for $ver; skipping."
        fi

        BOOT_AVAIL_KB=$(get_boot_avail_kb)
        echo "Available after attempt: $BOOT_AVAIL_KB KB"
        if [ "$BOOT_AVAIL_KB" -ge "$NEEDED_KB" ]; then
          echo "Enough space freed."
          break
        fi
      done

      # --- Step 2: remove orphaned files in /boot --------------------------
      if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
        echo "Attempting to remove orphaned /boot files that don't match installed kernels..."
        mapfile -t orphans < <(find_orphaned_boot_files)
        if [ "${#orphans[@]}" -gt 0 ]; then
          for f in "${orphans[@]}"; do
            # Defensive guard: whatever the orphan detection reported, files
            # of the running or target kernel are never deleted.
            base=$(basename "$f")
            if [[ "$base" == *"$running_kernel"* ]] || [[ "$base" == *"$LATEST_KERNEL"* ]]; then
              echo "Skipping $f: belongs to the running or target kernel."
              continue
            fi

            echo "Removing orphaned file: $f"
            if rm -f "$f"; then
              echo "Removed $f"
            else
              echo "Failed to remove $f; skipping."
            fi

            BOOT_AVAIL_KB=$(get_boot_avail_kb)
            if [ "$BOOT_AVAIL_KB" -ge "$NEEDED_KB" ]; then
              echo "Enough space freed by removing orphans."
              break
            fi
          done
        else
          echo "No orphaned boot files detected."
        fi
      fi

      # --- Step 3 (last resort): archive oldest boot files to /var/tmp -----
      if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
        echo "Not enough space after package removals and orphan cleanup. Preparing archival fallback."
        ARCHIVE_DIR=/var/tmp
        ts=$(date +%Y%m%d%H%M%S)
        ARCHIVE="$ARCHIVE_DIR/boot-archive-$ts.tar.xz"

        # Candidates: regular files directly in BOOT_DIR, oldest mtime first.
        # A tab separates mtime from path and cut keeps everything after it,
        # so paths containing spaces are not truncated.
        echo "Building list of archival candidates..."
        mapfile -t candidates < <(
          find "$BOOT_DIR" -maxdepth 1 -type f -printf '%T@\t%p\n' \
            | sort -n \
            | cut -f2-
        )

        to_archive=()
        for f in "${candidates[@]}"; do
          base=$(basename "$f")
          # Skip files for the running kernel or the target kernel.
          if [[ "$base" == *"$running_kernel"* ]] || [[ "$base" == *"$LATEST_KERNEL"* ]]; then
            continue
          fi
          to_archive+=("$f")
          # Heuristic: collect at most 10 files.
          if [ "${#to_archive[@]}" -ge 10 ]; then
            break
          fi
        done

        if [ "${#to_archive[@]}" -eq 0 ]; then
          echo "No safe archival candidates found in $BOOT_DIR. Cannot proceed automatically."
        else
          echo "Archiving ${#to_archive[@]} files to $ARCHIVE to free space."
          # tar runs with -C BOOT_DIR, so it is handed basenames only.
          basenames=()
          for f in "${to_archive[@]}"; do
            basenames+=("$(basename "$f")")
          done

          if tar -cJf "$ARCHIVE" -C "$BOOT_DIR" -- "${basenames[@]:-}"; then
            echo "Archive created at $ARCHIVE. Removing archived files from $BOOT_DIR."
            for f in "${to_archive[@]}"; do
              rm -f "$f" || true
            done
            BOOT_AVAIL_KB=$(get_boot_avail_kb)
            echo "Available after archival: $BOOT_AVAIL_KB KB"
          else
            echo "Failed to create archive $ARCHIVE. Leaving files intact."
          fi
        fi
      fi

      if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
        echo "Unable to free enough space automatically. Please free space in $BOOT_DIR and retry."
        exit 1
      fi

      if [ "$removed_any" = true ]; then
        echo "Cleaning up package metadata..."
        dnf -y autoremove || true
      fi
    else
      echo "To automatically remove old kernels and try again, re-run with --auto or -a"
      echo "Aborting rebuild to avoid filling $BOOT_DIR."
      exit 1
    fi
  fi

  echo "Rebuilding initramfs..."
  dracut -f "$INITRAMFS" "$LATEST_KERNEL"
fi
# Verify NVIDIA modules (if applicable): when an nvidia module is currently
# loaded, check that one is also available for the target kernel.
#
# grep is run WITHOUT -q on purpose: with "set -o pipefail", grep -q exits on
# the first match, lsmod can then die from SIGPIPE, and the pipeline would be
# reported as failed even though the module is loaded.
if lsmod | grep nvidia >/dev/null; then
  if ! modinfo -k "$LATEST_KERNEL" nvidia &>/dev/null; then
    echo "WARNING: NVIDIA modules missing for $LATEST_KERNEL"
    echo "Rebuilding NVIDIA driver..."
    # TODO: no rebuild is performed yet. Add the command that matches your
    # installation method, for example:
    #   .run installer: /path/to/NVIDIA*.run --silent
    #   akmod:          akmods --force
  fi
fi
# Update GRUB.
# The default target is /boot/grub2/grub.cfg. On a system booted via UEFI the
# active configuration may live elsewhere (e.g. under /boot/efi/EFI/<distro>/);
# /etc/grub2-efi.cfg is the distribution's symlink to it, so follow that link
# when it resolves to an existing file.
# NOTE(review): confirm the symlink target on your release before relying on
# this; when it cannot be resolved the original path is used unchanged.
GRUB_CFG=/boot/grub2/grub.cfg
if [ -d /sys/firmware/efi ] && [ -L /etc/grub2-efi.cfg ]; then
  efi_cfg=$(readlink -f /etc/grub2-efi.cfg || true)
  if [ -n "$efi_cfg" ] && [ -f "$efi_cfg" ]; then
    GRUB_CFG=$efi_cfg
  fi
fi

echo "Updating GRUB configuration..."
grub2-mkconfig -o "$GRUB_CFG"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment