Skip to content

Instantly share code, notes, and snippets.

@flipphillips
Created October 30, 2025 16:47
Show Gist options
  • Select an option

  • Save flipphillips/a1b291d8cf65409388f0d4c36f7df5ec to your computer and use it in GitHub Desktop.

Select an option

Save flipphillips/a1b291d8cf65409388f0d4c36f7df5ec to your computer and use it in GitHub Desktop.
Ugh. Rocky. NVidia Drivers
#!/bin/bash
# Kernel update verification and recovery script
# Adds checks for free space on /boot and an optional automatic cleanup.
# Enhancements:
# - When run with --auto, will attempt additional heuristics to free space in /boot:
# 1) remove old kernel packages (existing behavior)
# 2) remove orphaned files in /boot that don't match any installed kernel
# 3) as a last resort, archive the oldest non-active boot files to /var/tmp/boot-archive-<timestamp>.tar.xz
# Note: Archiving only frees space when /var/tmp is on a different filesystem than /boot;
# if both live on the same filesystem (e.g. /boot is part of the rootfs), the files merely move within it.
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

BOOT_DIR="/boot"

# Command-line settings (defaults):
#   AUTO_REMOVE - "true" enables the non-interactive cleanup of /boot (--auto/-a)
#   KEEP_COUNT  - how many installed kernels must always be kept (--keep N)
AUTO_REMOVE=false
KEEP_COUNT=2

#######################################
# Parse the script's command-line options.
# Globals:   AUTO_REMOVE (written), KEEP_COUNT (written)
# Arguments: the script's arguments ("$@")
# Outputs:   usage text on stdout for --help; diagnostics on stderr
# Returns:   0 on success; exits 0 after --help; exits 2 when --keep has a
#            missing or non-numeric value
#######################################
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty-string argument does not silently end option parsing.
  while (( $# > 0 )); do
    case "$1" in
      --auto | -a)
        AUTO_REMOVE=true
        shift
        ;;
      --keep)
        # Validate before "shift 2": shifting past the end fails, and under
        # "set -e" that would abort the script without any message.
        if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]]; then
          echo "ERROR: --keep requires a non-negative integer argument" >&2
          exit 2
        fi
        KEEP_COUNT=$2
        shift 2
        ;;
      --help | -h)
        echo "Usage: $0 [--auto|-a] [--keep N]"
        exit 0
        ;;
      *)
        # Unrecognized arguments are still ignored, but no longer silently.
        echo "WARNING: ignoring unrecognized argument: '$1'" >&2
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Target kernel: the first line of "rpm -q kernel --last", reduced to its first
# field with the "kernel-" text removed. The module directory and initramfs
# path are derived from that version string.
LATEST_KERNEL="$(
  rpm -q kernel --last \
    | head -1 \
    | awk '{ sub(/kernel-/, "", $1); print $1 }'
)"
MODULE_DIR="/lib/modules/${LATEST_KERNEL}"
INITRAMFS="${BOOT_DIR}/initramfs-${LATEST_KERNEL}.img"
printf 'Checking kernel: %s\n' "${LATEST_KERNEL}"
#######################################
# Report the free space of the filesystem that holds BOOT_DIR.
# Globals:   BOOT_DIR (read)
# Outputs:   the last line of df's "avail" column (1K blocks), spaces removed
#######################################
get_boot_avail_kb() {
  # Long options only; --block-size=1K makes df report kilobytes. sed keeps
  # just the final line (the value below the column header) and strips spaces.
  df --block-size=1K --output=avail "$BOOT_DIR" | sed -n '${s/ //g;p;}'
}
# Estimate how much room a new initramfs needs: take the largest initramfs
# image already present in BOOT_DIR, with sizes rounded up to whole KB.
max_existing_init_kb=0
for image in "$BOOT_DIR"/initramfs-*.img; do
  # An unmatched glob stays as literal text; the -f test filters it out.
  [[ -f "$image" ]] || continue
  image_bytes=$(stat -c%s "$image")
  image_kb=$(( (image_bytes + 1023) / 1024 ))
  if (( image_kb > max_existing_init_kb )); then
    max_existing_init_kb=$image_kb
  fi
done

# Nothing to measure: fall back to a 200 MB estimate.
if (( max_existing_init_kb == 0 )); then
  max_existing_init_kb=$(( 200 * 1024 ))
fi

# Required space = largest image + a 50 MB safety buffer.
NEEDED_KB=$(( max_existing_init_kb + 50 * 1024 ))
# Snapshot the free space on the boot filesystem and print it next to the
# estimate computed above.
BOOT_AVAIL_KB="$(get_boot_avail_kb)"
printf 'Available on %s: %s KB; estimated needed: %s KB\n' \
  "$BOOT_DIR" "$BOOT_AVAIL_KB" "$NEEDED_KB"

# The target kernel needs its module directory; when it is absent, reinstall
# the kernel-core and kernel-modules packages for that version.
if ! [[ -d "$MODULE_DIR" ]]; then
  printf '%s\n' "ERROR: Missing kernel modules for $LATEST_KERNEL"
  printf '%s\n' "Reinstalling kernel packages..."
  dnf reinstall "kernel-core-${LATEST_KERNEL}" "kernel-modules-${LATEST_KERNEL}" -y
fi
#######################################
# List the versions of the installed "kernel" packages, one per line, in the
# order "rpm -q kernel --last" reports them, with a leading "kernel-" removed.
# Outputs:   version strings on stdout
# Returns:   always 0 (a failing query is deliberately tolerated)
#######################################
installed_kernels() {
  rpm -q kernel --last | awk '{ sub(/^kernel-/, "", $1); print $1 }' || true
}
#######################################
# Find boot files that look like kernel artifacts but do not belong to any
# installed kernel.
# Globals:   BOOT_DIR (read)
# Arguments: none
# Outputs:   one path per line on stdout for every orphaned artifact; a
#            warning on stderr when the installed-kernel list is empty
# Returns:   0
#######################################
find_orphaned_boot_files() {
  # Artifact name prefixes: vmlinuz-*, initramfs-*.img, System.map-*, config-*, abi-*
  local -a prefixes=("vmlinuz-" "initramfs-" "System.map-" "config-" "abi-")
  local -a installed=()
  local path base prefix ver known orphaned

  mapfile -t installed < <(installed_kernels)

  # With no kernel list every artifact would look orphaned. Report nothing
  # rather than nominate the whole of BOOT_DIR for deletion.
  if (( ${#installed[@]} == 0 )); then
    echo "WARNING: could not determine installed kernels; skipping orphan detection." >&2
    return 0
  fi

  for path in "$BOOT_DIR"/*; do
    [[ -e "$path" ]] || continue
    base=${path##*/}
    for prefix in "${prefixes[@]}"; do
      [[ "$base" == "$prefix"* ]] || continue

      # Extract the version. Kernel versions contain dots themselves (e.g.
      # 5.14.0-503.el9.x86_64), so only known trailing decorations are
      # stripped; cutting at the first dot would leave just "5" and make
      # every file look orphaned.
      ver=${base#"$prefix"}
      ver=${ver%.img}
      ver=${ver%kdump}

      orphaned=true
      if [[ "$ver" == 0-rescue-* ]]; then
        # Rescue images are named after a machine id, not a kernel package
        # version; they never match the rpm list and are not orphans.
        orphaned=false
      else
        for known in "${installed[@]}"; do
          if [[ "$known" == "$ver" ]]; then
            orphaned=false
            break
          fi
        done
      fi

      if [[ "$orphaned" == true ]]; then
        printf '%s\n' "$path"
      fi
      # A file name can start with only one of the prefixes.
      break
    done
  done
  return 0
}
# ---------------------------------------------------------------------------
# Rebuild the target kernel's initramfs when it is missing, first making sure
# BOOT_DIR has NEEDED_KB free. With --auto, space is recovered in three
# escalating steps: remove old kernel packages, delete orphaned boot files,
# then archive the oldest boot files to /var/tmp. Without --auto the script
# aborts rather than risk filling BOOT_DIR.
# ---------------------------------------------------------------------------

#######################################
# Tell whether a boot file belongs to the running or the target kernel.
# Globals:   LATEST_KERNEL (read)
# Arguments: $1 - path or file name
#            $2 - running kernel version (uname -r)
# Returns:   0 when the file name contains either version, 1 otherwise
#######################################
is_protected_boot_file() {
  local name=${1##*/}
  local running=$2
  [[ "$name" == *"$running"* || "$name" == *"$LATEST_KERNEL"* ]]
}

#######################################
# Step 1: remove installed kernels, oldest first, until enough space is free.
# Never removes the running or target kernel and stops once only KEEP_COUNT
# kernel packages remain.
# Globals:   BOOT_AVAIL_KB (written), removed_any (written),
#            NEEDED_KB, KEEP_COUNT, LATEST_KERNEL (read)
# Arguments: $1 - running kernel version
#######################################
free_space_by_removing_kernels() {
  local running=$1
  local -a kernels=()
  local ver total_installed

  # installed_kernels lists newest first; tac flips it to oldest first.
  mapfile -t kernels < <(installed_kernels | tac)
  for ver in "${kernels[@]}"; do
    if [[ "$ver" == "$running" || "$ver" == "$LATEST_KERNEL" ]]; then
      continue
    fi
    # Re-count on every pass: earlier iterations may have removed packages.
    total_installed=$(rpm -q kernel | wc -l)
    if [ "$total_installed" -le "$KEEP_COUNT" ]; then
      echo "Reached keep limit ($KEEP_COUNT); stopping removals."
      break
    fi
    echo "Attempting to remove kernel packages for $ver"
    # Try kernel-core + kernel-modules first; fall back to kernel-<ver>.
    if dnf remove -y "kernel-core-$ver" "kernel-modules-$ver" >/dev/null 2>&1 \
      || dnf remove -y "kernel-$ver" >/dev/null 2>&1; then
      removed_any=true
    else
      echo "Failed to remove kernel packages for $ver; skipping."
    fi
    BOOT_AVAIL_KB=$(get_boot_avail_kb)
    echo "Available after attempt: $BOOT_AVAIL_KB KB"
    if [ "$BOOT_AVAIL_KB" -ge "$NEEDED_KB" ]; then
      echo "Enough space freed."
      break
    fi
  done
  return 0
}

#######################################
# Step 2: delete boot files reported by find_orphaned_boot_files.
# Globals:   BOOT_AVAIL_KB (written), NEEDED_KB, LATEST_KERNEL (read)
# Arguments: $1 - running kernel version
#######################################
free_space_by_removing_orphans() {
  local running=$1
  local -a orphans=()
  local path

  echo "Attempting to remove orphaned /boot files that don't match installed kernels..."
  mapfile -t orphans < <(find_orphaned_boot_files)
  if [ "${#orphans[@]}" -eq 0 ]; then
    echo "No orphaned boot files detected."
    return 0
  fi
  for path in "${orphans[@]}"; do
    # The orphan list is a heuristic. Never delete artifacts of the running
    # or target kernel, mirroring the exclusion the archival step applies.
    if is_protected_boot_file "$path" "$running"; then
      echo "Skipping $path: belongs to the running or target kernel."
      continue
    fi
    echo "Removing orphaned file: $path"
    if rm -f -- "$path"; then
      echo "Removed $path"
    else
      echo "Failed to remove $path; skipping."
    fi
    BOOT_AVAIL_KB=$(get_boot_avail_kb)
    if [ "$BOOT_AVAIL_KB" -ge "$NEEDED_KB" ]; then
      echo "Enough space freed by removing orphans."
      break
    fi
  done
  return 0
}

#######################################
# Step 3 (last resort): archive the oldest boot files that do not belong to
# the running or target kernel into /var/tmp, then delete them from BOOT_DIR.
# Globals:   BOOT_AVAIL_KB (written), BOOT_DIR, LATEST_KERNEL (read)
# Arguments: $1 - running kernel version
#######################################
free_space_by_archiving() {
  local running=$1
  local archive_dir=/var/tmp
  local ts archive path
  local -a candidates=() to_archive=() basenames=()

  echo "Not enough space after package removals and orphan cleanup. Preparing archival fallback."
  ts=$(date +%Y%m%d%H%M%S)
  archive="$archive_dir/boot-archive-$ts.tar.xz"

  echo "Building list of archival candidates..."
  # Regular files directly in BOOT_DIR, oldest mtime first. cut keeps the
  # whole path after the timestamp, so names containing spaces stay intact.
  mapfile -t candidates < <(
    find "$BOOT_DIR" -maxdepth 1 -type f -printf '%T@ %p\n' \
      | sort -n \
      | cut -d' ' -f2-
  )
  for path in "${candidates[@]}"; do
    if is_protected_boot_file "$path" "$running"; then
      continue
    fi
    to_archive+=("$path")
    basenames+=("${path##*/}")
    # Heuristic cap: archive at most 10 files per run.
    if [ "${#to_archive[@]}" -ge 10 ]; then
      break
    fi
  done

  if [ "${#to_archive[@]}" -eq 0 ]; then
    echo "No safe archival candidates found in $BOOT_DIR. Cannot proceed automatically."
    return 0
  fi

  echo "Archiving ${#to_archive[@]} files to $archive to free space."
  # Files are deleted only after tar reports success.
  if tar -cJf "$archive" -C "$BOOT_DIR" -- "${basenames[@]}"; then
    echo "Archive created at $archive. Removing archived files from $BOOT_DIR."
    for path in "${to_archive[@]}"; do
      rm -f -- "$path" || true
    done
    BOOT_AVAIL_KB=$(get_boot_avail_kb)
    echo "Available after archival: $BOOT_AVAIL_KB KB"
  else
    echo "Failed to create archive $archive. Leaving files intact."
  fi
  return 0
}

# If target initramfs missing, try to ensure there's enough space first
if [ ! -f "$INITRAMFS" ]; then
  echo "WARNING: Missing initramfs for $LATEST_KERNEL"
  echo "Preparing to rebuild initramfs..."
  if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
    echo "Not enough free space on $BOOT_DIR to build initramfs."
    echo "Free: $BOOT_AVAIL_KB KB, required: $NEEDED_KB KB."
    echo "Current files in $BOOT_DIR (largest first):"
    # Informational listing only. When head closes the pipe early, ls can die
    # of SIGPIPE; with pipefail + "set -e" that would abort the script, so the
    # pipeline's status is ignored.
    ls -lhS "$BOOT_DIR" | head -n 30 || true
    echo
    echo "Installed kernels (newest first):"
    installed_kernels

    if [ "$AUTO_REMOVE" != true ]; then
      echo "To automatically remove old kernels and try again, re-run with --auto or -a"
      echo "Aborting rebuild to avoid filling $BOOT_DIR."
      exit 1
    fi

    echo "Auto-remove enabled: attempting to remove oldest kernels until enough space is available."
    running_kernel=$(uname -r)
    removed_any=false

    free_space_by_removing_kernels "$running_kernel"
    if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
      free_space_by_removing_orphans "$running_kernel"
    fi
    if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
      free_space_by_archiving "$running_kernel"
    fi
    if [ "$BOOT_AVAIL_KB" -lt "$NEEDED_KB" ]; then
      echo "Unable to free enough space automatically. Please free space in $BOOT_DIR and retry."
      exit 1
    fi

    if [ "$removed_any" = true ]; then
      echo "Cleaning up package metadata..."
      # Best effort: a failing autoremove must not block the rebuild.
      dnf -y autoremove || true
    fi
  fi
  echo "Rebuilding initramfs..."
  dracut -f "$INITRAMFS" "$LATEST_KERNEL"
fi
#######################################
# Tell whether lsmod's output mentions "nvidia".
# Outputs:   nothing on stdout (lsmod diagnostics, if any, go to stderr)
# Returns:   0 when the lsmod output contains "nvidia", 1 otherwise
#######################################
nvidia_module_loaded() {
  local loaded_modules
  # Capture lsmod's output before searching it. In "lsmod | grep -q", grep
  # exits on the first match; lsmod can then die of SIGPIPE and, with pipefail
  # set, the pipeline reports failure even though a match was found.
  loaded_modules=$(lsmod || true)
  [[ "$loaded_modules" == *nvidia* ]]
}

# Verify NVIDIA modules (if applicable)
if nvidia_module_loaded; then
  if ! modinfo -k "$LATEST_KERNEL" nvidia &>/dev/null; then
    echo "WARNING: NVIDIA modules missing for $LATEST_KERNEL"
    echo "Rebuilding NVIDIA driver..."
    # NOTE: no rebuild command is actually run here; only the messages above
    # are printed.
    # Add NVIDIA rebuild command based on your installation method
    # For .run installer: /path/to/NVIDIA*.run --silent
    # For akmod: akmods --force
  fi
fi
# Regenerate the GRUB configuration at /boot/grub2/grub.cfg.
# NOTE(review): the original author flagged this output path as probably wrong
# where EFI is involved - confirm the correct grub.cfg location for the system.
printf '%s\n' "Updating GRUB configuration..."
grub2-mkconfig --output=/boot/grub2/grub.cfg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment