Skip to content

Instantly share code, notes, and snippets.

@gheber
Forked from mslacken/instant-slurm.sh
Created July 28, 2021 20:18
Show Gist options
  • Select an option

  • Save gheber/d9c2dc35e83c5b6da9e74222de12fa57 to your computer and use it in GitHub Desktop.

Select an option

Save gheber/d9c2dc35e83c5b6da9e74222de12fa57 to your computer and use it in GitHub Desktop.

Revisions

  1. @mslacken mslacken revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion instant-slurm.sh
    Original file line number Diff line number Diff line change
    @@ -54,7 +54,7 @@ PartitionName=normal Nodes=localhost Default=YES MaxTime=UNLIMITED State=UP
    NodeName=localhost FOO $($SLURMD -C | grep -v UpTime | cut -f 2-32 -d ' ')
    EOF

    if [ -e /dev/nvidia* ] ; then
    if [ -e /dev/nvidia0 ] ; then
    gpucount=$(ls /dev/nvidia[0-9] | wc -l)
    if [ $gpucount -eq 1 ] ; then
    cat > $GRESCONF <<EOF
  2. @mslacken mslacken created this gist Mar 22, 2019.
    98 changes: 98 additions & 0 deletions instant-slurm.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,98 @@
    #!/bin/bash
    # Copyright (C) 2019 by Christian Goll <[email protected]>
    #
    # Permission to use, copy, modify, and/or distribute this software for any
    # purpose with or without fee is hereby granted.

    # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
    # REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    # FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
    # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
    # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
    # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
    # PERFORMANCE OF THIS SOFTWARE.
    # Generate a minimal single-node ("localhost") Slurm configuration.
    #
    # Steps:
    #   1. Collect everything needed (machine id, hardware line from
    #      `slurmd -C`, number of NVIDIA GPUs) BEFORE touching any file, so a
    #      failure never leaves a half-written configuration behind.
    #   2. Back up existing slurm.conf / gres.conf with a timestamp suffix.
    #   3. Write slurm.conf and, when GPUs are present, gres.conf.
    #   4. Print a short summary and the commands needed to start the daemons.
    export SLURMD=/usr/sbin/slurmd
    export SLURMCONF=/etc/slurm/slurm.conf
    export GRESCONF=/etc/slurm/gres.conf
    # Assign and export separately so a failing `date` is not masked by export.
    NOW=$(date +%Y%m%d%H%M)
    export NOW

    # Print an error message to stderr and abort.
    die() {
      printf 'error: %s\n' "$*" >&2
      exit 1
    }

    # --- 1. gather information -------------------------------------------------
    cluster_name=$(cat /etc/machine-id) \
      || die "cannot read /etc/machine-id"
    [[ -n "$cluster_name" ]] || die "/etc/machine-id is empty"

    # `slurmd -C` prints the node definition; drop the UpTime line and the
    # leading NodeName=<host> field, keeping only the hardware description.
    node_hw=$("$SLURMD" -C | grep -v UpTime | cut -f 2-32 -d ' ')
    [[ -n "$node_hw" ]] || die "'$SLURMD -C' produced no hardware description"

    # Count GPUs via a glob into an array instead of parsing `ls`. The test is
    # on /dev/nvidia0 only: `[ -e /dev/nvidia* ]` breaks as soon as the glob
    # matches more than one file (nvidia0, nvidiactl, ...).
    gpucount=0
    if [[ -e /dev/nvidia0 ]]; then
      shopt -s nullglob
      gpu_devices=(/dev/nvidia[0-9])
      shopt -u nullglob
      gpucount=${#gpu_devices[@]}
    fi

    # gres.conf device specification; assumes devices are numbered 0..N-1.
    gres_file_spec=
    if ((gpucount == 1)); then
      gres_file_spec=/dev/nvidia0
    elif ((gpucount > 1)); then
      gres_file_spec="/dev/nvidia[0-$((gpucount - 1))]"
    fi

    # Node-level GRES declaration. The node parameter is "Gres=<name>:<count>";
    # "GresType" is not a valid slurm.conf node keyword.
    node_gres=
    if [[ -n "$gres_file_spec" ]]; then
      node_gres="Gres=gpu:${gpucount}"
    fi

    # --- 2. back up existing configuration -------------------------------------
    slurmconf_backup=
    if [[ -e "$SLURMCONF" ]]; then
      cp -- "$SLURMCONF" "$SLURMCONF.$NOW" \
        || die "cannot back up $SLURMCONF"
      slurmconf_backup="$SLURMCONF.$NOW"
    fi
    if [[ -e "$GRESCONF" ]]; then
      cp -- "$GRESCONF" "$GRESCONF.$NOW" \
        || die "cannot back up $GRESCONF"
    fi

    # --- 3. write configuration ------------------------------------------------
    {
      printf '%s\n' \
        '# instant slurm file, automatically generated' \
        "ClusterName=${cluster_name}" \
        'ControlMachine=localhost' \
        'SlurmdUser=root' \
        'SlurmctldPort=6817' \
        'SlurmdPort=6818' \
        'StateSaveLocation=/var/lib/slurm' \
        'SlurmdSpoolDir=/var/spool/slurm' \
        'SwitchType=switch/none' \
        'MpiDefault=none' \
        'SlurmctldPidFile=/var/run/slurm/slurmctld.pid' \
        'SlurmdPidFile=/var/run/slurm/slurmd.pid' \
        'ProctrackType=proctrack/pgid' \
        'SlurmctldTimeout=300' \
        'SlurmdTimeout=300' \
        'InactiveLimit=0' \
        'MinJobAge=300' \
        'KillWait=30' \
        'Waittime=0' \
        'SchedulerType=sched/builtin' \
        'FastSchedule=1' \
        'SlurmctldDebug=3' \
        'SlurmctldLogFile=/var/log/slurmctld.log' \
        'SlurmdDebug=3' \
        'SlurmdLogFile=/var/log/slurmd.log' \
        'JobCompType=jobcomp/none' \
        'PropagateResourceLimitsExcept=MEMLOCK' \
        'PartitionName=normal Nodes=localhost Default=YES MaxTime=UNLIMITED State=UP' \
        "NodeName=localhost${node_gres:+ ${node_gres}} ${node_hw}"
      if [[ -n "$node_gres" ]]; then
        printf '%s\n' 'GresTypes=gpu'
      fi
    } > "$SLURMCONF" || die "cannot write $SLURMCONF"

    # Only touch gres.conf when GPUs were actually found.
    gres_written=0
    if [[ -n "$gres_file_spec" ]]; then
      printf 'Name=gpu File=%s\n' "$gres_file_spec" > "$GRESCONF" \
        || die "cannot write $GRESCONF"
      gres_written=1
    fi

    # --- 4. report -------------------------------------------------------------
    printf '%s\n' "# Wrote minimal instant slurm configuration to $SLURMCONF"
    # Mention the backup only if one was really created.
    if [[ -n "$slurmconf_backup" ]]; then
      printf '%s\n' "# Original $SLURMCONF can be found under $slurmconf_backup"
    fi
    # Report gres.conf only if this run wrote it (not merely because it exists).
    if ((gres_written)); then
      printf '%s\n' "# Wrote $GRESCONF"
    fi
    printf '%s\n' \
      '# WARNING: slurmd and slurmctl will run' \
      '# Now slurmd, slurmctld and munge can be started with:' \
      'systemctl start munge' \
      'systemctl start slurmctld' \
      'systemctl start slurmd' \
      '# You may also have to enable node with' \
      '# scontrol update NodeName=localhost State=RESUME'