Skip to content

Instantly share code, notes, and snippets.

@rnwolf
Created November 13, 2016 21:25
Show Gist options
  • Select an option

  • Save rnwolf/87545373bf1294c14c481b9d8c72bc8c to your computer and use it in GitHub Desktop.

Select an option

Save rnwolf/87545373bf1294c14c481b9d8c72bc8c to your computer and use it in GitHub Desktop.

Revisions

  1. @mtompkins mtompkins revised this gist Nov 10, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -99,7 +99,7 @@ function main(){
    CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all output to screen and two files
    # redirect all output to screen and file
    > $TMP_OUTPUT
    exec 3>&1 4>&2
    # NOTE: Not preferred format but valid: exec &> >(tee -ia "${TMP_OUTPUT}" )
  2. @mtompkins mtompkins revised this gist Nov 10, 2016. 1 changed file with 80 additions and 44 deletions.
    124 changes: 80 additions & 44 deletions snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -69,8 +69,6 @@ SCRUB_AGE=10
    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/var/log/snapraid/snapScript.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    @@ -85,13 +83,15 @@ function main(){
    ######################
    CHK_FAIL=0
    DO_SYNC=0
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    EMAIL_SUBJECT_PREFIX="(SnapRAID on `hostname`)"
    GRACEFUL=0
    SOPHOS_RUNNING=0
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    SYNC_WARN_COUNT=""
    TMP_OUTPUT="/tmp/snapRAID.out"

    # Capture time
    SECONDS=0

    # Expand PATH for smartctl
    PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

    @@ -101,24 +101,27 @@ function main(){

    # redirect all output to screen and two files
    > $TMP_OUTPUT
    exec &> >(tee -ia "${TMP_OUTPUT}" "${LOG_FILE}")
    exec 3>&1 4>&2
    # NOTE: Not preferred format but valid: exec &> >(tee -ia "${TMP_OUTPUT}" )
    exec > >(tee -ia "${TMP_OUTPUT}" ) 2>&1

    # timestamp the job
    echo "#SnapRAID Script Job started [`date`]"
    echo "SnapRAID Script Job started [`date`]"
    echo
    echo "----------------------------------------"

    # Remove any plex created anomalies
    echo "##Preprocessing"
    echo "###NFO Scrub [`date`]"
    echo "Removing any 0 byte .nfo's before snapraid exeuction."
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Stop any services that may inhibit optimum execution
    echo "###Stop Services [`date`]"
    stop_services

    echo "###Remove Zero Byte NFOs [`date`]"
    echo "Removing any 0 byte .nfo's before SnapRAID exeuction."
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    #sanity check first to make sure we can access the content and parity files
    # sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "**ERROR** Content file ($CONTENT_FILE) not found!"
    exit 1;
    @@ -136,7 +139,7 @@ function main(){
    chk_zero

    # run the snapraid DIFF command
    echo "###Snapraid DIFF [`date`]"
    echo "###SnapRAID DIFF [`date`]"
    $SNAPRAID_BIN diff
    # wait for the above cmd to finish
    wait
    @@ -156,7 +159,7 @@ function main(){
    echo "**ERROR** - failed to get one or more count values. Unable to proceed."
    echo "Exiting script. [`date`]"
    if [ $EMAIL_ADDRESS ]; then
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside."
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output."
    send_mail
    fi
    exit 1;
    @@ -185,25 +188,24 @@ function main(){

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "###Snapraid SYNC [`date`]"
    echo "###SnapRAID SYNC [`date`]"
    $SNAPRAID_BIN sync -q
    #wait for the job to finish
    wait
    echo "SYNC finished [`date`]"
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    exec &>/dev/tty
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    sed_me "s/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g" "$TMP_OUTPUT"
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user
    # has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    echo "###Snapraid Scrub Newly Added [`date`]"
    echo
    echo "SnapRAID SCRUB *Newly Added*"
    $SNAPRAID_BIN scrub -p new -q
    wait
    echo "SCRUB New finished [`date`]"
    echo "SCRUB *Newly Added* finished [`date`]"
    echo
    fi

    @@ -221,17 +223,15 @@ function main(){
    echo "**WARNING** - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job. [`date`]"
    else
    # Everything ok - let's run the scrub job!
    echo "###Snapraid SCRUB [`date`]"
    echo "###SnapRAID SCRUB [`date`]"
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE -q
    #wait for the job to finish
    wait
    echo "SCRUB finished [`date`]"
    echo
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    exec &>/dev/tty
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    sed_me "s/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g" "$TMP_OUTPUT"
    fi
    fi
    else
    @@ -258,11 +258,9 @@ function main(){

    echo "All jobs ended. [`date`] "

    exec &>/dev/tty

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "Email address is set. Sending email report to <$EMAIL_ADDRESS> [`date`]"
    echo -e "Email address is set. Sending email report to **$EMAIL_ADDRESS** [`date`]"
    # check if deleted count exceeded threshold
    if [ $CHK_FAIL -eq 1 ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    @@ -277,43 +275,61 @@ function main(){
    MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
    fi

    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)"
    SUBJECT="[WARNING] $SYNC_WARN_COUNT - ($MSG) $EMAIL_SUBJECT_PREFIX"
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully"
    SUBJECT="[WARNING] SYNC job ran but did not complete successfully $EMAIL_SUBJECT_PREFIX"
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully"
    SUBJECT="[WARNING] SCRUB job ran but did not complete successfully $EMAIL_SUBJECT_PREFIX"
    else
    SUBJECT="$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED"
    SUBJECT="[COMPLETED] $JOBS_DONE Jobs $EMAIL_SUBJECT_PREFIX"
    fi
    ELAPSED="$(($SECONDS / 3600))hrs $((($SECONDS / 60) % 60))min $(($SECONDS % 60))sec"
    echo
    echo "----------------------------------------"
    echo "##Total time elapsed for SnapRAID: $ELAPSED"

    # Add a topline to email body
    sed_me "1s/^/##$SUBJECT \n/" "${TMP_OUTPUT}"
    send_mail
    fi

    clean_desc

    exit 0;
    }

    #######################
    # FUNCTIONS & METHODS #
    #######################

    function sed_me(){
        # Run an in-place sed edit while the tee logging redirect is detached.
        #
        # Arguments: $1 - sed script to apply
        #            $2 - file to edit in place
        # Globals:   TMP_OUTPUT (read) - file the tee logger appends to
        # Requires:  fd 3 / fd 4 hold the saved original stdout / stderr
        #            (the caller sets these up with `exec 3>&1 4>&2`).
        # Returns:   exit status of sed
        local rc

        # Put stdout/stderr back on the saved descriptors and close the
        # saved copies, so the shell no longer writes through tee while
        # sed rewrites the file.
        exec 1>&3 2>&4 3>&- 4>&-

        # Call sed directly: wrapping it in $( ) would try to execute
        # whatever sed printed as a command.
        sed -i -e "$1" -- "$2"
        rc=$?

        # Save the plain descriptors again and re-attach the tee logger.
        exec 3>&1 4>&2
        exec > >(tee -ia "${TMP_OUTPUT}" ) 2>&1

        # No `wait` here: sed ran in the foreground, so there is no
        # background job left to reap.
        return "$rc"
    }

    function chk_del(){
    if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
    # NO, delete threshold not reached, lets run the sync job
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized. [`date`]"
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized."
    DO_SYNC=1
    else
    echo "**WARNING** - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $LOG_FILE for details. [`date`]"
    echo "**WARNING** Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)."
    CHK_FAIL=1
    fi
    }

    function chk_updated(){
    if [ $UPDATE_COUNT -lt $UP_THRESHOLD ]; then
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized. [`date`]"
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized."
    DO_SYNC=1
    else
    echo "**WARNING** - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $LOG_FILE for details."
    echo "**WARNING** Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD)."
    CHK_FAIL=1
    fi
    }
    @@ -327,38 +343,43 @@ function chk_sync_warn(){

    if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
    # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run. [`date`]"
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a SYNC job to run. [`date`]"
    DO_SYNC=1
    else
    # NO, so let's increment the warning count and skip the sync job
    ((SYNC_WARN_COUNT += 1))
    echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job. [`date`]"
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with SYNC job. [`date`]"
    DO_SYNC=0
    fi
    else
    # NO, so let's skip SYNC
    echo "Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job. [`date`]"
    echo "Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with SYNC job. [`date`]"
    DO_SYNC=0
    fi
    }

    function chk_zero(){
    echo "###Timestamps [`date`]"
    if $SNAPRAID_BIN status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.'; then
    echo "Found zero sub-second files. Running touch to timestamp. [`date`]"
    echo "Timestamping"
    echo "###SnapRAID TOUCH [`date`]"
    echo "Checking for zero sub-second files."
    TIMESTATUS=$($SNAPRAID_BIN status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.' | sed 's/^You have/Found/g')
    if [ -n "$TIMESTATUS" ]; then
    echo "$TIMESTATUS"
    echo "Running TOUCH job to timestamp. [`date`]"
    $SNAPRAID_BIN touch
    wait
    echo "TOUCH finished [`date`]"
    else
    echo "No zero sub-second timestamp files found."
    fi
    echo "Finished"
    }

    function stop_services(){
    # Disable Sophos on-access
    if /opt/sophos-av/bin/savdstatus | grep -v "not running" > /dev/null; then
    echo "Sophos on-access detected, shutting it down..."
    /opt/sophos-av/bin/savdctl disable
    wait
    SOPHOS_RUNNING=1
    fi

    @@ -373,10 +394,11 @@ function stop_services(){

    function restore_services(){
    echo
    # Restart Sophos on-access scanning
    # Enable Sophos on-access scanning
    if [ $SOPHOS_RUNNING -eq 1 ]; then
    /opt/sophos-av/bin/savdctl enable
    echo "Restoring Sohpos on-access to active state..."
    /opt/sophos-av/bin/savdctl enable
    wait
    SOPHOS_RUNNING=0
    fi

    @@ -387,12 +409,26 @@ function restore_services(){
    # SERVICE_A_RUNNING=0
    #fi

    if [ $GRACEFUL -eq 1 ]; then
    return
    fi

    clean_desc

    exit
    }

    function clean_desc(){
        # Restore stdout/stderr from the copies saved on fd 3 / fd 4.
        #
        # Always returns 0, so a caller that follows this with a bare `exit`
        # does not pick up a failure status from the interactive-shell test.

        # Only restore when both saved descriptors are actually open; this
        # avoids a "Bad file descriptor" error if the function runs before
        # they were saved.
        if { true >&3; } 2>/dev/null && { true >&4; } 2>/dev/null; then
            exec 1>&3 2>&4
        fi

        # If interactive shell restore output
        if [[ $- == *i* ]]; then
            exec &>/dev/tty
        fi

        return 0
    }

    function send_mail(){
    # Format for markdown
    sed -i 's/$/ /' $TMP_OUTPUT
    sed_me "s/$/ /" "$TMP_OUTPUT"
    $MAIL_BIN -a 'Content-Type: text/html' -s "$SUBJECT" "$EMAIL_ADDRESS" < <(python -m markdown $TMP_OUTPUT)
    }

  3. @mtompkins mtompkins revised this gist Nov 7, 2016. 1 changed file with 289 additions and 267 deletions.
    556 changes: 289 additions & 267 deletions snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) Checks for size 0 .nfo files (plex)
    # 1) Checks for size 0 .nzb files (plex)
    # 2) Calls diff to figure out if the parity info is out of sync.
    # 3) If parity info is out of sync, AND the number of deleted or changed files exceed
    # X (each configurable), it triggers an alert email and stops. (In case of
    @@ -35,51 +35,310 @@
    # - mailx (simplify sending HTML emails)
    # - python markdown (render Markdown to HTML)

    #############
    # FUNCTIONS #
    #############
    ######################
    # USER VARIABLES #
    ######################

    ####################### USER CONFIGURATION START #######################

    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"

    # Set the threshold of deleted files to stop the sync job from running.
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    DEL_THRESHOLD=100
    UP_THRESHOLD=500

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    # start a sync job when DEL_THRESHOLD is breached due to false alarm.
    # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
    # Set to -1 to NEVER force a sync (i.e. you must sync manually if the delete threshold is breached)
    #SYNC_WARN_THRESHOLD=3
    SYNC_WARN_THRESHOLD=-1

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=10
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/var/log/snapraid/snapScript.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    MAIL_BIN="/usr/bin/mailx"

    ##### USER CONFIGURATION STOP ##### MAKE NO CHANGES BELOW THIS LINE ####

    function main(){
    # Runs one complete SnapRAID maintenance pass, in this order:
    #   1. initialise state and start logging all output through tee
    #   2. preprocessing: delete zero-byte .nfo files, stop services
    #   3. verify that the content and parity files exist (exit 1 if not)
    #   4. chk_zero, then `snapraid diff`, then parse the change counts
    #      (exit 1 if any count is missing)
    #   5. decide via chk_del / chk_updated / chk_sync_warn whether to SYNC
    #   6. optional SYNC followed by a scrub of new blocks, optional SCRUB
    #   7. postprocessing: SMART report, spin down, restore services
    #   8. build the email subject and send the report, then exit 0
    # Reads the user configuration globals EMAIL_ADDRESS, DEL_THRESHOLD,
    # UP_THRESHOLD, SCRUB_PERCENT, SCRUB_AGE, SMART_LOG, LOG_FILE and
    # SNAPRAID_BIN.

    ######################
    # INIT VARIABLES #
    ######################
    CHK_FAIL=0
    DO_SYNC=0
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    GRACEFUL=0
    SOPHOS_RUNNING=0
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    SYNC_WARN_COUNT=""
    TMP_OUTPUT="/tmp/snapRAID.out"

    # Expand PATH for smartctl
    PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

    # auto determine names of content and parity files
    # (blank and comment lines are dropped; the second space-separated field
    # of the first matching line is used)
    CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all output to screen and two files
    # (TMP_OUTPUT is truncated first; tee then appends stdout and stderr to
    # both TMP_OUTPUT and LOG_FILE)
    > $TMP_OUTPUT
    exec &> >(tee -ia "${TMP_OUTPUT}" "${LOG_FILE}")

    # timestamp the job
    echo "#SnapRAID Script Job started [`date`]"
    echo
    echo "----------------------------------------"

    # Remove any plex created anomalies
    echo "##Preprocessing"
    echo "###NFO Scrub [`date`]"
    echo "Removing any 0 byte .nfo's before snapraid exeuction."
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Stop any services that may inhibit optimum execution
    echo "###Stop Services [`date`]"
    stop_services

    #sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "**ERROR** Content file ($CONTENT_FILE) not found!"
    exit 1;
    fi

    if [ ! -e $PARITY_FILE ]; then
    echo "**ERROR** Parity file ($PARITY_FILE) not found!"
    exit 1;
    fi
    echo
    echo "----------------------------------------"
    echo "##Processing"

    # Fix timestamps
    chk_zero

    # run the snapraid DIFF command
    echo "###Snapraid DIFF [`date`]"
    $SNAPRAID_BIN diff
    # wait for the above cmd to finish
    wait
    echo
    echo "DIFF finished [`date`]"
    JOBS_DONE="DIFF"

    # Pull the change counts back out of the captured output in TMP_OUTPUT:
    # each grep matches an indented "<number> <keyword>" line, and sed/cut
    # keep only the number.
    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)

    # sanity check to make sure that we were able to get our counts from the output of the DIFF job
    if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "**ERROR** - failed to get one or more count values. Unable to proceed."
    echo "Exiting script. [`date`]"
    # the unquoted test is true when EMAIL_ADDRESS is set to a non-empty value
    if [ $EMAIL_ADDRESS ]; then
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside."
    send_mail
    fi
    exit 1;
    fi
    echo
    echo "**SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]**"
    echo

    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
    chk_del

    # the updated-files check only runs if the delete check did not fail
    if [ $CHK_FAIL -eq 0 ]; then
    chk_updated
    fi

    # a failed check hands the decision to chk_sync_warn
    if [ $CHK_FAIL -eq 1 ]; then
    chk_sync_warn
    fi
    else
    # NO, so let's skip SYNC
    echo "No change detected. Not running SYNC job. [`date`] "
    DO_SYNC=0
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "###Snapraid SYNC [`date`]"
    $SNAPRAID_BIN sync -q
    #wait for the job to finish
    wait
    echo "SYNC finished [`date`]"
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    # Output is pointed at /dev/tty while sed rewrites TMP_OUTPUT in place,
    # then the tee redirect is re-established.
    # NOTE(review): /dev/tty may not be openable when the script runs without
    # a controlling terminal (e.g. from cron) - confirm.
    exec &>/dev/tty
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user
    # has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    echo "###Snapraid Scrub Newly Added [`date`]"
    $SNAPRAID_BIN scrub -p new -q
    wait
    echo "SCRUB New finished [`date`]"
    echo
    fi

    # Moving onto scrub now. Check if user has enabled scrub
    if [ $SCRUB_PERCENT -gt 0 ]; then
    # YES, first let's check if delete threshold has been breached and we have not forced a sync.
    if [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
    # YES, parity is out of sync so let's not run scrub job
    echo "Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached). [`date`]"
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "**WARNING** - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job. [`date`]"
    else
    # Everything ok - let's run the scrub job!
    echo "###Snapraid SCRUB [`date`]"
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE -q
    #wait for the job to finish
    wait
    echo "SCRUB finished [`date`]"
    echo
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    # (same detach / edit / re-attach sequence as for the SYNC marker)
    exec &>/dev/tty
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    fi
    fi
    else
    echo "Scrub job is not enabled. Not running SCRUB job. [`date`] "
    fi

    echo
    echo "----------------------------------------"
    echo "##Postprocessing"

    # Moving onto logging SMART info if enabled
    if [ $SMART_LOG -eq 1 ]; then
    echo
    $SNAPRAID_BIN smart
    wait
    fi

    echo "Spinning down disks..."
    $SNAPRAID_BIN down

    # Graceful restore of services outside of trap - for messaging
    # (GRACEFUL is set before the call; presumably restore_services checks it
    # and returns instead of exiting - verify against that function)
    GRACEFUL=1
    restore_services

    echo "All jobs ended. [`date`] "

    # From here on output goes to /dev/tty only and is no longer captured
    # by tee into TMP_OUTPUT / LOG_FILE.
    exec &>/dev/tty

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "Email address is set. Sending email report to <$EMAIL_ADDRESS> [`date`]"
    # check if deleted count exceeded threshold
    # NOTE: MSG is only ever appended to below; it is not initialised in
    # this function.
    if [ $CHK_FAIL -eq 1 ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
    fi

    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG & "
    fi

    if [ $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
    fi

    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)"
    # ${JOBS_DONE##*"SYNC"*} expands to empty when JOBS_DONE contains SYNC,
    # so the -z test means "a SYNC job was recorded"
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully"
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully"
    else
    SUBJECT="$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED"
    fi
    send_mail
    fi

    exit 0;
    }

    #######################
    # FUNCTIONS & METHODS #
    #######################

    function chk_del(){
    if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
    # NO, delete threshold not reached, lets run the sync job
    echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized."
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized. [`date`]"
    DO_SYNC=1
    else
    echo "[`date`] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $LOG_FILE for details."
    echo "**WARNING** - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $LOG_FILE for details. [`date`]"
    CHK_FAIL=1
    fi
    fi
    }

    function chk_updated(){
    if [ $UPDATE_COUNT -lt $UP_THRESHOLD ]; then
    echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized."
    echo "Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized. [`date`]"
    DO_SYNC=1
    else
    echo "[`date`] WARNING - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $LOG_FILE for details."
    echo "**WARNING** - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $LOG_FILE for details."
    CHK_FAIL=1
    fi
    fi
    }

    function chk_sync_warn(){
    if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
    echo "[`date`] Forced sync is enabled."
    echo "Forced sync is enabled. [`date`]"

    SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
    SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting

    if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
    # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
    echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run. [`date`]"
    DO_SYNC=1
    else
    # NO, so let's increment the warning count and skip the sync job
    ((SYNC_WARN_COUNT += 1))
    echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
    echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job. [`date`]"
    DO_SYNC=0
    fi
    else
    # NO, so let's skip SYNC
    echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
    echo "Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job. [`date`]"
    DO_SYNC=0
    fi
    }
    @@ -102,6 +361,14 @@ function stop_services(){
    /opt/sophos-av/bin/savdctl disable
    SOPHOS_RUNNING=1
    fi

    # Systemctl example
    # Be sure to add an associated SERVICE_A_RUNNING=0 INIT VARIABLES above
    #if [ `systemctl is-active service-A.service` == "active" ]; then
    # echo "SERVICE_A detected, shutting it down..."
    # systemctl stop service-A.service
    # SERVICE_A_RUNNING=1
    #fi
    }

    function restore_services(){
    @@ -113,9 +380,12 @@ function restore_services(){
    SOPHOS_RUNNING=0
    fi

    if [ $GRACEFUL -eq 1 ]; then
    return
    fi
    # Systemctl example
    #if [ $SERVICE_A_RUNNING -eq 1 ]; then
    # systemctl start service-A.service
    # echo "Restoring SERVICE_A to active state..."
    # SERVICE_A_RUNNING=0
    #fi

    exit
    }
    @@ -126,255 +396,7 @@ function send_mail(){
    $MAIL_BIN -a 'Content-Type: text/html' -s "$SUBJECT" "$EMAIL_ADDRESS" < <(python -m markdown $TMP_OUTPUT)
    }

    #####################
    # INIT VARIABLES
    #####################
    SOPHOS_RUNNING=0
    GRACEFUL=0

    # Set TRAP
    trap restore_services INT EXIT

    DO_SYNC=0
    CHK_FAIL=0

    # Expand PATH for smartctl
    PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

    ## USER DEFINED SETTINGS ##
    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"

    # Set the threshold of deleted files to stop the sync job from running.
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    DEL_THRESHOLD=100
    UP_THRESHOLD=500

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    # start a sync job when DEL_THRESHOLD is breached due to false alarm.
    # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
    # Set to -1 to NEVER force a sync (i.e. you must sync manually if the delete threshold is breached)
    #SYNC_WARN_THRESHOLD=3
    SYNC_WARN_THRESHOLD=-1

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=10
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/var/log/snapraid/snapScript.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    MAIL_BIN="/usr/bin/mailx"

    ## INTERNAL TEMP VARS ##
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    TMP_OUTPUT="/tmp/snapRAID.out"
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    SYNC_WARN_COUNT=""

    # auto determine names of content and parity files
    CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all output to screen and two files
    > $TMP_OUTPUT
    exec &> >(tee -ia "${TMP_OUTPUT}" "${LOG_FILE}")

    # timestamp the job
    echo "#SnapRAID Script Job started [`date`]"
    echo
    echo "----------------------------------------"

    # Remove any plex created anomalies
    echo "##Preprocessing"
    echo "###NFO Scrub [`date`]"
    echo "Removing any 0 byte .nfo's before snapraid exeuction."
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Stop any services that may inhibit optimum execution
    echo "###Stop Services [`date`]"
    stop_services

    #sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "**ERROR** Content file ($CONTENT_FILE) not found!"
    exit 1;
    fi

    if [ ! -e $PARITY_FILE ]; then
    echo "**ERROR** Parity file ($PARITY_FILE) not found!"
    exit 1;
    fi
    echo
    echo "----------------------------------------"
    echo "##Processing"

    # Fix timestamps
    chk_zero

    # run the snapraid DIFF command
    echo "###Snapraid DIFF [`date`]"
    $SNAPRAID_BIN diff
    # wait for the above cmd to finish
    wait
    echo
    echo "DIFF finished [`date`]"
    JOBS_DONE="DIFF"

    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)

    # sanity check to make sure that we were able to get our counts from the output of the DIFF job
    if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "**ERROR** - failed to get one or more count values. Unable to proceed."
    echo "Exiting script. [`date`]"
    if [ $EMAIL_ADDRESS ]; then
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside."
    send_mail
    fi
    exit 1;
    fi
    echo
    echo "**SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]**"
    echo

    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
    chk_del

    if [ $CHK_FAIL -eq 0 ]; then
    chk_updated
    fi

    if [ $CHK_FAIL -eq 1 ]; then
    chk_sync_warn
    fi
    else
    # NO, so let's skip SYNC
    echo "No change detected. Not running SYNC job. [`date`] "
    DO_SYNC=0
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "###Snapraid SYNC [`date`]"
    $SNAPRAID_BIN sync -q
    #wait for the job to finish
    wait
    echo "SYNC finished [`date`]"
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    exec &>/dev/tty
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user
    # has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    echo "###Snapraid Scrub Newly Added [`date`]"
    $SNAPRAID_BIN scrub -p new -q
    wait
    echo "SCRUB New finished [`date`]"
    echo
    fi

    # Moving onto scrub now. Check if user has enabled scrub
    if [ $SCRUB_PERCENT -gt 0 ]; then
    # YES, first let's check if delete threshold has been breached and we have not forced a sync.
    if [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
    # YES, parity is out of sync so let's not run scrub job
    echo "Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached). [`date`]"
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "**WARNING** - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job. [`date`]"
    else
    # Everything ok - let's run the scrub job!
    echo "###Snapraid SCRUB [`date`]"
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE -q
    #wait for the job to finish
    wait
    echo "SCRUB finished [`date`]"
    echo
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    exec &>/dev/tty
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    fi
    fi
    else
    echo "Scrub job is not enabled. Not running SCRUB job. [`date`] "
    fi

    echo
    echo "----------------------------------------"
    echo "##Postprocessing"

    # Moving onto logging SMART info if enabled
    if [ $SMART_LOG -eq 1 ]; then
    echo
    $SNAPRAID_BIN smart
    wait
    fi

    echo "Spinning down disks..."
    $SNAPRAID_BIN down

    # Graceful restore of services outside of trap - for messaging
    GRACEFUL=1
    restore_services

    echo "All jobs ended. [`date`] "

    exec &>/dev/tty

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "Email address is set. Sending email report to <$EMAIL_ADDRESS> [`date`]"
    # check if deleted count exceeded threshold
    if [ $CHK_FAIL -eq 1 ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
    fi

    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG & "
    fi

    if [ $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
    fi

    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)"
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully"
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully"
    else
    SUBJECT="$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED"
    fi
    send_mail
    fi

    exit 0;
    main "$@"
  4. @mtompkins mtompkins revised this gist Nov 7, 2016. 1 changed file with 203 additions and 174 deletions.
    377 changes: 203 additions & 174 deletions snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) Checks for size 0 .nzb files (plex)
    # 1) Checks for size 0 .nfo files (plex)
    # 2) Calls diff to figure out if the parity info is out of sync.
    # 3) If parity info is out of sync, AND the number of deleted or changed files exceed
    # X (each configurable), it triggers an alert email and stops. (In case of
    @@ -31,6 +31,109 @@
    #
    #######################################################################

    # REQUIRES:
    # - mailx (simplify sending HTML emails)
    # - python markdown (render Markdown to HTML)

    #############
    # FUNCTIONS #
    #############
    # chk_del: gate the sync on the number of deleted files reported by DIFF.
    #   Reads:  DEL_COUNT, DEL_THRESHOLD, ADD_COUNT, MOVE_COUNT, COPY_COUNT,
    #           UPDATE_COUNT, LOG_FILE
    #   Writes: DO_SYNC=1 when DEL_COUNT is strictly below DEL_THRESHOLD,
    #           otherwise CHK_FAIL=1 (DO_SYNC is left untouched in that case)
    chk_del() {
      if ! [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
        # Limit reached or exceeded: record the failed check and stop here.
        echo "[$(date)] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $LOG_FILE for details."
        CHK_FAIL=1
        return
      fi

      # Still under the limit, so the sync job is allowed to run.
      echo "[$(date)] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized."
      DO_SYNC=1
    }

    # chk_updated: gate the sync on the number of updated files reported by DIFF.
    #   Reads:  UPDATE_COUNT, UP_THRESHOLD, ADD_COUNT, DEL_COUNT, MOVE_COUNT,
    #           COPY_COUNT, LOG_FILE
    #   Writes: DO_SYNC=1 when UPDATE_COUNT is strictly below UP_THRESHOLD,
    #           otherwise CHK_FAIL=1
    chk_updated() {
      local summary="A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT"

      if [ $UPDATE_COUNT -lt $UP_THRESHOLD ]; then
        # Under the limit: authorise the sync.
        printf '%s\n' "[$(date)] Changes detected [$summary] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized."
        DO_SYNC=1
      else
        # At or over the limit (or the comparison itself failed): flag it.
        printf '%s\n' "[$(date)] WARNING - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $LOG_FILE for details."
        CHK_FAIL=1
      fi
    }

    # chk_sync_warn: after a threshold check has failed, decide whether the
    # blocked sync should be forced anyway.
    #   Reads:  SYNC_WARN_THRESHOLD (values <= -1 disable forcing),
    #           SYNC_WARN_FILE (persisted warning counter), TMP_OUTPUT
    #   Writes: SYNC_WARN_COUNT, DO_SYNC; rewrites SYNC_WARN_FILE with the
    #           incremented counter when the sync is not forced.
    function chk_sync_warn(){
      if [ "$SYNC_WARN_THRESHOLD" -gt -1 ]; then
        echo "[$(date)] Forced sync is enabled."

        # Accept the stored counter only if the first line of the file is a bare
        # non-negative integer. The earlier sed program ('q;/^[0-9][0-9]*$/!d')
        # quit on line 1 before its filter ran, so malformed content was passed
        # through and then broke the numeric comparisons below.
        SYNC_WARN_COUNT=$(head -n 1 "$SYNC_WARN_FILE" 2>/dev/null | grep -x '[0-9][0-9]*')
        # Missing, unreadable or malformed file => zero warnings so far.
        # The 10# prefix stops a zero-padded value being parsed as octal.
        SYNC_WARN_COUNT=$((10#${SYNC_WARN_COUNT:-0}))

        if [ "$SYNC_WARN_COUNT" -ge "$SYNC_WARN_THRESHOLD" ]; then
          # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
          echo "[$(date)] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
          DO_SYNC=1
        else
          # NO, so let's increment the warning count and skip the sync job
          SYNC_WARN_COUNT=$((SYNC_WARN_COUNT + 1))
          echo "$SYNC_WARN_COUNT" > "$SYNC_WARN_FILE"
          echo "[$(date)] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
          DO_SYNC=0
        fi
      else
        # NO, so let's skip SYNC
        echo "[$(date)] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
        DO_SYNC=0
      fi
    }

    # chk_zero: look for files with a zero sub-second timestamp and, if the
    # status report mentions any, run `snapraid touch` to fix them.
    #   Reads:   SNAPRAID_BIN (path of the snapraid executable)
    #   Outputs: progress lines on stdout; grep is not silenced, so the matching
    #            status line is echoed into the job output as well.
    function chk_zero(){
      echo "###Timestamps [$(date)]"
      if "$SNAPRAID_BIN" status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.'; then
        echo "Found zero sub-second files. Running touch to timestamp. [$(date)]"
        echo "Timestamping"
        # Runs in the foreground, so it has completed once this line returns.
        # The bare `wait` that used to follow was a no-op for this command and
        # could only block on unrelated background work (newer bash versions
        # also wait on the most recent process substitution).
        "$SNAPRAID_BIN" touch
      fi
      echo "Finished"
    }

    # stop_services: pause services that would slow the snapraid run.
    # Currently only Sophos on-access scanning is handled.
    #   Writes: SOPHOS_RUNNING=1 when the scanner was found active and a
    #           disable was issued, so restore_services can re-enable it.
    stop_services() {
      # Any status line that does not contain "not running" counts as active.
      if ! /opt/sophos-av/bin/savdstatus | grep -qv "not running"; then
        return
      fi

      echo "Sophos on-access detected, shutting it down..."
      /opt/sophos-av/bin/savdctl disable
      SOPHOS_RUNNING=1
    }

    # restore_services: undo what stop_services did.
    # It is installed as the INT/EXIT trap handler and is also called directly
    # at the end of a normal run with GRACEFUL=1.
    #   Reads:  SOPHOS_RUNNING, GRACEFUL
    #   Writes: SOPHOS_RUNNING=0 once the scanner has been re-enabled
    #   Exits the script unless GRACEFUL is 1.
    restore_services() {
      printf '\n'

      # Re-enable Sophos on-access scanning only if we were the ones to stop it
      if [ $SOPHOS_RUNNING -eq 1 ]; then
        /opt/sophos-av/bin/savdctl enable
        printf '%s\n' "Restoring Sohpos on-access to active state..."
        SOPHOS_RUNNING=0
      fi

      # Direct (graceful) call: give control back so the caller can finish up.
      if [ $GRACEFUL -eq 1 ]; then
        return
      fi

      # Reached from the trap: terminate the script.
      exit
    }

    # send_mail: render the collected job output as HTML and mail it.
    #   Reads: TMP_OUTPUT (Markdown-flavoured job output, edited in place),
    #          MAIL_BIN, SUBJECT, EMAIL_ADDRESS
    #   Requires the python `markdown` module for the HTML conversion.
    function send_mail(){
      # Format for markdown: a newline only becomes a hard line break when the
      # line ends in two or more spaces, so a single trailing space is not
      # enough. Existing trailing spaces are stripped first so that calling
      # this more than once does not keep growing the lines.
      sed -i 's/ *$/  /' "$TMP_OUTPUT"
      "$MAIL_BIN" -a 'Content-Type: text/html' -s "$SUBJECT" "$EMAIL_ADDRESS" < <(python -m markdown "$TMP_OUTPUT")
    }

    #####################
    # INIT VARIABLES
    #####################
    # Flags consulted by restore_services: GRACEFUL=1 makes it return instead
    # of exiting, SOPHOS_RUNNING=1 makes it re-enable the scanner.
    GRACEFUL=0
    SOPHOS_RUNNING=0

    # Outcome flags set by the threshold checks.
    CHK_FAIL=0
    DO_SYNC=0

    # Put services back on Ctrl-C and on any exit path.
    trap restore_services EXIT INT

    @@ -71,14 +174,7 @@ LOG_FILE="/var/log/snapraid/snapScript.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    #MAIL_BIN="/usr/bin/mutt"
    MAIL_BIN="/usr/bin/mail"

    # how much progress output do we want to keep in email
    # Default is 2 which means report progress in 10% intervals
    # Set to 1 to report progress in 1% intervals
    # Set to 0 to report everything
    TERSE=2
    MAIL_BIN="/usr/bin/mailx"

    ## INTERNAL TEMP VARS ##
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    @@ -90,48 +186,49 @@ SYNC_WARN_COUNT=""
    CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all stdout to log file (leave stderr alone though)
    exec >> $LOG_FILE
    # redirect all output to screen and two files
    > $TMP_OUTPUT
    exec &> >(tee -ia "${TMP_OUTPUT}" "${LOG_FILE}")

    # timestamp the job
    echo "[`date`] SnapRAID Job started."
    echo "SnapRAID DIFF Job started on `date`" > $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "#SnapRAID Script Job started [`date`]"
    echo
    echo "----------------------------------------"

    # Remove any Plex-created anomalies
    echo "##Preprocessing"
    echo "###NFO Scrub [`date`]"
    echo "Removing any 0 byte .nfo's before snapraid exeuction."
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Temporarily disable nZEDb if running
    if [ `systemctl is-active nzedb-tmux.service` == "active" ]; then
    echo "nZEDb detected active, temporarily shutting it down . . ." >> $TMP_OUTPUT
    systemctl stop nzedb-tmux.service
    NZEDB_RUNNING=1
    fi

    #TODO - mount and unmount parity disk on demand!
    # Stop any services that may inhibit optimum execution
    echo "###Stop Services [`date`]"
    stop_services

    #sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "[`date`] ERROR - Content file ($CONTENT_FILE) not found!"
    echo "ERROR - Content file ($CONTENT_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    echo "**ERROR** Content file ($CONTENT_FILE) not found!"
    exit 1;
    fi

    if [ ! -e $PARITY_FILE ]; then
    echo "[`date`] ERROR - Parity file ($PARITY_FILE) not found!"
    echo "ERROR - Parity file ($PARITY_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    echo "**ERROR** Parity file ($PARITY_FILE) not found!"
    exit 1;
    fi
    echo
    echo "----------------------------------------"
    echo "##Processing"

    # Fix timestamps
    chk_zero

    # run the snapraid DIFF command
    echo "[`date`] Running DIFF Command."
    $SNAPRAID_BIN diff >> $TMP_OUTPUT
    echo "###Snapraid DIFF [`date`]"
    $SNAPRAID_BIN diff
    # wait for the above cmd to finish
    wait

    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID DIFF Job finished on `date`" >> $TMP_OUTPUT
    echo "[`date`] DIFF finished."
    echo
    echo "DIFF finished [`date`]"
    JOBS_DONE="DIFF"

    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    @@ -142,81 +239,18 @@ UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' |

    # sanity check to make sure that we were able to get our counts from the output of the DIFF job
    if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "[`date`] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
    if [ $EMAIL_ADDRESS ]; then
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    exit 1;
    fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT

    #############
    # FUNCTIONS #
    #############
    # chk_del (earlier revision): gate the sync on the deleted-file count.
    # A short note is appended to $TMP_OUTPUT (the mail body) and a timestamped
    # line is written to stdout.
    #   Writes: DO_SYNC=1 when DEL_COUNT is strictly below DEL_THRESHOLD,
    #           otherwise CHK_FAIL=1
    chk_del() {
      if ! [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
        # Limit reached or exceeded: report it and flag the failed check.
        echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)." >> $TMP_OUTPUT
        echo "[$(date)] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details."
        CHK_FAIL=1
        return
      fi

      # Under the limit: the sync job may run.
      echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
      echo "[$(date)] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized."
      DO_SYNC=1
    }

    # chk_updated (earlier revision): gate the sync on the updated-file count.
    # A short note is appended to $TMP_OUTPUT (the mail body) and a timestamped
    # line is written to stdout.
    #   Writes: DO_SYNC=1 when UPDATE_COUNT is strictly below UP_THRESHOLD,
    #           otherwise CHK_FAIL=1
    chk_updated() {
      local summary="A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT"

      if [ $UPDATE_COUNT -lt $UP_THRESHOLD ]; then
        # Under the limit: authorise the sync.
        printf '%s\n' "Updated files ($UPDATE_COUNT) did not exceed threshold ($UP_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
        printf '%s\n' "[$(date)] Changes detected [$summary] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized."
        DO_SYNC=1
      else
        # At or over the limit: report it and flag the failed check.
        printf '%s\n' "Number of changed files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD)." >> $TMP_OUTPUT
        printf '%s\n' "[$(date)] WARNING - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $TMP_OUTPUT for details."
        CHK_FAIL=1
      fi
    }

    function chk_sync_warn(){
    if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
    echo "Forced sync is enabled." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is enabled."

    SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
    SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting

    if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
    # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run." >> $TMP_OUTPUT
    echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
    DO_SYNC=1
    else
    # NO, so let's increment the warning count and skip the sync job
    ((SYNC_WARN_COUNT += 1))
    echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
    DO_SYNC=0
    fi
    else
    # NO, so let's skip SYNC
    echo "Forced sync is not enabled. NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
    DO_SYNC=0
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "**ERROR** - failed to get one or more count values. Unable to proceed."
    echo "Exiting script. [`date`]"
    if [ $EMAIL_ADDRESS ]; then
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside."
    send_mail
    fi
    }

    # chk_zero (earlier revision): if `snapraid status` reports files with a
    # zero sub-second timestamp, run `snapraid touch` and append its output to
    # $TMP_OUTPUT.
    #   Reads: SNAPRAID_BIN, TMP_OUTPUT
    function chk_zero(){
      # Use the configured binary for the status probe as well, rather than
      # whatever `snapraid` happens to resolve to on PATH, so that both calls
      # hit the same executable.
      if "$SNAPRAID_BIN" status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.'; then
        echo "Found zero sub-second files. Running touch to timestamp." >> "$TMP_OUTPUT"
        echo "[$(date)] Found zero sub-second files. Running touch to timestamp."

        # Carriage returns in the tool's output are turned into newlines
        # before the text is appended to the mail body.
        "$SNAPRAID_BIN" touch | sed -e 's/\r/\n/g' >> "$TMP_OUTPUT"
      fi
    }
    exit 1;
    fi
    echo
    echo "**SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]**"
    echo

    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    @@ -232,120 +266,115 @@ if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -g
    fi
    else
    # NO, so let's skip SYNC
    echo "[`date`] No change detected. Not running SYNC job."
    echo "No change detected. Not running SYNC job. [`date`] "
    DO_SYNC=0
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "[`date`] SYNC started."
    echo "SnapRAID SYNC Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    chk_zero
    $SNAPRAID_BIN sync | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    echo "###Snapraid SYNC [`date`]"
    $SNAPRAID_BIN sync -q
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
    echo "[`date`] SYNC finished."
    echo "SYNC finished [`date`]"
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user has manually synced or restored deleted files and we will have missed it in the checks above.
    exec &>/dev/tty
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user
    # has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    $SNAPRAID_BIN scrub -p new
    echo "###Snapraid Scrub Newly Added [`date`]"
    $SNAPRAID_BIN scrub -p new -q
    wait
    echo "SCRUB New finished [`date`]"
    echo
    fi

    # Moving onto scrub now. Check if user has enabled scrub
    if [ $SCRUB_PERCENT -gt 0 ]; then
    # YES, first let's check if delete threshold has been breached and we have not forced a sync.
    if [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
    # YES, parity is out of sync so let's not run scrub job
    echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached)."
    echo "Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached). [`date`]"
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    echo "**WARNING** - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job. [`date`]"
    else
    # Everything ok - let's run the scrub job!
    echo "[`date`] Running SCRUB Command."
    echo "SnapRAID SCRUB Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    echo "###Snapraid SCRUB [`date`]"
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE -q
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
    echo "[`date`] SCRUB finished."
    echo "SCRUB finished [`date`]"
    echo
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &>/dev/tty
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    exec &> >(tee -ia "${LOG_FILE}" "${TMP_OUTPUT}")
    fi
    fi
    else
    echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
    echo "Scrub job is not enabled. Not running SCRUB job. [`date`] "
    fi

    echo
    echo "----------------------------------------"
    echo "##Postprocessing"

    # Moving onto logging SMART info if enabled
    if [ $SMART_LOG -eq 1 ]; then
    $SNAPRAID_BIN smart >> $TMP_OUTPUT
    echo
    $SNAPRAID_BIN smart
    wait
    fi

    echo "Spinning down disks..." >> $TMP_OUTPUT
    echo "Spinning down disks..."
    $SNAPRAID_BIN down

    # Restart nZEDb if was initially active
    if [ $NZEDB_RUNNING -eq 1 ]; then
    systemctl start nzedb-tmux.service
    echo "Restoring nZEDb to previously active state . . ." >> $TMP_OUTPUT
    fi
    # Graceful restore of services outside of trap - for messaging
    GRACEFUL=1
    restore_services

    echo "All jobs ended. [`date`] "

    exec &>/dev/tty

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
    # check if deleted count exceeded threshold
    if [ $CHK_FAIL -eq 1 ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
    fi

    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG & "
    fi
    echo "Email address is set. Sending email report to <$EMAIL_ADDRESS> [`date`]"
    # check if deleted count exceeded threshold
    if [ $CHK_FAIL -eq 1 ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
    fi

    if [ $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
    fi
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG & "
    fi

    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)" "$EMAIL_ADDRESS" < $TMP_OUTPUT

    if [ $UPDATE_COUNT -gt $UP_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
    fi

    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
    # OPTIONALLY, let's reduce the amount of status lines in output.
    if [ $TERSE -gt 1 ]; then
    # Report progress in interval of tens %
    sed -i '$!N; /^\([0-9]\).*\n\1.*$/!P; D' $TMP_OUTPUT
    sed -i '/^[1-8]%.*$/d' $TMP_OUTPUT
    elif [ $TERSE -gt 0 ]; then
    # Report progress in interval of ones %
    sed -i '$!N; /^\([0-9]*\)%.*\n\1.*$/!P; D' $TMP_OUTPUT
    fi
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)"
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully"
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    SUBJECT="$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully"
    else
    SUBJECT="$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED"
    fi
    send_mail
    fi

    echo "[`date`] All jobs ended."

    exit 0;
  5. @mtompkins mtompkins revised this gist Nov 3, 2016. 1 changed file with 17 additions and 3 deletions.
    20 changes: 17 additions & 3 deletions snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -31,8 +31,6 @@
    #
    #######################################################################

    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    DO_SYNC=0
    CHK_FAIL=0

    @@ -62,7 +60,7 @@ SYNC_WARN_THRESHOLD=-1
    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=3
    SCRUB_PERCENT=10
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    @@ -100,6 +98,16 @@ echo "[`date`] SnapRAID Job started."
    echo "SnapRAID DIFF Job started on `date`" > $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT

    # Remove any Plex-created anomalies
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Temporarily disable nZEDb if running
    if [ `systemctl is-active nzedb-tmux.service` == "active" ]; then
    echo "nZEDb detected active, temporarily shutting it down . . ." >> $TMP_OUTPUT
    systemctl stop nzedb-tmux.service
    NZEDB_RUNNING=1
    fi

    #TODO - mount and unmount parity disk on demand!

    #sanity check first to make sure we can access the content and parity files
    @@ -292,6 +300,12 @@ fi
    echo "Spinning down disks..." >> $TMP_OUTPUT
    $SNAPRAID_BIN down

    # Restart nZEDb if was initially active
    if [ $NZEDB_RUNNING -eq 1 ]; then
    systemctl start nzedb-tmux.service
    echo "Restoring nZEDb to previously active state . . ." >> $TMP_OUTPUT
    fi

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
  6. @mtompkins mtompkins revised this gist Oct 25, 2016. 2 changed files with 337 additions and 291 deletions.
    291 changes: 0 additions & 291 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -1,291 +0,0 @@
    #!/bin/bash
    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) It first calls diff to figure out if the parity info is out of sync.
    # 2) If parity info is out of sync, AND the number of deleted files exceed
    # X (configurable), it triggers an alert email and stops. (In case of
    # accidental deletions, you have the opportunity to recover them from
    # the existing parity info)
    # 3) If parity info is out of sync, AND the number of deleted files exceed X
    # AND it has reached/exceeded Y (configurable) number of warnings, force
    # a sync. (Useful when you get a false alarm above and you can't be bothered
    # to login and do a manual sync. Note the risk is if its not a false alarm
    # and you can't access the box before Y number of times the job is run to
    # fix the issue... Well I hope you have other backups...)
    # 4) If parity info is out of sync BUT the number of deleted files did NOT
    # exceed X, it calls sync to update the parity info.
    # 5) If the parity info is in sync (either because nothing changed or after it
    # has successfully completed the sync job, it runs the scrub command to
    # validate the integrity of the data (both the files and the parity info).
    # Note that each run of the scrub command will validate only a (configurable)
    # portion of parity info to avoid having a long running job and affecting
    # the performance of the box.
    # 6) Once all jobs are completed, it sends an email with the output to user
    # (if configured).
    #
    # Author: SidneyC <sidneyc_at_outlook_dot_com>
    #
    # CHANGELOG
    # ---------
    # 23/10/2011 Initial release
    # 04/01/2015 Updated script to handle changes in SnapRAID v7.0
    # Added scrub job as an optional task (after diff and sync)
    # 06/01/2015 Made the script more robust by adding checks to make sure preceding
    # jobs completed as expected before continuing with the subsequent jobs.
    # Made emailing output to user optional.
    # 24/01/2015 Inserted a sed step to clean up crlf (aka dos/unix formatting issue)
    # in sync & scrub outputs.
    # Detect sync and scrub job failures and highlight to user via warning
    # subject line in email to user.
    # 25/01/2015 Added option to reduce progress report output in email (default is 2 -
    # report only in 10% intervals).
    # 26/01/2015 For terse = 2 setting, removed lines for 1-8% from output
    # 05/02/2015 Added logic to perform forced sync after X number of warnings
    # Cleaned up formatting in script file (changed tabs to spaces)
    # Made consistent the use of [ in the test statements
    # 08/02/2015 Added warning number to the email subject line so that it is easier to
    # tell how many warnings have been issued so far
    # 04/03/2015 Corrected Scrub job status check (i.e. added check for text "Nothing
    # to do") to avoid sending false warning email
    # 27/10/2015 Corrected Sync job status check (i.e. added check for text "Nothing to
    # do") to avoid sending false warning email
    # 29/10/2015 Fixed a bug with the job status check not detecting the right strings
    #
    #######################################################################

    ## USER DEFINED SETTINGS ##
    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"

    # Set the threshold of deleted files to stop the sync job from running.
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    DEL_THRESHOLD=50

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    # start a sync job when DEL_THRESHOLD is breached due to false alarm.
    # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
    # Set to -1 to NEVER force a sync (i.e. need to manual sync if delete threshold is breached)
    SYNC_WARN_THRESHOLD=3

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=5
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/tmp/snapRAID.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    MAIL_BIN="/usr/bin/mail"

    # how much progress output do we want to keep in email
    # Default is 2 which means report progress in 10% intervals
    # Set to 1 to report progress in 1% intervals
    # Set to 0 to report everything
    TERSE=2

    ## INTERNAL TEMP VARS ##
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    TMP_OUTPUT="/tmp/snapRAID.out"
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    SYNC_WARN_COUNT=""

    # auto determine names of content and parity files
    CONTENT_FILE=`cat /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`cat /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all stdout to log file (leave stderr alone thou)
    exec >> $LOG_FILE

    # timestamp the job
    echo "[`date`] SnapRAID Job started."
    echo "SnapRAID DIFF Job started on `date`" > $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT

    #TODO - mount and unmount parity disk on demand!

    #sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "[`date`] ERROR - Content file ($CONTENT_FILE) not found!"
    echo "ERROR - Content file ($CONTENT_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    fi

    if [ ! -e $PARITY_FILE ]; then
    echo "[`date`] ERROR - Parity file ($PARITY_FILE) not found!"
    echo "ERROR - Parity file ($PARITY_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    fi

    # run the snapraid DIFF command
    echo "[`date`] Running DIFF Command."
    $SNAPRAID_BIN diff >> $TMP_OUTPUT
    # wait for the above cmd to finish
    wait

    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID DIFF Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="DIFF"

    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)

    # sanity check to make sure that we were able to get our counts from the output of the DIFF job
    if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "[`date`] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
    if [ $EMAIL_ADDRESS ]; then
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    exit 1;
    fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT

    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
    # CHK 1 - YES, files have changed
    # CHK 2 - if number of deleted files exceed DEL_THRESHOLD
    if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
    # CHK 2 - NO, delete threshold not reached, lets run the sync job
    echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). Running SYNC Command."
    DO_SYNC=1
    else
    #CHK 2 - YES, delete threshold breached! print warning message to both outputs
    echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)." >> $TMP_OUTPUT
    echo "[`date`] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details."
    # CHK 3 - if forced sync is set
    if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
    # CHK 3 - YES
    echo "Forced sync is enabled." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is enabled."
    # CHK 4 - if number of warnings has exceeded threshold
    SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
    SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting
    if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
    # CHK 5 - YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run." >> $TMP_OUTPUT
    echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
    DO_SYNC=1
    else
    # CHK 4 - NO, so let's increment the warning count and skip the sync job
    ((SYNC_WARN_COUNT += 1))
    echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
    DO_SYNC=0
    fi
    else
    # CHK 3 - NO, so let's skip SYNC
    echo "Forced sync is not enabled. NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
    DO_SYNC=0
    fi
    fi
    else
    # CHK 1 - NO, so let's skip SYNC
    echo "[`date`] No change detected. Not running SYNC job."
    DO_SYNC=0
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "SnapRAID SYNC Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN sync | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    fi

    # Moving onto scrub now. Check if user has enabled scrub
    if [ $SCRUB_PERCENT -gt 0 ]; then
    # YES, first let's check if delete threshold has been breached and we have not forced a sync.
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, parity is out of sync so let's not run scrub job
    echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    else
    # Everything ok - let's run the scrub job!
    echo "[`date`] Running SCRUB Command."
    echo "SnapRAID SCRUB Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    fi
    fi
    else
    echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
    fi

    # Moving onto logging SMART info if enabled
    if [ $SMART_LOG -eq 1 ]; then
    $SNAPRAID_BIN smart >> $TMP_OUTPUT
    wait
    fi

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
    # check if deleted count exceeded threshold
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
    # OPTIONALLY, let's reduce the amount of status lines in output.
    if [ $TERSE -gt 1 ]; then
    # Report progress in interval of tens %
    sed -i '$!N; /^\([0-9]\).*\n\1.*$/!P; D' $TMP_OUTPUT
    sed -i '/^[1-8]%.*$/d' $TMP_OUTPUT
    elif [ $TERSE -gt 0 ]; then
    # Report progress in interval of ones %
    sed -i '$!N; /^\([0-9]*\)%.*\n\1.*$/!P; D' $TMP_OUTPUT
    fi
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    fi

    echo "[`date`] All jobs ended."

    exit 0;
    337 changes: 337 additions & 0 deletions snapScript.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,337 @@
    #!/bin/bash

    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) Deletes size 0 .nfo files under /mnt/volume/media (plex)
    # 2) Calls diff to figure out if the parity info is out of sync.
    # 3) If parity info is out of sync, AND the number of deleted or changed files exceed
    # X (each configurable), it triggers an alert email and stops. (In case of
    # accidental deletions, you have the opportunity to recover them from
    # the existing parity info. This also mitigates to a degree encryption malware.)
    # 4) If parity info is out of sync, AND the number of deleted or changed files exceed X
    # AND it has reached/exceeded Y (configurable) number of warnings, force
    # a sync. (Useful when you get a false alarm above and you can't be bothered
    # to login and do a manual sync. Note the risk is if its not a false alarm
    # and you can't access the box before Y number of times the job is run to
    # fix the issue... Well I hope you have other backups...)
    # 5) If parity info is out of sync BUT the number of deleted files did NOT
    # exceed X, it calls sync to update the parity info.
    # 6) If the parity info is in sync (either because nothing changed or after it
    # has successfully completed the sync job), it runs the scrub command to
    # validate the integrity of the data (both the files and the parity info).
    # Note that each run of the scrub command will validate only a (configurable)
    # portion of parity info to avoid having a long running job and affecting
    # the performance of the box.
    # 7) Once all jobs are completed, it sends an email with the output to user
    # (if configured).
    #
    #
    # Inspired by Zack Reed (http://zackreed.me/articles/83-updated-snapraid-sync-script)
    #
    #######################################################################

    # Housekeeping before the parity run: delete every zero-byte regular file
    # named *.nfo under /mnt/volume/media. find/xargs exchange NUL-delimited
    # names so paths containing spaces or newlines are handled intact.
    find /mnt/volume/media -name '*.nfo' -type f -size 0 -print0 | xargs -0 /bin/rm -f

    # Decision flags shared by the chk_* functions and the main flow below:
    #   DO_SYNC  - 1 once a sync has been authorised (or forced), 0 otherwise
    #   CHK_FAIL - 1 once the delete/update threshold check has failed
    DO_SYNC=0
    CHK_FAIL=0

    # Expand PATH for smartctl
    PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

    ## USER DEFINED SETTINGS ##
    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"

    # Set the threshold of deleted files to stop the sync job from running.
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    # chk_del only authorises a sync while the deleted count is strictly below
    # this value, i.e. a count equal to the threshold already blocks the sync.
    DEL_THRESHOLD=100
    # Same idea for updated files: chk_updated only authorises a sync while the
    # updated count is strictly below this value.
    UP_THRESHOLD=500

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    # start a sync job when DEL_THRESHOLD is breached due to false alarm.
    # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
    # Set to -1 to NEVER force a sync (i.e. need to manual sync if delete threshold is breached)
    # The same warning counter is used when UP_THRESHOLD is breached, because
    # chk_sync_warn runs whenever either check sets CHK_FAIL.
    #SYNC_WARN_THRESHOLD=3
    SYNC_WARN_THRESHOLD=-1

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=3
    # Passed to "snapraid scrub" as the -o value (per the SnapRAID manual this
    # limits the scrub to blocks older than the given number of days).
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/var/log/snapraid/snapScript.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail program binary
    #MAIL_BIN="/usr/bin/mutt"
    MAIL_BIN="/usr/bin/mail"

    # how much progress output do we want to keep in email
    # Default is 2 which means report progress in 10% intervals
    # Set to 1 to report progress in 1% intervals
    # Set to 0 to report everything
    TERSE=2

    ## INTERNAL TEMP VARS ##
    # Prefix put in front of every mail subject line (includes the host name).
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    # Collects the job output that becomes the body of the email report.
    TMP_OUTPUT="/tmp/snapRAID.out"
    # Marker file holding the number of threshold warnings issued so far;
    # written by chk_sync_warn and removed after a sync has run.
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    # In-memory copy of the warning count (filled in by chk_sync_warn).
    SYNC_WARN_COUNT=""

    # auto determine names of content and parity files: take the second
    # space-separated field of the first non-blank, non-comment line of
    # /etc/snapraid.conf that mentions snapraid.content / snapraid.parity
    CONTENT_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`grep -v '^$\|^\s*\#' /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all stdout to log file (leave stderr alone though)
    exec >> "$LOG_FILE"

    # timestamp the job; this also truncates the mail body left by a previous run
    echo "[`date`] SnapRAID Job started."
    echo "SnapRAID DIFF Job started on `date`" > "$TMP_OUTPUT"
    echo "----------------------------------------" >> "$TMP_OUTPUT"

    #TODO - mount and unmount parity disk on demand!

    #sanity check first to make sure we can access the content and parity files
    # The file names MUST stay quoted here: if the lookup above produced nothing,
    # an unquoted test collapses to `[ ! -e ]`, which evaluates to false, and the
    # script would carry on without a content/parity file.
    if [ ! -e "$CONTENT_FILE" ]; then
        echo "[`date`] ERROR - Content file ($CONTENT_FILE) not found!"
        echo "ERROR - Content file ($CONTENT_FILE) not found!" >> "$TMP_OUTPUT"
        exit 1;
    fi

    if [ ! -e "$PARITY_FILE" ]; then
        echo "[`date`] ERROR - Parity file ($PARITY_FILE) not found!"
        echo "ERROR - Parity file ($PARITY_FILE) not found!" >> "$TMP_OUTPUT"
        exit 1;
    fi

    # run the snapraid DIFF command and append its report to the mail body
    echo "[$(date)] Running DIFF Command."
    $SNAPRAID_BIN diff >> $TMP_OUTPUT
    # kept from the original flow (no background job is started above)
    wait

    {
        echo "----------------------------------------"
        echo "SnapRAID DIFF Job finished on $(date)"
    } >> $TMP_OUTPUT
    echo "[$(date)] DIFF finished."
    JOBS_DONE="DIFF"

    # Each counter is the leading number of the matching "<n> <label>" summary
    # line in the DIFF report (leading blanks stripped, first field kept).
    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT \
        | sed 's/^ *//g' \
        | cut -d ' ' -f1)
    ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT \
        | sed 's/^ *//g' \
        | cut -d ' ' -f1)
    MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT \
        | sed 's/^ *//g' \
        | cut -d ' ' -f1)
    COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT \
        | sed 's/^ *//g' \
        | cut -d ' ' -f1)
    UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT \
        | sed 's/^ *//g' \
        | cut -d ' ' -f1)

    # sanity check: every counter must have been found in the DIFF output
    counts_complete=1
    for count_value in "$DEL_COUNT" "$ADD_COUNT" "$MOVE_COUNT" "$COPY_COUNT" "$UPDATE_COUNT"; do
        if [ -z "$count_value" ]; then
            counts_complete=0
        fi
    done

    if [ $counts_complete -eq 0 ]; then
        # at least one value is missing: report to the user and stop with an error code
        echo "[$(date)] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
        if [ $EMAIL_ADDRESS ]; then
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
        fi
        exit 1;
    fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT

    #############
    # FUNCTIONS #
    #############
    # Delete-threshold check.
    # Globals read:    DEL_COUNT, DEL_THRESHOLD, TMP_OUTPUT and the other *_COUNT
    #                  values (used in the log line only)
    # Globals written: DO_SYNC=1 when the deleted count is below the threshold,
    #                  CHK_FAIL=1 otherwise
    # Output: one line appended to $TMP_OUTPUT, one timestamped line on stdout
    function chk_del(){
        # Guard: count is not below the threshold -> warn, flag the failure, stop.
        if ! [ "$DEL_COUNT" -lt "$DEL_THRESHOLD" ]; then
            echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)." >> "$TMP_OUTPUT"
            echo "[$(date)] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details."
            CHK_FAIL=1
            return
        fi

        # Threshold not reached, so the sync job may run.
        echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> "$TMP_OUTPUT"
        echo "[$(date)] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). SYNC Authorized."
        DO_SYNC=1
    }

    # Update-threshold check.
    # Globals read:    UPDATE_COUNT, UP_THRESHOLD, TMP_OUTPUT and the other
    #                  *_COUNT values (used in the log line only)
    # Globals written: DO_SYNC=1 when the updated count is below the threshold,
    #                  CHK_FAIL=1 otherwise (DO_SYNC is left untouched then)
    # Output: one line appended to $TMP_OUTPUT, one timestamped line on stdout
    function chk_updated(){
        # Guard: count is not below the threshold -> warn, flag the failure, stop.
        if ! [ "$UPDATE_COUNT" -lt "$UP_THRESHOLD" ]; then
            echo "Number of changed files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD)." >> "$TMP_OUTPUT"
            echo "[$(date)] WARNING - Updated files ($UPDATE_COUNT) exceeded threshold ($UP_THRESHOLD). Check $TMP_OUTPUT for details."
            CHK_FAIL=1
            return
        fi

        # Threshold not reached, so the sync job may run.
        echo "Updated files ($UPDATE_COUNT) did not exceed threshold ($UP_THRESHOLD), proceeding with sync job." >> "$TMP_OUTPUT"
        echo "[$(date)] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and updated files ($UPDATE_COUNT) is below threshold ($UP_THRESHOLD). SYNC Authorized."
        DO_SYNC=1
    }

    # Decides whether a sync is forced after a threshold check has failed.
    # Globals read:    SYNC_WARN_THRESHOLD, SYNC_WARN_FILE, TMP_OUTPUT
    # Globals written: DO_SYNC (1 = force the sync, 0 = skip it),
    #                  SYNC_WARN_COUNT (warnings issued so far)
    # Side effects:    rewrites $SYNC_WARN_FILE with the incremented count when
    #                  the sync is skipped; appends to $TMP_OUTPUT; logs to stdout
    function chk_sync_warn(){
        if [ "$SYNC_WARN_THRESHOLD" -gt -1 ]; then
            echo "Forced sync is enabled." >> "$TMP_OUTPUT"
            echo "[`date`] Forced sync is enabled."

            # Load the warning count from the first line of the marker file.
            # Anything that is not a plain run of digits (missing file, empty
            # file, corrupt content) counts as zero. The previous
            # sed 'q;...' expression quit before its digit filter ran, so a
            # corrupt marker was passed on verbatim and the counter could
            # never advance.
            SYNC_WARN_COUNT=""
            if [ -f "$SYNC_WARN_FILE" ] && [ -r "$SYNC_WARN_FILE" ]; then
                IFS= read -r SYNC_WARN_COUNT < "$SYNC_WARN_FILE"
            fi
            if [[ $SYNC_WARN_COUNT =~ ^[0-9]+$ ]]; then
                # force base 10 so a value such as "08" is not parsed as octal
                SYNC_WARN_COUNT=$((10#$SYNC_WARN_COUNT))
            else
                SYNC_WARN_COUNT=0
            fi

            if [ "$SYNC_WARN_COUNT" -ge "$SYNC_WARN_THRESHOLD" ]; then
                # YES, lets force a sync job. Do not need to remove warning marker here as it is automatically removed when the sync job is run by this script
                echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run." >> "$TMP_OUTPUT"
                echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
                DO_SYNC=1
            else
                # NO, so let's increment the warning count and skip the sync job
                ((SYNC_WARN_COUNT += 1))
                echo "$SYNC_WARN_COUNT" > "$SYNC_WARN_FILE"
                echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job." >> "$TMP_OUTPUT"
                echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
                DO_SYNC=0
            fi
        else
            # NO, so let's skip SYNC
            echo "Forced sync is not enabled. NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> "$TMP_OUTPUT"
            echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
            DO_SYNC=0
        fi
    }

    # Runs "snapraid touch" when "snapraid status" reports files with a zero
    # sub-second timestamp.
    # Globals read: SNAPRAID_BIN, TMP_OUTPUT
    # Output: the matching status line (printed by grep) and a timestamped log
    #         line on stdout; a notice plus the touch output appended to
    #         $TMP_OUTPUT
    function chk_zero(){
        # Use the configured binary like every other snapraid call in this
        # script, instead of relying on a "snapraid" found through PATH.
        if $SNAPRAID_BIN status | grep 'You have [1-9][0-9]* files with zero sub-second timestamp\.'; then
            echo "Found zero sub-second files. Running touch to timestamp." >> $TMP_OUTPUT
            echo "[`date`] Found zero sub-second files. Running touch to timestamp."

            # carriage returns become newlines so the output reads cleanly in the mail
            $SNAPRAID_BIN touch | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
        fi
    }

    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    if ! [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
        # NO, so let's skip SYNC
        echo "[$(date)] No change detected. Not running SYNC job."
        DO_SYNC=0
    else
        # YES - the delete threshold is checked first
        chk_del

        # the update threshold is only consulted when the delete check passed
        [ $CHK_FAIL -eq 0 ] && chk_updated

        # a failed check hands the final decision to the forced-sync logic
        [ $CHK_FAIL -eq 1 ] && chk_sync_warn
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
        echo "[$(date)] SYNC started."
        {
            echo "SnapRAID SYNC Job started on $(date)"
            echo "----------------------------------------"
        } >> $TMP_OUTPUT

        # fix zero sub-second timestamps (if any) before syncing
        chk_zero
        $SNAPRAID_BIN sync | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
        #wait for the job to finish
        wait

        {
            echo "----------------------------------------"
            echo "SnapRAID SYNC Job finished on $(date)"
        } >> $TMP_OUTPUT
        echo "[$(date)] SYNC finished."
        JOBS_DONE="$JOBS_DONE + SYNC"

        # Tag the 'Everything OK' / 'Nothing to do' line with a SYNC marker so it
        # can be told apart from the SCRUB job's line later on.
        sed -i -e 's/^Everything OK/SYNC_JOB--Everything OK/g' -e 's/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT

        # Drop a warning marker left by earlier runs. Doing it here also covers
        # the case where the user synced manually or restored the deleted files
        # and the checks above never saw it.
        [ -e $SYNC_WARN_FILE ] && rm $SYNC_WARN_FILE

        $SNAPRAID_BIN scrub -p new
    fi

    # Moving onto scrub now. The checks run in this order:
    #   1. scrub disabled by the user            -> just log it
    #   2. a threshold failed and no sync ran    -> parity is stale, cancel
    #   3. sync ran but left no SYNC_JOB- marker -> sync did not succeed, cancel
    #   4. otherwise                             -> run the scrub
    if ! [ $SCRUB_PERCENT -gt 0 ]; then
        echo "[$(date)] Scrub job is not scheduled. Not running SCRUB job."
    elif [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
        # parity is out of sync so let's not run scrub job
        echo "[$(date)] Scrub job cancelled as parity info is out of sync (deleted or changed files threshold has been breached)."
    elif [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
        # Sync ran but did not complete successfully so lets not run scrub to be safe
        echo "[$(date)] WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job."
        echo "WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    else
        # Everything ok - let's run the scrub job!
        echo "[$(date)] Running SCRUB Command."
        {
            echo "SnapRAID SCRUB Job started on $(date)"
            echo "----------------------------------------"
        } >> $TMP_OUTPUT
        $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
        #wait for the job to finish
        wait
        {
            echo "----------------------------------------"
            echo "SnapRAID SCRUB Job finished on $(date)"
        } >> $TMP_OUTPUT
        echo "[$(date)] SCRUB finished."
        JOBS_DONE="$JOBS_DONE + SCRUB"
        # Tag the 'Everything OK' / 'Nothing to do' line with a SCRUB marker to
        # tell it apart from the SYNC job's line above.
        sed -i -e 's/^Everything OK/SCRUB_JOB--Everything OK/g' -e 's/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    fi

    # Moving onto logging SMART info if enabled (report goes into the mail body)
    [ $SMART_LOG -eq 1 ] && {
        $SNAPRAID_BIN smart >> $TMP_OUTPUT
        wait
    }

    # Note the spin-down in the mail body, then issue the command itself; its
    # own output goes to stdout.
    echo "Spinning down disks..." >> $TMP_OUTPUT
    $SNAPRAID_BIN down

    # all jobs done, let's send output to user if configured
    # Subject line selection, first match wins:
    #   1. a threshold check failed AND no sync ran   -> threshold WARNING
    #   2. SYNC ran but left no SYNC_JOB- marker      -> SYNC WARNING
    #   3. SCRUB ran but left no SCRUB_JOB- marker    -> SCRUB WARNING
    #   4. otherwise                                  -> INFO, output trimmed per TERSE
    if [ "$EMAIL_ADDRESS" ]; then
        echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
        # Threshold warning only applies while parity is still out of sync. When a
        # forced sync ran (DO_SYNC=1) we fall through so that the SYNC/SCRUB result
        # checks below are not bypassed and no empty "()" subject is produced.
        if [ $CHK_FAIL -eq 1 -a $DO_SYNC -eq 0 ]; then
            # -ge mirrors chk_del/chk_updated, which already fail when the count
            # EQUALS the threshold; with -gt the message would stay empty then.
            MSG=""
            if [ $DEL_COUNT -ge $DEL_THRESHOLD ]; then
                MSG="Deleted Files ($DEL_COUNT) / ($DEL_THRESHOLD) Violation"
            fi

            if [ $DEL_COUNT -ge $DEL_THRESHOLD -a $UPDATE_COUNT -ge $UP_THRESHOLD ]; then
                MSG="$MSG & "
            fi

            if [ $UPDATE_COUNT -ge $UP_THRESHOLD ]; then
                MSG="$MSG Changed Files ($UPDATE_COUNT) / ($UP_THRESHOLD) Violation"
            fi

            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - ($MSG)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
        elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
            # Sync ran but did not complete successfully so lets warn the user
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
        elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
            # Scrub ran but did not complete successfully so lets warn the user
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
        else
            # OPTIONALLY, let's reduce the amount of status lines in output.
            if [ $TERSE -gt 1 ]; then
                # Report progress in interval of tens %
                sed -i '$!N; /^\([0-9]\).*\n\1.*$/!P; D' $TMP_OUTPUT
                sed -i '/^[1-8]%.*$/d' $TMP_OUTPUT
            elif [ $TERSE -gt 0 ]; then
                # Report progress in interval of ones %
                sed -i '$!N; /^\([0-9]*\)%.*\n\1.*$/!P; D' $TMP_OUTPUT
            fi
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
        fi
    fi

    echo "[`date`] All jobs ended."

    exit 0;
  7. @bfg100k bfg100k revised this gist Oct 30, 2015. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -50,6 +50,7 @@
    # to do") to avoid sending false warning email
    # 27/10/2015 Corrected Sync job status check (i.e. added check for text "Nothing to
    # do") to avoid sending false warning email
    # 29/10/2015 Fixed a bug with the job status check not detecting the right strings
    #
    #######################################################################

    @@ -229,7 +230,7 @@ if [ $SCRUB_PERCENT -gt 0 ]; then
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB--" $TMP_OUTPUT)" ]; then
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    @@ -265,10 +266,10 @@ if [ $EMAIL_ADDRESS ]; then
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB--" $TMP_OUTPUT)" ]; then
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB-" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB--" $TMP_OUTPUT)" ]; then
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB-" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
  8. @bfg100k bfg100k revised this gist Oct 26, 2015. 1 changed file with 23 additions and 12 deletions.
    35 changes: 23 additions & 12 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -48,6 +48,8 @@
    # tell how many warnings have been issued so far
    # 04/03/2015 Corrected Scrub job status check (i.e. added check for text "Nothing
    # to do") to avoid sending false warning email
    # 27/10/2015 Corrected Sync job status check (i.e. added check for text "Nothing to
    # do") to avoid sending false warning email
    #
    #######################################################################

    @@ -60,7 +62,7 @@ EMAIL_ADDRESS="root"
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    DEL_THRESHOLD=20
    DEL_THRESHOLD=50

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    @@ -75,6 +77,9 @@ SYNC_WARN_THRESHOLD=3
    SCRUB_PERCENT=5
    SCRUB_AGE=10

    # Set the option to log SMART info. 1 to enable, any other values to disable
    SMART_LOG=1

    # this script will log its actions to a file at this location
    LOG_FILE="/tmp/snapRAID.log"
    # location of the snapraid binary
    @@ -207,8 +212,8 @@ if [ $DO_SYNC -eq 1 ]; then
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' string to differentiate it from SCRUB job later
    sed -i 's/^Everything OK/SYNC-Everything OK/g' $TMP_OUTPUT
    # insert SYNC marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SCRUB job later
    sed -i 's/^Everything OK/SYNC_JOB--Everything OK/g;s/^Nothing to do/SYNC_JOB--Nothing to do/g' $TMP_OUTPUT
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    @@ -223,11 +228,11 @@ if [ $SCRUB_PERCENT -gt 0 ]; then
    echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking if snapRAID wrote "Everything OK" to the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
    # let's make sure if sync ran, it completed successfully (by checking for our marker text "SYNC_JOB--" in the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC_JOB--" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    echo "[`date`] WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect marker <SYNC_JOB-->. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    else
    # Everything ok - let's run the scrub job!
    echo "[`date`] Running SCRUB Command."
    @@ -239,25 +244,31 @@ if [ $SCRUB_PERCENT -gt 0 ]; then
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' string to differentiate it from SYNC job above
    sed -i 's/^Everything OK/SCRUB-Everything OK/g' $TMP_OUTPUT
    # insert SCRUB marker to 'Everything OK' or 'Nothing to do' string to differentiate it from SYNC job above
    sed -i 's/^Everything OK/SCRUB_JOB--Everything OK/g;s/^Nothing to do/SCRUB_JOB--Nothing to do/g' $TMP_OUTPUT
    fi
    fi
    else
    echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
    fi

    # Moving onto logging SMART info if enabled
    if [ $SMART_LOG -eq 1 ]; then
    $SNAPRAID_BIN smart >> $TMP_OUTPUT
    wait
    fi

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
    # check if deleted count exceeded threshold
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC_JOB--" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB-Everything OK" $TMP_OUTPUT)" -a -z "$(grep -w "Nothing to do" $TMP_OUTPUT)" ]; then
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB_JOB--" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
    @@ -276,4 +287,4 @@ fi

    echo "[`date`] All jobs ended."

    exit 0;
    exit 0;
  9. @bfg100k bfg100k revised this gist Mar 4, 2015. 1 changed file with 9 additions and 4 deletions.
    13 changes: 9 additions & 4 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -44,6 +44,10 @@
    # 05/02/2015 Added logic to perform forced sync after X number of warnings
    # Cleaned up formatting in script file (changed tabs to spaces)
    # Made consistent the use of [ in the test statements
    # 08/02/2015 Added warning number to the email subject line so that it is easier to
    # tell how many warnings have been issued so far
    # 04/03/2015 Corrected Scrub job status check (i.e. added check for text "Nothing
    # to do") to avoid sending false warning email
    #
    #######################################################################

    @@ -88,6 +92,7 @@ TERSE=2
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    TMP_OUTPUT="/tmp/snapRAID.out"
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"
    SYNC_WARN_COUNT=""

    # auto determine names of content and parity files
    CONTENT_FILE=`cat /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    @@ -144,7 +149,7 @@ fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT

    # check if the conditions to run SYNC is met
    # check if the conditions to run SYNC are met
    # CHK 1 - if files have changed
    if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
    # CHK 1 - YES, files have changed
    @@ -247,12 +252,12 @@ if [ $EMAIL_ADDRESS ]; then
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
    # check if deleted count exceeded threshold
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING $SYNC_WARN_COUNT - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB-Everything OK" $TMP_OUTPUT)" ]; then
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB-Everything OK" $TMP_OUTPUT)" -a -z "$(grep -w "Nothing to do" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
  10. @bfg100k bfg100k revised this gist Feb 5, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -75,7 +75,7 @@ SCRUB_AGE=10
    LOG_FILE="/tmp/snapRAID.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail problem binary
    # location of the mail program binary
    MAIL_BIN="/usr/bin/mail"

    # how much progress output do we want to keep in email
  11. @bfg100k bfg100k revised this gist Feb 5, 2015. 1 changed file with 166 additions and 70 deletions.
    236 changes: 166 additions & 70 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -2,20 +2,27 @@
    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) It first calls diff to figure out if the parity info is out of sync
    # 2) If parity info is out of sync, AND the number of deleted files
    # exceed X (configurable), it triggers an alert email and stops.
    # (In case of accidental deletions, you have the opportunity to
    # recover them from the existing parity info)
    # 3) If parity info is out of sync BUT the number of deleted files did NOT
    # 1) It first calls diff to figure out if the parity info is out of sync.
    # 2) If parity info is out of sync, AND the number of deleted files exceed
    # X (configurable), it triggers an alert email and stops. (In case of
    # accidental deletions, you have the opportunity to recover them from
    # the existing parity info)
    # 3) If parity info is out of sync, AND the number of deleted files exceeds X
    # AND it has reached/exceeded Y (configurable) number of warnings, force
    # a sync. (Useful when you get a false alarm above and you can't be bothered
    # to log in and do a manual sync. Note the risk: if it's not a false alarm
    # and you can't access the box to fix the issue before the job has run
    # Y times... Well, I hope you have other backups...)
    # 4) If parity info is out of sync BUT the number of deleted files did NOT
    # exceed X, it calls sync to update the parity info.
    # 4) When sync finishes successfully, it sends an email with the output to user.
    # 5) If the parity info is in sync (either because nothing changed or after it has
    # successfully completed the sync job, it runs the scrub command to validate
    # the integrity of the data (both the files and the parity info). Note that
    # each run of the scrub command will validate only a (configurable) portion of
    # parity info to avoid having a long running job and affecting the performance
    # of the box.
    # 5) If the parity info is in sync (either because nothing changed or after it
    # has successfully completed the sync job), it runs the scrub command to
    # validate the integrity of the data (both the files and the parity info).
    # Note that each run of the scrub command will validate only a (configurable)
    # portion of parity info to avoid having a long running job and affecting
    # the performance of the box.
    # 6) Once all jobs are completed, it sends an email with the output to user
    # (if configured).
    #
    # Author: SidneyC <sidneyc_at_outlook_dot_com>
    #
    @@ -27,15 +34,37 @@
    # 06/01/2015 Made the script more robust by adding checks to make sure preceding
    # jobs completed as expected before continuing with the subsequent jobs.
    # Made emailing output to user optional.
    # 24/01/2015 Inserted a sed step to clean up crlf (aka dos/unix formatting issue)
    # in sync & scrub outputs.
    # Detect sync and scrub job failures and highlight to user via warning
    # subject line in email to user.
    # 25/01/2015 Added option to reduce progress report output in email (default is 2 -
    # report only in 10% intervals).
    # 26/01/2015 For terse = 2 setting, removed lines for 1-8% from output
    # 05/02/2015 Added logic to perform forced sync after X number of warnings
    # Cleaned up formatting in script file (changed tabs to spaces)
    # Made consistent the use of [ in the test statements
    #
    #######################################################################

    ## USER DEFINED SETTINGS ##
    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"

    # Set the threshold of deleted files to stop the sync job from running.
    # NOTE that depending on how active your filesystem is being used, a low
    # number here may result in your parity info being out of sync often and/or
    # you having to do lots of manual sync.
    DEL_THRESHOLD=20

    # Set number of warnings before we force a sync job.
    # This option comes in handy when you cannot be bothered to manually
    # start a sync job when DEL_THRESHOLD is breached due to false alarm.
    # Set to 0 to ALWAYS force a sync (i.e. ignore the delete threshold above)
    # Set to -1 to NEVER force a sync (i.e. you must sync manually if the delete threshold is breached)
    SYNC_WARN_THRESHOLD=3

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    @@ -49,9 +78,17 @@ SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail problem binary
    MAIL_BIN="/usr/bin/mail"

    # how much progress output do we want to keep in email
    # Default is 2 which means report progress in 10% intervals
    # Set to 1 to report progress in 1% intervals
    # Set to 0 to report everything
    TERSE=2

    ## INTERNAL TEMP VARS ##
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    TMP_OUTPUT="/tmp/snapRAID.out"
    SYNC_WARN_FILE="/tmp/snapRAID.warnCount"

    # auto determine names of content and parity files
    CONTENT_FILE=`cat /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`cat /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`
    @@ -97,82 +134,141 @@ UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' |

    # sanity check to make sure that we were able to get our counts from the output of the DIFF job
    if [ -z "$DEL_COUNT" -o -z "$ADD_COUNT" -o -z "$MOVE_COUNT" -o -z "$COPY_COUNT" -o -z "$UPDATE_COUNT" ]; then
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "[`date`] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
    if [ $EMAIL_ADDRESS ]; then
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    exit 1;
    # failed to get one or more of the count values, lets report to user and exit with error code
    echo "[`date`] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
    if [ $EMAIL_ADDRESS ]; then
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    exit 1;
    fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> $TMP_OUTPUT

    # check if files have changed
    # check if the conditions to run SYNC is met
    # CHK 1 - if files have changed
    if [ $DEL_COUNT -gt 0 -o $ADD_COUNT -gt 0 -o $MOVE_COUNT -gt 0 -o $COPY_COUNT -gt 0 -o $UPDATE_COUNT -gt 0 ]; then
    # YES, check if number of deleted files exceed DEL_THRESHOLD
    if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
    # NO, delete threshold not reached, lets run the sync job
    echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). Running SYNC Command."
    echo "SnapRAID SYNC Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN sync >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SYNC"
    # CHK 1 - YES, files have changed
    # CHK 2 - if number of deleted files exceed DEL_THRESHOLD
    if [ $DEL_COUNT -lt $DEL_THRESHOLD ]; then
    # CHK 2 - NO, delete threshold not reached, lets run the sync job
    echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) is below threshold ($DEL_THRESHOLD). Running SYNC Command."
    DO_SYNC=1
    else
    #CHK 2 - YES, delete threshold breached! print warning message to both outputs
    echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)." >> $TMP_OUTPUT
    echo "[`date`] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details."
    # CHK 3 - if forced sync is set
    if [ $SYNC_WARN_THRESHOLD -gt -1 ]; then
    # CHK 3 - YES
    echo "Forced sync is enabled." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is enabled."
    # CHK 4 - if number of warnings has exceeded threshold
    SYNC_WARN_COUNT=$(sed 'q;/^[0-9][0-9]*$/!d' $SYNC_WARN_FILE 2>/dev/null)
    SYNC_WARN_COUNT=${SYNC_WARN_COUNT:-0} #value is zero if file does not exist or does not contain what we are expecting
    if [ $SYNC_WARN_COUNT -ge $SYNC_WARN_THRESHOLD ]; then
    # CHK 4 - YES, let's force a sync job. No need to remove the warning marker here as it is automatically removed when the sync job is run by this script
    echo "Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run." >> $TMP_OUTPUT
    echo "[`date`] Number of warning(s) ($SYNC_WARN_COUNT) has reached/exceeded threshold ($SYNC_WARN_THRESHOLD). Forcing a sync job to run."
    DO_SYNC=1
    else
    # CHK 4 - NO, so let's increment the warning count and skip the sync job
    ((SYNC_WARN_COUNT += 1))
    echo $SYNC_WARN_COUNT > $SYNC_WARN_FILE
    echo "$((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job." >> $TMP_OUTPUT
    echo "[`date`] $((SYNC_WARN_THRESHOLD - SYNC_WARN_COUNT)) warning(s) till forced sync. NOT proceeding with sync job."
    DO_SYNC=0
    fi
    else
    # YES, delete threshold breached! print warning message to both outputs
    echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> $TMP_OUTPUT
    echo "[`date`] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details. NOT proceeding with sync job."
    fi
    # CHK 3 - NO, so let's skip SYNC
    echo "Forced sync is not enabled. NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> $TMP_OUTPUT
    echo "[`date`] Forced sync is not enabled. Check $TMP_OUTPUT for details. NOT proceeding with sync job."
    DO_SYNC=0
    fi
    fi
    else
    # NO, so let's skip SYNC
    echo "[`date`] No change detected. Not running SYNC job."
    # CHK 1 - NO, so let's skip SYNC
    echo "[`date`] No change detected. Not running SYNC job."
    DO_SYNC=0
    fi

    # Now run sync if conditions are met
    if [ $DO_SYNC -eq 1 ]; then
    echo "SnapRAID SYNC Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN sync | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SYNC Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SYNC"
    # insert SYNC marker to 'Everything OK' string to differentiate it from SCRUB job later
    sed -i 's/^Everything OK/SYNC-Everything OK/g' $TMP_OUTPUT
    # Remove any warning flags if set previously. This is done in this step to take care of scenarios when user has manually synced or restored deleted files and we will have missed it in the checks above.
    if [ -e $SYNC_WARN_FILE ]; then
    rm $SYNC_WARN_FILE
    fi
    fi

    # Moving onto scrub now. Check if user has enabled scrub
    if [ $SCRUB_PERCENT -gt 0 ]; then
    # YES, first let's check if delete threshold has been breached.
    if [ $DEL_COUNT -gt $DEL_THRESHOLD ]; then
    # YES, parity is out of sync due to delete threshold being breached so let's not run scrub job
    echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
    # YES, first let's check if delete threshold has been breached and we have not forced a sync.
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, parity is out of sync so let's not run scrub job
    echo "[`date`] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
    else
    # NO, delete threshold has not been breached OR we forced a sync, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking if snapRAID wrote "Everything OK" to the output).
    if [ $DO_SYNC -eq 1 -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    else
    # NO, delete threshold has not been breached, but we have one last test -
    # let's make sure if sync ran, it completed successfully (by checking if snapRAID wrote "Everything OK" to the output).
    if [[ $JOBS_DONE == *"SYNC"* && -z $(grep -w "Everything OK" $TMP_OUTPUT) ]]; then
    # Sync ran but did not complete successfully so lets not run scrub to be safe
    echo "[`date`] WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job."
    echo "WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job." >> $TMP_OUTPUT
    else
    # Everything ok - let's run the scrub job!
    echo "[`date`] Running SCRUB Command."
    echo "SnapRAID SCRUB Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SCRUB"
    fi
    # Everything ok - let's run the scrub job!
    echo "[`date`] Running SCRUB Command."
    echo "SnapRAID SCRUB Job started on `date`" >> $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT
    $SNAPRAID_BIN scrub -p $SCRUB_PERCENT -o $SCRUB_AGE | sed -e 's/\r/\n/g' >> $TMP_OUTPUT
    #wait for the job to finish
    wait
    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID SCRUB Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="$JOBS_DONE + SCRUB"
    # insert SCRUB marker to 'Everything OK' string to differentiate it from SYNC job above
    sed -i 's/^Everything OK/SCRUB-Everything OK/g' $TMP_OUTPUT
    fi
    fi
    else
    echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
    echo "[`date`] Scrub job is not scheduled. Not running SCRUB job."
    fi

    # all jobs done, let's send output to user if configured
    if [ $EMAIL_ADDRESS ]; then
    # check if deleted count exceeded threshold
    if [ $DEL_COUNT -gt $DEL_THRESHOLD ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    echo "[`date`] Email address is set. Sending email report to <$EMAIL_ADDRESS>"
    # check if deleted count exceeded threshold
    if [ $DEL_COUNT -gt $DEL_THRESHOLD -a $DO_SYNC -eq 0 ]; then
    # YES, lets inform user with an appropriate subject line
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SYNC"*}" -a -z "$(grep -w "SYNC-Everything OK" $TMP_OUTPUT)" ]; then
    # Sync ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SYNC job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    elif [ -z "${JOBS_DONE##*"SCRUB"*}" -a -z "$(grep -w "SCRUB-Everything OK" $TMP_OUTPUT)" ]; then
    # Scrub ran but did not complete successfully so lets warn the user
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - SCRUB job ran but did not complete successfully" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    else
    # OPTIONALLY, let's reduce the amount of status lines in output.
    if [ $TERSE -gt 1 ]; then
    # Report progress in interval of tens %
    sed -i '$!N; /^\([0-9]\).*\n\1.*$/!P; D' $TMP_OUTPUT
    sed -i '/^[1-8]%.*$/d' $TMP_OUTPUT
    elif [ $TERSE -gt 0 ]; then
    # Report progress in interval of ones %
    sed -i '$!N; /^\([0-9]*\)%.*\n\1.*$/!P; D' $TMP_OUTPUT
    fi
    $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < $TMP_OUTPUT
    fi
    fi

    echo "[`date`] All jobs ended."

    exit 0;
    exit 0;
  12. @bfg100k bfg100k created this gist Jan 6, 2015.
    178 changes: 178 additions & 0 deletions snapRAID_helper.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,178 @@
    #!/bin/bash
    #######################################################################
    # This is a helper script that keeps snapraid parity info in sync with
    # your data and optionally verifies the parity info. Here's how it works:
    # 1) It first calls diff to figure out if the parity info is out of sync
    # 2) If parity info is out of sync, AND the number of deleted files
    # exceed X (configurable), it triggers an alert email and stops.
    # (In case of accidental deletions, you have the opportunity to
    # recover them from the existing parity info)
    # 3) If parity info is out of sync BUT the number of deleted files did NOT
    # exceed X, it calls sync to update the parity info.
    # 4) When sync finishes successfully, it sends an email with the output to user.
    # 5) If the parity info is in sync (either because nothing changed or after it has
    # successfully completed the sync job, it runs the scrub command to validate
    # the integrity of the data (both the files and the parity info). Note that
    # each run of the scrub command will validate only a (configurable) portion of
    # parity info to avoid having a long running job and affecting the performance
    # of the box.
    #
    # Author: SidneyC <sidneyc_at_outlook_dot_com>
    #
    # CHANGELOG
    # ---------
    # 23/10/2011 Initial release
    # 04/01/2015 Updated script to handle changes in SnapRAID v7.0
    # Added scrub job as an optional task (after diff and sync)
    # 06/01/2015 Made the script more robust by adding checks to make sure preceding
    # jobs completed as expected before continuing with the subsequent jobs.
    # Made emailing output to user optional.
    #
    #######################################################################

    ## USER DEFINED SETTINGS ##
    # address where the output of the jobs will be emailed to.
    # comment it out to disable email output
    EMAIL_ADDRESS="root"
    DEL_THRESHOLD=20

    # Set percentage of array to scrub if it is in sync.
    # i.e. 0 to disable and 100 to scrub the full array in one go
    # WARNING - depending on size of your array, setting to 100 will take a very long time!
    SCRUB_PERCENT=5
    SCRUB_AGE=10

    # this script will log its actions to a file at this location
    LOG_FILE="/tmp/snapRAID.log"
    # location of the snapraid binary
    SNAPRAID_BIN="/usr/bin/snapraid"
    # location of the mail problem binary
    MAIL_BIN="/usr/bin/mail"

    ## INTERNAL TEMP VARS ##
    EMAIL_SUBJECT_PREFIX="[`hostname`] SnapRAID - "
    TMP_OUTPUT="/tmp/snapRAID.out"
    # auto determine names of content and parity files
    CONTENT_FILE=`cat /etc/snapraid.conf | grep snapraid.content | head -n 1 | cut -d " " -f2`
    PARITY_FILE=`cat /etc/snapraid.conf | grep snapraid.parity | head -n 1 | cut -d " " -f2`

    # redirect all stdout to log file (leave stderr alone though)
    exec >> $LOG_FILE

    # timestamp the job
    echo "[`date`] SnapRAID Job started."
    echo "SnapRAID DIFF Job started on `date`" > $TMP_OUTPUT
    echo "----------------------------------------" >> $TMP_OUTPUT

    #TODO - mount and unmount parity disk on demand!

    #sanity check first to make sure we can access the content and parity files
    if [ ! -e $CONTENT_FILE ]; then
    echo "[`date`] ERROR - Content file ($CONTENT_FILE) not found!"
    echo "ERROR - Content file ($CONTENT_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    fi

    if [ ! -e $PARITY_FILE ]; then
    echo "[`date`] ERROR - Parity file ($PARITY_FILE) not found!"
    echo "ERROR - Parity file ($PARITY_FILE) not found!" >> $TMP_OUTPUT
    exit 1;
    fi

    # run the snapraid DIFF command
    echo "[`date`] Running DIFF Command."
    $SNAPRAID_BIN diff >> $TMP_OUTPUT
    # wait for the above cmd to finish
    wait

    echo "----------------------------------------" >> $TMP_OUTPUT
    echo "SnapRAID DIFF Job finished on `date`" >> $TMP_OUTPUT
    JOBS_DONE="DIFF"

    DEL_COUNT=$(grep -w '^ \{1,\}[0-9]* removed$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    ADD_COUNT=$(grep -w '^ \{1,\}[0-9]* added$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    MOVE_COUNT=$(grep -w '^ \{1,\}[0-9]* moved$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    COPY_COUNT=$(grep -w '^ \{1,\}[0-9]* copied$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)
    UPDATE_COUNT=$(grep -w '^ \{1,\}[0-9]* updated$' $TMP_OUTPUT | sed 's/^ *//g' | cut -d ' ' -f1)

    # Sanity check: every count must be a plain non-negative integer before the
    # numeric comparisons further down can be trusted. An empty value, or one
    # containing anything other than digits (including a newline from multiple
    # matches), marks the whole set as invalid.
    COUNTS_OK=1
    for COUNT_VALUE in "$DEL_COUNT" "$ADD_COUNT" "$MOVE_COUNT" "$COPY_COUNT" "$UPDATE_COUNT"; do
        case "$COUNT_VALUE" in
            '' | *[!0-9]*) COUNTS_OK=0 ;;
        esac
    done

    if [ "$COUNTS_OK" -ne 1 ]; then
        # failed to get one or more of the count values, report to the user and exit with an error code
        echo "[$(date)] ERROR - failed to get one or more count values. Unable to proceed. Exiting script."
        if [ -n "$EMAIL_ADDRESS" ]; then
            # Mail the DIFF output so the user can see why parsing failed.
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Unable to proceed with SYNC/SCRUB job(s). Check DIFF job output inside." "$EMAIL_ADDRESS" < "$TMP_OUTPUT"
        fi
        exit 1
    fi

    echo "SUMMARY of changes - Added [$ADD_COUNT] - Deleted [$DEL_COUNT] - Moved [$MOVE_COUNT] - Copied [$COPY_COUNT] - Updated [$UPDATE_COUNT]" >> "$TMP_OUTPUT"

    # Run the SYNC job when DIFF reported changes and the number of deleted
    # files does not exceed DEL_THRESHOLD.
    #   Globals read:    DEL_COUNT ADD_COUNT MOVE_COUNT COPY_COUNT UPDATE_COUNT
    #                    DEL_THRESHOLD SNAPRAID_BIN TMP_OUTPUT
    #   Globals written: JOBS_DONE (" + SYNC" is appended when sync runs)
    # A delete count exactly equal to the threshold counts as "not exceeded".
    # The scrub and e-mail steps of this script only treat
    # DEL_COUNT > DEL_THRESHOLD as a breach, so skipping the sync at equality
    # would let scrub run against unsynced parity and mail a normal
    # "Jobs COMPLETED" subject.
    sync_if_changed() {
        # check if files have changed
        if [ "$DEL_COUNT" -gt 0 ] || [ "$ADD_COUNT" -gt 0 ] || [ "$MOVE_COUNT" -gt 0 ] \
            || [ "$COPY_COUNT" -gt 0 ] || [ "$UPDATE_COUNT" -gt 0 ]; then
            # YES, check whether the number of deleted files exceeds DEL_THRESHOLD
            if [ "$DEL_COUNT" -le "$DEL_THRESHOLD" ]; then
                # NO, threshold not exceeded, run the sync job
                echo "Deleted files ($DEL_COUNT) did not exceed threshold ($DEL_THRESHOLD), proceeding with sync job." >> "$TMP_OUTPUT"
                echo "[$(date)] Changes detected [A-$ADD_COUNT,D-$DEL_COUNT,M-$MOVE_COUNT,C-$COPY_COUNT,U-$UPDATE_COUNT] and deleted files ($DEL_COUNT) does not exceed threshold ($DEL_THRESHOLD). Running SYNC Command."
                echo "SnapRAID SYNC Job started on $(date)" >> "$TMP_OUTPUT"
                echo "----------------------------------------" >> "$TMP_OUTPUT"
                # $SNAPRAID_BIN stays unquoted so a value carrying extra arguments keeps working.
                $SNAPRAID_BIN sync >> "$TMP_OUTPUT"
                # NOTE(review): sync runs in the foreground; this wait only
                # matters if background jobs exist elsewhere - confirm.
                wait
                echo "----------------------------------------" >> "$TMP_OUTPUT"
                echo "SnapRAID SYNC Job finished on $(date)" >> "$TMP_OUTPUT"
                JOBS_DONE="$JOBS_DONE + SYNC"
            else
                # YES, delete threshold breached! print warning message to both outputs
                echo "Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). NOT proceeding with sync job. Please run sync manually if this is not an error condition." >> "$TMP_OUTPUT"
                echo "[$(date)] WARNING - Deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD). Check $TMP_OUTPUT for details. NOT proceeding with sync job."
            fi
        else
            # NO, so let's skip SYNC
            echo "[$(date)] No change detected. Not running SYNC job."
        fi
    }

    sync_if_changed

    # Run the SCRUB job when scrubbing is enabled and parity is believed to be
    # in sync.
    #   Globals read:    SCRUB_PERCENT SCRUB_AGE DEL_COUNT DEL_THRESHOLD
    #                    JOBS_DONE SNAPRAID_BIN TMP_OUTPUT
    #   Globals written: JOBS_DONE (" + SCRUB" is appended when scrub runs)
    # Scrub is skipped when SCRUB_PERCENT is not greater than 0, when the
    # delete threshold was exceeded, or when a SYNC ran but the report does not
    # contain the string "Everything OK".
    scrub_if_enabled() {
        # Check if user has enabled scrub
        if [ "$SCRUB_PERCENT" -gt 0 ]; then
            if [ "$DEL_COUNT" -gt "$DEL_THRESHOLD" ]; then
                # parity is out of sync due to delete threshold being breached so let's not run scrub job
                echo "[$(date)] Scrub job cancelled as parity info is out of sync (deleted files threshold has been breached)."
            elif [[ $JOBS_DONE == *"SYNC"* ]] && ! grep -qw "Everything OK" "$TMP_OUTPUT"; then
                # Sync ran but did not complete successfully so lets not run scrub to be safe
                echo "[$(date)] WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job."
                echo "WARNING - check output of SYNC job. Could not detect string <Everything OK>. Not proceeding with SCRUB job." >> "$TMP_OUTPUT"
            else
                # Everything ok - let's run the scrub job!
                echo "[$(date)] Running SCRUB Command."
                echo "SnapRAID SCRUB Job started on $(date)" >> "$TMP_OUTPUT"
                echo "----------------------------------------" >> "$TMP_OUTPUT"
                # $SNAPRAID_BIN stays unquoted so a value carrying extra arguments keeps working.
                $SNAPRAID_BIN scrub -p "$SCRUB_PERCENT" -o "$SCRUB_AGE" >> "$TMP_OUTPUT"
                # NOTE(review): scrub runs in the foreground; this wait only
                # matters if background jobs exist elsewhere - confirm.
                wait
                echo "----------------------------------------" >> "$TMP_OUTPUT"
                echo "SnapRAID SCRUB Job finished on $(date)" >> "$TMP_OUTPUT"
                JOBS_DONE="$JOBS_DONE + SCRUB"
            fi
        else
            echo "[$(date)] Scrub job is not scheduled. Not running SCRUB job."
        fi
    }

    scrub_if_enabled

    # All jobs done: mail the report file to the user if an address is
    # configured. The address is tested with -n and quoted so that a value
    # containing whitespace no longer makes `[` fail and suppress the mail.
    if [ -n "$EMAIL_ADDRESS" ]; then
        # check if deleted count exceeded threshold
        if [ "$DEL_COUNT" -gt "$DEL_THRESHOLD" ]; then
            # YES, inform the user with an appropriate subject line
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX WARNING - Number of deleted files ($DEL_COUNT) exceeded threshold ($DEL_THRESHOLD)" "$EMAIL_ADDRESS" < "$TMP_OUTPUT"
        else
            # NO, the subject lists the jobs recorded in JOBS_DONE
            $MAIL_BIN -s "$EMAIL_SUBJECT_PREFIX INFO - $JOBS_DONE Jobs COMPLETED" "$EMAIL_ADDRESS" < "$TMP_OUTPUT"
        fi
    fi

    echo "[$(date)] All jobs ended."

    exit 0