Skip to content

Instantly share code, notes, and snippets.

@ctran
Created August 30, 2021 21:12
Show Gist options
  • Save ctran/0c20887d2ef5d456ab4ede44c9615841 to your computer and use it in GitHub Desktop.
Save ctran/0c20887d2ef5d456ab4ede44c9615841 to your computer and use it in GitHub Desktop.

Revisions

  1. ctran created this gist Aug 30, 2021.
    305 changes: 305 additions & 0 deletions collectPerformanceData.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,305 @@
    #!/bin/bash


    ## The following is automatically generated code, do not manually modify.
    ## Template is available in # scripts/commons-templates.sh
    ## START AUTOGENERATED CODE
    # shellcheck disable=SC2034
    SCRIPT_VERSION=1630078691

    # Directory that contains this script; `cd -P` resolves symlinks in the path.
    HERE="$(
      cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd
    )"

    # Logging

    # Minimum level that is printed. CBSUPPORT_LOGGING_LEVEL overrides the DEBUG default.
    LOGGING_LEVEL="${CBSUPPORT_LOGGING_LEVEL:-DEBUG}"

    # Logs a message at INFO level.
    # Arguments: $1 - text to log
    log_info() {
      local -r message="${1}"
      __log 'INFO' "${message}"
    }

    # Logs a message at DEBUG level.
    # Arguments: $1 - text to log
    log_debug() {
      local -r message="${1}"
      __log 'DEBUG' "${message}"
    }

    # Logs a message at ERROR level.
    # Arguments: $1 - text to log
    # ERROR is the highest level known to __level_to_int, so these messages are
    # printed for every LOGGING_LEVEL value.
    function log_error() {
      local -r txt="${1}"
      __log 'ERROR' "${txt}"
    }

    # Prints the numeric rank of a log level name: ERROR -> 2, INFO -> 1,
    # anything else (including DEBUG) -> 0.
    # An associative array would be shorter, but bash 4 cannot be assumed.
    # Arguments: $1 - level name
    __level_to_int() {
      local -r name="${1}"
      case "${name}" in
        'ERROR') echo 2 ;;
        'INFO') echo 1 ;;
        *) echo 0 ;; # default
      esac
    }

    # Prints "[LEVEL] text" to stdout unless LEVEL ranks below LOGGING_LEVEL.
    # Arguments: $1 - level name, $2 - text to log
    __log() {
      local -r level="${1}" txt="${2}"
      local severity threshold

      severity="$(__level_to_int "${level}")"
      threshold="$(__level_to_int "${LOGGING_LEVEL}")"

      # Messages below the configured threshold are dropped silently.
      if (( severity < threshold )); then
        return
      fi
      echo "[${level}] ${txt}"
    }

    # Reports (at DEBUG level) when a recommended tool is not installed.
    # Arguments:
    #   $1 - command name to look for
    #   $2 - 'true' (default) to log when the tool is missing; any other value
    #        keeps the function silent
    # Returns: 0 when the tool is installed, or when it is missing and the
    #          message was logged; 1 when it is missing and verbose is not 'true'.
    function check_tool() {
      local -r cmd="${1}"
      local -r verbose="${2:-true}"

      # Quoted so that a name containing whitespace is passed as a single argument.
      if is_tool_installed "${cmd}"; then
        return 0
      fi

      # The flag is compared, never executed: it is caller-supplied text.
      if [[ "${verbose}" != 'true' ]]; then
        return 1
      fi
      log_debug "${cmd} is recommended but it's not installed."
    }

    # Succeeds when the given command can be resolved by the shell.
    # Arguments: $1 - command name or path
    # Outputs nothing; the result is the exit status of `command -v`.
    is_tool_installed() {
      local -r tool_name="${1}"
      command -v "${tool_name}" >/dev/null 2>&1
    }
    ## END AUTOGENERATED CODE

    ####################################################################################
    # This script is used to collect data for
    # 'RequiredData: Performance, Hang or High CPU Issues for a Java process running on Linux'
    #
    #####################################################################################

    # Prints the usage text (positional arguments and optional environment variables) to stdout.
    # The text is an unquoted here-document, so each $(basename $0) is expanded when the help is printed.
    # Takes no arguments; any argument passed by a caller is ignored.
    function print_help() {
    cat <<EOM
    Unable to find required PID argument. Please rerun the script as follows:
    $(basename $0) PID [duration] [frequency]
    PID: Java process (Jenkins, CI, CD) PID
    duration: Tests duration time in seconds (default 60 seconds)
    frequency: Number of seconds that will wait until next data require (default 5 seconds)
    Optional environment vars
    JAVA_HOME used to locate JDK
    JATTACH_HOME path to directory containing jattach (optional: is used only if no JDK is found and jattach is not in the path)
    JAVA_USERID Java userid if this script is run as root instead of the userid running the Java process
    PERFORMANCE_DATA_OUTPUT_DIR output dir
    In case no JDK is found, the script will try to use jattach: https://github.com/apangin/jattach
    Run $(basename $0) --help to print help.
    EOM
    }

    # Checks that the output directory is usable and reports missing recommended tools.
    # Globals:   PERFORMANCE_DATA_OUTPUT_DIR (read), HERE (read, for logging only)
    # Arguments: none are used
    # Exits the script when the directory cannot be entered or cannot be written to.
    function script_validation() {
      local probe_file

      log_debug "Script Validation Results"
      log_debug "Moving to ${PERFORMANCE_DATA_OUTPUT_DIR}"

      pushd "${PERFORMANCE_DATA_OUTPUT_DIR}" >/dev/null || exit

      # Check that the directory can be written to by the user running the script.
      # A uniquely named probe is used so that an existing file is neither taken
      # as proof of writability nor deleted afterwards.
      if probe_file="$(mktemp ./collectPerformanceData.XXXXXX 2>/dev/null)"; then
        log_debug 'This directory can be written to by the script'
        rm -f -- "${probe_file}"
      else
        log_error 'This directory cannot be written to by the script. Please either run this script from a directory that can be written to or use the optional environment variable: PERFORMANCE_DATA_OUTPUT_DIR .'
        exit 1
      fi

      check_tool 'top'
      check_tool 'vmstat'
      check_tool 'netstat'
      check_tool 'iostat'

      log_debug "Moving back to current dir ${HERE}"
      popd >/dev/null || exit
    }

    # Defaults, overridden by the optional second and third positional arguments.
    duration=60
    frequency=5

    # Accepted forms: PID [duration] [frequency], or --help as the only argument.
    case $# in
      1)
        if [[ "${1}" == '--help' ]]; then
          print_help
          exit 0
        fi
        pid="${1}"
        ;;
      2)
        pid="${1}"
        duration="${2}"
        ;;
      3)
        pid="${1}"
        duration="${2}"
        frequency="${3}"
        ;;
      *)
        print_help "${0}"
        exit 1
        ;;
    esac

    # All three values feed integer tests and arithmetic in the collection loop.
    # A zero, negative or non-integer frequency would keep that loop from ever
    # finishing, and a leading zero would be read as octal, so reject them here.
    for numeric_arg in "${pid}" "${duration}" "${frequency}"; do
      if [[ ! "${numeric_arg}" =~ ^[1-9][0-9]*$ ]]; then
        log_error "Invalid argument '${numeric_arg}': PID, duration and frequency must be positive integers."
        print_help
        exit 1
      fi
    done
    unset numeric_arg

    # Without PERFORMANCE_DATA_OUTPUT_DIR the data is written to the current directory.
    [[ -n "${PERFORMANCE_DATA_OUTPUT_DIR}" ]] || {
      PERFORMANCE_DATA_OUTPUT_DIR="$(pwd)"
      log_debug "Output dir ${PERFORMANCE_DATA_OUTPUT_DIR}"
    }

    # Check the output directory and the recommended tools before collecting anything.
    script_validation "${0}"

    # Start from bare command names so the tools are resolved through the PATH.
    declare jcmd_bin="jcmd"
    declare jstack_bin="jstack"
    declare jattach_bin="jattach"

    if [[ -z "${JAVA_HOME}" ]]; then
      log_debug 'JAVA_HOME is NOT set. Looking for a JDK on the PATH.'
    else
      # shellcheck disable=SC2016
      log_debug 'JAVA_HOME is set. Looking for JDK tools in ${JAVA_HOME}/bin.'
      jcmd_bin="${JAVA_HOME}/bin/jcmd"
      jstack_bin="${JAVA_HOME}/bin/jstack"
    fi

    # jattach is only looked up when neither jcmd nor jstack is available.
    if ! { is_tool_installed "${jcmd_bin}" || is_tool_installed "${jstack_bin}"; }; then
      log_debug 'jcmd or jstack not found. Looking for jattach'
      if [[ -z "${JATTACH_HOME}" ]]; then
        log_debug 'JATTACH_HOME is NOT set. Looking for jattach on the PATH.'
      else
        log_debug "JATTACH_HOME is set. Looking for the binary in ${JATTACH_HOME}"
        jattach_bin="${JATTACH_HOME}/jattach"
      fi

      # Without any of the three tools no thread dump can be taken: give up.
      is_tool_installed "${jattach_bin}" || {
        log_error 'Could not find a JDK nor jattach. Either the full Java JDK and jattach are not installed or they are not the path of the user that is running the Java process.'
        exit 1
      }
    fi

    # Prefix put in front of the thread dump commands: empty by default, or
    # "sudo -u <user>" when JAVA_USERID names the account to run them as.
    if [[ -z "${JAVA_USERID}" ]]; then
      declare cmd_prefix=""
    else
      declare cmd_prefix="sudo -u ${JAVA_USERID}"
      log_debug "user ${JAVA_USERID}"
    fi

    # Writes a thread dump of a Java process to a file, using the first available
    # tool among jcmd, jstack and jattach.
    # Globals:   jcmd_bin, jstack_bin, jattach_bin, cmd_prefix (all read)
    # Arguments: $1 - PID of the Java process
    #            $2 - path of the file that receives the dump
    # Returns:   the exit status of the tool that was run, or 1 when none of the
    #            three tools is available.
    function write_threads() {
      local pid="$1"
      local threadFileName="$2"

      # cmd_prefix is left unquoted on purpose: it is either empty or a
      # "sudo -u <user>" string that has to be split into separate words.
      # shellcheck disable=SC2086
      if is_tool_installed "${jcmd_bin}"; then
        ${cmd_prefix} "${jcmd_bin}" "${pid}" Thread.print -l >"${threadFileName}"
      elif is_tool_installed "${jstack_bin}"; then
        ${cmd_prefix} "${jstack_bin}" -l "${pid}" >"${threadFileName}"
      elif is_tool_installed "${jattach_bin}"; then
        ${cmd_prefix} "${jattach_bin}" "${pid}" threaddump >"${threadFileName}"
      else
        # Nothing was run, so do not report success.
        return 1
      fi
    }

    # Working directory for this run, named after the PID and the start time,
    # with one sub-directory per kind of collected data.
    TEMP_DIR="${PERFORMANCE_DATA_OUTPUT_DIR}/tmp.${pid}.$(date +%Y%m%d%H%M%S)"
    log_debug "Temporary dir ${TEMP_DIR}"
    mkdir -p "${TEMP_DIR}"
    mkdir "${TEMP_DIR}"/{iostat,threads,netstat,topdashHOutput,topOutput,vmstat,nfsiostat,nfsstat}

    # Announce the run. The first line goes to the console and starts mode.txt;
    # the run parameters are appended to mode.txt only.
    log_info "The collectPerformanceData.sh script $SCRIPT_VERSION is starting in custom mode." | tee "${TEMP_DIR}/mode.txt"
    log_info "The pid is $pid" >>"${TEMP_DIR}/mode.txt"
    log_info "The custom duration is $duration" >>"${TEMP_DIR}/mode.txt"
    log_info "The custom thread dump generation frequency is $frequency" >>"${TEMP_DIR}/mode.txt"

    # Tell the end user which settings are in effect.
    log_debug "The custom mode should only be used if requested && if data should be collected for longer than 1 minute"
    log_info "The collectPerformanceData.sh script will run for $duration seconds."
    log_info "It will generate a full data generation (threadDump, iostat, vmstat, netstat, top) every $frequency seconds."

    # Constraints on the duration and frequency values, shown at DEBUG level.
    for banner_line in \
      ">>>>>>>>>>>>>>>The frequency Has To Divide into the duration by a whole integer.<<<<<<<<<<<<<<<" \
      ">>>>>>>>>>>>>>>The duration Divided by 60 should also be a whole integer.<<<<<<<<<<<<<<<" \
      ">>>>>>>>>>>>>>>The duration Divided by 5 should also be a whole integer.<<<<<<<<<<<<<<<" \
      ">>>>>>>>>>>>>>>Setting the frequency to low, i.e. 1 second, may cause the data to be inconclusive.<<<<<<<<<<<<<<<"; do
      log_debug "${banner_line}"
    done
    unset banner_line

    # Collect one full set of data every $frequency seconds until $duration is used up.
    # Every collector runs in the background and writes to a timestamped file in
    # its own sub-directory of $TEMP_DIR; the loop waits for all of them before sleeping.
    while [ "${duration}" -gt 0 ]; do
      # Taking top data collection
      log_info "Taking top data collection."
      COLUMNS=300 top -bc -n 1 >"$TEMP_DIR"/topOutput/topOutput."$(date +%Y%m%d%H%M%S)".txt &

      # Taking topdashH data collection (per-thread view of the Java process)
      log_info "Taking TopdashH data collection."
      top -bH -p "${pid}" -n 1 >"$TEMP_DIR"/topdashHOutput/topdashHOutput."${pid}"."$(date +%Y%m%d%H%M%S)".txt &

      # Taking vmstat data collection in the background
      log_info "Taking vmstat data collection."
      vmstat >"$TEMP_DIR"/vmstat/vmstat."$(date +%Y%m%d%H%M%S)".out &

      # Taking netstat data
      log_info "Taking netstat collection."
      # redirecting stderr to /dev/null to get rid of the annoying message for non root users
      netstat -pan 2>/dev/null >"$TEMP_DIR"/netstat/netstat."$(date +%Y%m%d%H%M%S)".out &

      # Taking iostat data collection
      # The optional tools are probed with is_tool_installed (command -v) like
      # the rest of the script, rather than with the external `which`.
      log_info "Taking iostat data collection."
      if is_tool_installed iostat; then
        iostat -t >"$TEMP_DIR"/iostat/iostat."$(date +%Y%m%d%H%M%S)".out &
      else
        log_debug 'The command iostat was not found'
      fi

      # Taking nfsiostat data collection
      log_info 'Taking nfsiostat data collection.'
      if is_tool_installed nfsiostat; then
        nfsiostat >"$TEMP_DIR"/nfsiostat/nfsiostat."$(date +%Y%m%d%H%M%S)".out &
      else
        log_debug 'The command nfsiostat was not found'
      fi

      # Taking nfsstat data collection
      log_info 'Taking nfsstat data collection.'
      if is_tool_installed nfsstat; then
        nfsstat -c >"$TEMP_DIR"/nfsstat/nfsstat."$(date +%Y%m%d%H%M%S)".out &
      else
        log_debug 'The command nfsstat was not found'
      fi

      # Taking a threadDump
      THREADS_FILENAME="$TEMP_DIR"/threads/threads."${pid}"."$(date +%Y%m%d%H%M%S)".txt
      write_threads "${pid}" "$THREADS_FILENAME" &
      # Record the PID of the background thread dump job
      THREAD_DUMP_PID=$!

      # Wait for the thread dump background process
      wait "${THREAD_DUMP_PID}"
      # Get the exit code of the $THREAD_DUMP_PID
      THREAD_DUMP_PID_STATUS=$?
      # Wait for all remaining background processes
      wait

      if [ "${THREAD_DUMP_PID_STATUS}" -ne 0 ]; then
        rm -r "$TEMP_DIR"
        log_error 'The script failed to collect a thread dump. Maybe it is not launched with the same user that the Java process is running as. Try with sudo -u <JAVA_USERID> >>>>>>>>>>>>>>>'
        exit 1
      fi
      # Reported only once the dump is known to have succeeded.
      log_info "Collected a threadDump for PID $pid."

      # Pause for $frequency seconds.
      log_info "A new collection will start in ${frequency} seconds."

      sleep "${frequency}"

      # Update duration
      duration=$((duration - frequency))
    done

    # Package everything collected under $TEMP_DIR into one archive, copy the
    # archive to the parent directory, then delete the temporary directory.
    log_info "Packaging data and preparing for cleanup."

    log_debug "Moving to ${TEMP_DIR}"
    pushd "${TEMP_DIR}" >/dev/null || exit
    # CBSUPPORT_OUTPUT, when set, overrides the default archive name.
    PERFORMANCE_DATA_ARCHIVE_NAME="${CBSUPPORT_OUTPUT:-performanceData.$pid.output.tar.gz}"
    # The temporary directory holds the only copy of the data: keep it when the
    # archive cannot be created instead of deleting it below.
    if ! tar -czf "${PERFORMANCE_DATA_ARCHIVE_NAME}" topOutput topdashHOutput mode.txt threads vmstat netstat iostat nfsiostat nfsstat; then
      log_error "Failed to create the archive ${PERFORMANCE_DATA_ARCHIVE_NAME}. The collected data has been kept in ${TEMP_DIR}"
      exit 1
    fi
    # The result of cp is deliberately not checked: it reports an error when
    # the archive name already resolves to a file in the parent directory.
    cp "${PERFORMANCE_DATA_ARCHIVE_NAME}" ..

    log_info "Cleanup files"
    # Remove the temporary directory and all the raw data files in it
    rm -r "$TEMP_DIR"

    log_debug "Moving back to current dir ${HERE}"
    popd >/dev/null || exit

    # Closing messages for the end user. They are skipped when CBSUPPORT_OUTPUT is
    # set, i.e. when running in the context of cbsupport, where they would mislead.
    if [[ -z "${CBSUPPORT_OUTPUT}" ]]; then
      for closing_message in \
        "The temporary dir \"${TEMP_DIR}\" has been deleted" \
        "The collectPerformanceData.sh script in CUSTOM MODE is complete." \
        "The Output files are contained within !>>>! ${PERFORMANCE_DATA_ARCHIVE_NAME} !<<<!" \
        "Please upload the ${PERFORMANCE_DATA_ARCHIVE_NAME} archive to your ticket for review."; do
        log_info "${closing_message}"
      done
    fi