Created
August 30, 2021 21:12
-
-
Save ctran/0c20887d2ef5d456ab4ede44c9615841 to your computer and use it in GitHub Desktop.
Revisions
-
ctran created this gist
Aug 30, 2021 .There are no files selected for viewing
#!/bin/bash

## The following is automatically generated code, do not manually modify.
## Template is available in
# scripts/commons-templates.sh
## START AUTOGENERATED CODE
# NOTE(review): log_error below was corrected to log at ERROR level; the same
# fix presumably needs to be applied to the template named above.

# shellcheck disable=SC2034
SCRIPT_VERSION=1630078691

# Useful variables
# Absolute, symlink-resolved directory containing this script.
HERE="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Logging functions
# Minimum level that gets printed: DEBUG (default) < INFO < ERROR.
LOGGING_LEVEL=${CBSUPPORT_LOGGING_LEVEL:-'DEBUG'}

# Log a message at INFO level.
# Arguments: $1 - message text
function log_info() {
  local -r txt="${1}"
  __log 'INFO' "${txt}"
}

# Log a message at DEBUG level.
# Arguments: $1 - message text
function log_debug() {
  local -r txt="${1}"
  __log 'DEBUG' "${txt}"
}

# Log a message at ERROR level.
# Arguments: $1 - message text
function log_error() {
  local -r txt="${1}"
  # Must be 'ERROR': logging errors as 'DEBUG' hides them as soon as
  # LOGGING_LEVEL is raised to INFO or ERROR.
  __log 'ERROR' "${txt}"
}

# Map a level name to its numeric severity (unknown names map to 0).
# A simpler solution would be to use associative arrays, but we cannot assume bash 4...
# Arguments: $1 - level name
# Outputs:   2 for ERROR, 1 for INFO, 0 otherwise
function __level_to_int() {
  local -r level="${1}"
  case "${level}" in
    ERROR) echo 2 ;;
    INFO) echo 1 ;;
    *) echo 0 ;; # default
  esac
}

# Print "[LEVEL] text" on stdout unless LEVEL is below LOGGING_LEVEL.
# Arguments: $1 - level name, $2 - message text
function __log() {
  local -r level="${1}"
  local -r txt="${2}"
  if (($(__level_to_int "${level}") < $(__level_to_int "${LOGGING_LEVEL}"))); then
    return 0
  fi
  printf '%s\n' "[${level}] ${txt}"
}

# Report (at DEBUG level) when a recommended tool is missing.
# Arguments: $1 - command name
#            $2 - 'true' (default) to log when missing, 'false' to stay quiet
# Returns:   0 if the tool is installed or the notice was logged,
#            1 if the tool is missing and verbose is not 'true'
function check_tool() {
  local -r cmd="${1}"
  local -r verbose="${2:-true}"
  if is_tool_installed "${cmd}"; then
    return 0
  fi
  if [[ "${verbose}" != 'true' ]]; then
    return 1
  fi
  log_debug "${cmd} is recommended but it's not installed."
}

# Test whether a command can be resolved by the shell.
# Arguments: $1 - command name or path
# Returns:   0 if found, non-zero otherwise
function is_tool_installed() {
  local -r cmd="${1}"
  command -v "${cmd}" >/dev/null 2>&1
}

## END AUTOGENERATED CODE

####################################################################################
# This script is used to collect data for
# 'RequiredData: Performance, Hang or High CPU Issues for a Java process running on Linux'
#
#####################################################################################

# Print usage information on stdout.
function print_help() {
  cat <<EOM
Unable to find required PID argument. Please rerun the script as follows:

    $(basename "$0") PID [duration] [frequency]

    PID: Java process (Jenkins, CI, CD) PID
    duration: Tests duration time in seconds (default 60 seconds)
    frequency: Number of seconds that will wait until next data require (default 5 seconds)

Optional environment vars

    JAVA_HOME used to locate JDK
    JATTACH_HOME path to directory containing jattach (optional: is used only if no JDK is found and jattach is not in the path)
    JAVA_USERID Java userid if this script is run as root instead of the userid running the Java process
    PERFORMANCE_DATA_OUTPUT_DIR output dir

In case no JDK is found, the script will try to use jattach: https://github.com/apangin/jattach

Run $(basename "$0") --help to print help.

EOM
}

# Abort the script unless the given value is a strictly positive decimal integer.
# Arguments: $1 - human-readable name of the value (used in the error message)
#            $2 - value to check
function __require_positive_int() {
  local -r name="${1}"
  local -r value="${2}"
  # 10# forces base 10 so that a leading zero is not read as octal.
  if ! [[ "${value}" =~ ^[0-9]+$ ]] || ((10#${value} <= 0)); then
    log_error "Invalid ${name} '${value}': expected a positive integer."
    exit 1
  fi
}

# Verify that the output directory is writable and report missing optional tools.
# Globals: PERFORMANCE_DATA_OUTPUT_DIR (read), HERE (read)
# Exits with status 1 when the output directory cannot be written to.
function script_validation() {
  # Unique probe name so an unrelated pre-existing file is never mistaken for
  # the probe, nor deleted.
  local -r probe_file=".collectPerformanceData.write-test.$$"

  log_debug "Script Validation Results"
  log_debug "Moving to ${PERFORMANCE_DATA_OUTPUT_DIR}"
  pushd "${PERFORMANCE_DATA_OUTPUT_DIR}" >/dev/null || exit

  # check if the directory can be written to by the user that is running the script, i.e. user
  if touch "${probe_file}" 2>/dev/null; then
    log_debug 'This directory can be written to by the script'
    rm -f -- "${probe_file}"
  else
    log_error 'This directory cannot be written to by the script. Please either run this script from a directory that can be written to or use the optional environment variable: PERFORMANCE_DATA_OUTPUT_DIR .'
    exit 1
  fi

  check_tool 'top'
  check_tool 'vmstat'
  check_tool 'netstat'
  check_tool 'iostat'

  log_debug "Moving back to current dir ${HERE}"
  popd >/dev/null || exit
}

# Defaults, overridden by the optional 2nd and 3rd positional arguments.
duration=60
frequency=5

case $# in
  1)
    if [ "$1" = "--help" ]; then
      print_help
      exit 0
    fi
    pid="${1}"
    ;;
  2)
    pid="${1}"
    duration="${2}"
    ;;
  3)
    pid="${1}"
    duration="${2}"
    frequency="${3}"
    ;;
  *)
    print_help "${0}"
    exit 1
    ;;
esac

# Reject bad input up front: a zero frequency would make the collection loop
# below run forever, and non-numeric values would break its arithmetic.
__require_positive_int 'PID' "${pid}"
__require_positive_int 'duration' "${duration}"
__require_positive_int 'frequency' "${frequency}"
pid=$((10#${pid}))
duration=$((10#${duration}))
frequency=$((10#${frequency}))

if [ -z "$PERFORMANCE_DATA_OUTPUT_DIR" ]; then
  PERFORMANCE_DATA_OUTPUT_DIR="$(pwd)"
  log_debug "Output dir ${PERFORMANCE_DATA_OUTPUT_DIR}"
fi

script_validation "${0}"

declare jcmd_bin="jcmd"
declare jstack_bin="jstack"
declare jattach_bin="jattach"

if [ -n "${JAVA_HOME}" ]; then
  # shellcheck disable=SC2016
  log_debug 'JAVA_HOME is set. Looking for JDK tools in ${JAVA_HOME}/bin.'
  jcmd_bin="${JAVA_HOME}/bin/jcmd"
  jstack_bin="${JAVA_HOME}/bin/jstack"
else
  log_debug 'JAVA_HOME is NOT set. Looking for a JDK on the PATH.'
fi

# Fall back to jattach only when neither JDK tool is available.
if ! is_tool_installed "${jcmd_bin}" && ! is_tool_installed "${jstack_bin}"; then
  log_debug 'jcmd or jstack not found. Looking for jattach'
  if [ -n "${JATTACH_HOME}" ]; then
    log_debug "JATTACH_HOME is set. Looking for the binary in ${JATTACH_HOME}"
    jattach_bin="${JATTACH_HOME}/jattach"
  else
    log_debug 'JATTACH_HOME is NOT set. Looking for jattach on the PATH.'
  fi
  if ! is_tool_installed "${jattach_bin}"; then
    log_error 'Could not find a JDK nor jattach. Either the full Java JDK and jattach are not installed or they are not the path of the user that is running the Java process.'
    exit 1
  fi
fi

# Optional "sudo -u <user>" prefix, kept as an array so each word stays intact.
declare -a cmd_prefix=()
if [ -n "${JAVA_USERID}" ]; then
  cmd_prefix=(sudo -u "${JAVA_USERID}")
  log_debug "user ${JAVA_USERID}"
fi

# Write a thread dump of the given process to a file, using the first available
# tool among jcmd, jstack and jattach.
# Globals:   jcmd_bin, jstack_bin, jattach_bin, cmd_prefix (read)
# Arguments: $1 - PID of the Java process
#            $2 - path of the file receiving the dump
# Returns:   exit status of the tool that was run
function write_threads() {
  local pid="$1"
  local threadFileName="$2"
  if is_tool_installed "${jcmd_bin}"; then
    "${cmd_prefix[@]}" "${jcmd_bin}" "${pid}" Thread.print -l >"${threadFileName}"
  elif is_tool_installed "${jstack_bin}"; then
    "${cmd_prefix[@]}" "${jstack_bin}" -l "${pid}" >"${threadFileName}"
  elif is_tool_installed "${jattach_bin}"; then
    "${cmd_prefix[@]}" "${jattach_bin}" "${pid}" threaddump >"${threadFileName}"
  fi
}

# Create temporary directories
TEMP_DIR="$PERFORMANCE_DATA_OUTPUT_DIR/tmp.$pid.$(date +%Y%m%d%H%M%S)"
log_debug "Temporary dir ${TEMP_DIR}"
mkdir -p "${TEMP_DIR}" || exit 1
mkdir "${TEMP_DIR}"/iostat "${TEMP_DIR}"/threads "${TEMP_DIR}"/netstat \
  "${TEMP_DIR}"/topdashHOutput "${TEMP_DIR}"/topOutput "${TEMP_DIR}"/vmstat \
  "${TEMP_DIR}"/nfsiostat "${TEMP_DIR}"/nfsstat || exit 1

# Begin script and notify the end user
log_info "The collectPerformanceData.sh script $SCRIPT_VERSION is starting in custom mode." | tee "$TEMP_DIR"/mode.txt
log_info "The pid is $pid" >>"$TEMP_DIR"/mode.txt
log_info "The custom duration is $duration" >>"$TEMP_DIR"/mode.txt
log_info "The custom thread dump generation frequency is $frequency" >>"$TEMP_DIR"/mode.txt

# Output the Default Settings to the end user
log_debug "The custom mode should only be used if requested && if data should be collected for longer than 1 minute"
log_info "The collectPerformanceData.sh script will run for $duration seconds."
log_info "It will generate a full data generation (threadDump, iostat, vmstat, netstat, top) every $frequency seconds."
log_debug ">>>>>>>>>>>>>>>The frequency Has To Divide into the duration by a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 60 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>The duration Divided by 5 should also be a whole integer.<<<<<<<<<<<<<<<"
log_debug ">>>>>>>>>>>>>>>Setting the frequency to low, i.e. 1 second, may cause the data to be inconclusive.<<<<<<<<<<<<<<<"

# Begin data generation once every $frequency seconds.
while [ "${duration}" -gt 0 ]; do

  # Taking top data collection
  log_info "Taking top data collection."
  COLUMNS=300 top -bc -n 1 >"$TEMP_DIR"/topOutput/topOutput."$(date +%Y%m%d%H%M%S)".txt &

  # Taking topdashH data collection
  log_info "Taking TopdashH data collection."
  top -bH -p "$pid" -n 1 >"$TEMP_DIR"/topdashHOutput/topdashHOutput."$pid"."$(date +%Y%m%d%H%M%S)".txt &

  # Taking vmstat data collection in the background
  log_info "Taking vmstat data collection."
  vmstat >"$TEMP_DIR"/vmstat/vmstat."$(date +%Y%m%d%H%M%S)".out &

  # Taking netstat data
  log_info "Taking netstat collection."
  # redirecting to /dev/null to get rid of the annoying message for non root users
  netstat -pan 2>/dev/null >"$TEMP_DIR"/netstat/netstat."$(date +%Y%m%d%H%M%S)".out &

  # Taking iostat data collection
  log_info "Taking iostat data collection."
  if is_tool_installed iostat; then
    iostat -t >"$TEMP_DIR"/iostat/iostat."$(date +%Y%m%d%H%M%S)".out &
  else
    log_debug 'The command iostat was not found'
  fi

  # Taking nfsiostat data collection
  log_info 'Taking nfsiostat data collection.'
  if is_tool_installed nfsiostat; then
    nfsiostat >"$TEMP_DIR"/nfsiostat/nfsiostat."$(date +%Y%m%d%H%M%S)".out &
  else
    log_debug 'The command nfsiostat was not found'
  fi

  # Taking nfsstat data collection
  log_info 'Taking nfsstat data collection.'
  if is_tool_installed nfsstat; then
    nfsstat -c >"$TEMP_DIR"/nfsstat/nfsstat."$(date +%Y%m%d%H%M%S)".out &
  else
    log_debug 'The command nfsstat was not found'
  fi

  # Taking a threadDump
  THREADS_FILENAME="$TEMP_DIR"/threads/threads."$pid"."$(date +%Y%m%d%H%M%S)".txt
  write_threads "${pid}" "$THREADS_FILENAME" &
  # Record the process PID
  THREAD_DUMP_PID=$!

  # Wait for the thread dump background process
  wait "$THREAD_DUMP_PID"
  # Get the exit code of the $THREAD_DUMP_PID
  THREAD_DUMP_PID_STATUS=$?

  # Wait for all background process
  wait

  if [ "$THREAD_DUMP_PID_STATUS" -ne 0 ]; then
    rm -r "$TEMP_DIR"
    log_error 'The script failed to collect a thread dump. Maybe it is not launched with the same user that the Java process is running as. Try with sudo -u <JAVA_USERID> >>>>>>>>>>>>>>>'
    exit 1
  fi
  # Only announce the dump once it is known to have succeeded.
  log_info "Collected a threadDump for PID $pid."

  # Pause for THREADDUMP_FREQUENCY seconds.
  log_info "A new collection will start in ${frequency} seconds."
  sleep "${frequency}"

  # Update duration
  duration=$((duration - frequency))
done

log_info "Packaging data and preparing for cleanup."

log_debug "Moving to $PERFORMANCE_DATA_OUTPUT_DIR"
pushd "${TEMP_DIR}" >/dev/null || exit

PERFORMANCE_DATA_ARCHIVE_NAME="${CBSUPPORT_OUTPUT:-performanceData.$pid.output.tar.gz}"

# Stop before the cleanup if packaging fails, so the collected data is not
# deleted without an archive having been produced.
if ! tar -czf "${PERFORMANCE_DATA_ARCHIVE_NAME}" topOutput topdashHOutput mode.txt threads vmstat netstat iostat nfsiostat nfsstat; then
  log_error "Failed to create the archive ${PERFORMANCE_DATA_ARCHIVE_NAME}. The collected data has been kept in ${TEMP_DIR}"
  exit 1
fi
if ! cp "${PERFORMANCE_DATA_ARCHIVE_NAME}" ..; then
  log_error "Failed to copy the archive ${PERFORMANCE_DATA_ARCHIVE_NAME}. The collected data has been kept in ${TEMP_DIR}"
  exit 1
fi

log_info "Cleanup files"
# Clean up the topOutput.txt and topdashHOutput.$pid.txt files
rm -r "$TEMP_DIR"

log_debug "Moving back to current dir ${HERE}"
popd >/dev/null || exit

# Notify end user. Do not do it when running in the context of cbsupport as the message is misleading for the end user.
if [ -z "$CBSUPPORT_OUTPUT" ]; then
  log_info "The temporary dir \"${TEMP_DIR}\" has been deleted"
  log_info "The collectPerformanceData.sh script in CUSTOM MODE is complete."
  log_info "The Output files are contained within !>>>! ${PERFORMANCE_DATA_ARCHIVE_NAME} !<<<!"
  log_info "Please upload the ${PERFORMANCE_DATA_ARCHIVE_NAME} archive to your ticket for review."
fi