Skip to content

Instantly share code, notes, and snippets.

@suya55
Created April 16, 2020 13:27
Show Gist options
  • Select an option

  • Save suya55/ba1bb681d0147e620f4dc278b40307b9 to your computer and use it in GitHub Desktop.

Select an option

Save suya55/ba1bb681d0147e620f4dc278b40307b9 to your computer and use it in GitHub Desktop.

Revisions

  1. suya55 created this gist Apr 16, 2020.
    216 changes: 216 additions & 0 deletions check_row_size.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,216 @@
    #!/bin/bash
    #
    # usage: mysqldump --no-data | check_row_size.sh
    #
    #
    #
    # https://dev.mysql.com/doc/refman/8.0/en/column-count-limit.html#row-size-limits
    #
    # The maximum row size for an InnoDB table, which applies to data stored locally within a database page, is slightly less than half a page for 4KB, 8KB, 16KB, and 32KB innodb_page_size settings.
    # For example, the maximum row size is slightly less than 8KB for the default 16KB InnoDB page size.
    #
    #
    # MariaDB [(none)]> show variables like 'innodb_page_size';
    #+------------------+-------+
    #| Variable_name | Value |
    #+------------------+-------+
    #| innodb_page_size | 16384 |
    #+------------------+-------+
    #1 row in set (0.00 sec)
    #
    #
    # Options:
    # 1. Change default innodb_page_size to 32k
    # 2. Change storage engine to DYNAMIC for tables
    # 3. ?
    #
    #===========================================================================================
    # Functions
    #===========================================================================================
    RETVAL=0
    calc_row_size() {
    local -n TABLE_FIELDS=$1
    local -n TABLE_CHARSET=$2
    local FIELD_TYPE=""
    local FIELD_SIZE=""
    local FIELD=""
    local ROW_SIZE=0
    local IFS=$'|' # To split the vars using set
    for FIELD in "${TABLE_FIELDS[@]}"
    do
    set $FIELD
    FIELD_NAME=$1
    FIELD_TYPE=$2
    FIELD_SIZE=$3
    calc_field_size_in_bytes $FIELD_TYPE $FIELD_SIZE $TABLE_CHARSET
    ROW_SIZE=$((ROW_SIZE + RETVAL))
    [ $DEBUG -gt 0 ] && echo "DEBUG1: Field name: $FIELD_NAME type: $FIELD_TYPE lenght: $FIELD_SIZE size: $RETVAL bytes Row size: $ROW_SIZE"
    done
    RETVAL=$ROW_SIZE
    }
    calc_field_size_in_bytes() {
    local TYPE=$1
    local SIZE=$2
    local CHARSET=$3

    case $FIELD_TYPE in
    varchar)
    # https://adayinthelifeof.nl/2010/12/04/about-using-utf-8-fields-in-mysql/
    # Max 3 bytes per utf-8 chat in mysql
    case $CHARSET in
    utf8)
    RETVAL=$((SIZE * 3)) # 3 bytes per character for utf8
    ;;
    latin1)
    RETVAL=$((SIZE)) # 1 byte per character for latin1
    ;;
    *)
    echo "Unknown charset ($CHARSET), please fix the script"
    exit 1
    ;;
    esac
    ;;
    smallint|int|bigint|tinyint|varbinary)
    RETVAL=$SIZE
    ;;
    blob)
    # https://dev.mysql.com/doc/refman/8.0/en/column-count-limit.html#row-size-limits
    # BLOB and TEXT columns only contribute 9 to 12 bytes toward the row size limit because their contents are stored separately from the rest of the row.
    RETVAL=9
    ;;
    text)
    RETVAL=12
    ;;
    timestamp)
    RETVAL=4
    ;;
    decimal)
    # https://dev.mysql.com/doc/refman/8.0/en/storage-requirements.html#data-types-storage-reqs-numeric
    # Each multiple of nine digits requires four bytes, and the “leftover” digits require some fraction of four bytes.
    if [[ $SIZE =~ ([0-9]+),([0-9]+) ]]
    then
    INTEGER_PART=${BASH_REMATCH[1]}
    FRACTIONAL_PART=${BASH_REMATCH[2]}
    INTEGER_BYTES=$((INTEGER_PART / 9 * 4))
    REMAINDER=$((INTEGER_PART % 9))
    case $REMAINDER in
    0) INTEGER_BYTES=$((INTEGER_BYTES + 0)); ;;
    1) INTEGER_BYTES=$((INTEGER_BYTES + 1)); ;;
    2) INTEGER_BYTES=$((INTEGER_BYTES + 1)); ;;
    3) INTEGER_BYTES=$((INTEGER_BYTES + 2)); ;;
    4) INTEGER_BYTES=$((INTEGER_BYTES + 2)); ;;
    5) INTEGER_BYTES=$((INTEGER_BYTES + 3)); ;;
    6) INTEGER_BYTES=$((INTEGER_BYTES + 3)); ;;
    7) INTEGER_BYTES=$((INTEGER_BYTES + 4)); ;;
    8) INTEGER_BYTES=$((INTEGER_BYTES + 4)); ;;
    esac
    FRACTIONAL_BYTES=$((FRACTIONAL_PART / 9 * 4))
    REMAINDER=$((FRACTIONAL_PART % 9))
    case $REMAINDER in
    0) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 0)); ;;
    1) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 1)); ;;
    2) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 1)); ;;
    3) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 2)); ;;
    4) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 2)); ;;
    5) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 3)); ;;
    6) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 3)); ;;
    7) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 4)); ;;
    8) FRACTIONAL_BYTES=$((FRACTIONAL_BYTES + 4)); ;;
    esac
    [ $DEBUG -gt 0 ] && echo "DEBUG1: Calulation of decimal: SIZE: $SIZE INTEGER_PART:$INTEGER_PART FRACTIONAL_PART:$FRACTIONAL_PART TOTAL = INTEGER_BYTES($INTEGER_BYTES) + FRACTIONAL_BYTES($FRACTIONAL_BYTES)"
    RETVAL=$((INTEGER_BYTES + FRACTIONAL_BYTES))
    else
    echo "Seems like SIZE ($SIZE) for a decimal field doesn't match pattern ([0-9]+),([0-9]+). Please investigate"
    exit 1
    fi
    ;;
    *)
    echo "Found a field type that is not handled: $TYPE. Please fix before proceeding."
    exit 1
    ;;
    esac
    }
    #===========================================================================================
    # INIT
    #===========================================================================================
    INSIDE_CREATE_TABLE_STATEMENT=false # True if we are within a create table statement
    TABLE_NAME='' # Current table name
    ROW_SIZE=0 # Current row size being calculated
    DEBUG=0
    VERBOSE=0
    MAX_SIZE=8126 # Default
    declare -a FIELDS # List of fields from the current CREATE TABLE statement
    #===========================================================================================
    # Parameters
    #===========================================================================================
    OPTIND=1 # Reset in case getopts has been used previously in the shell.
    while getopts "hvdt:" opt; do
    case "$opt" in
    h)
    echo "Usage: mysqldump --no-data | ./check_row_size [-v|-d] [-t threshold]"
    exit 0
    ;;
    v) VERBOSE=1
    ;;
    d) DEBUG=2
    ;;
    t) MAX_SIZE=$OPTARG
    ;;
    esac
    done
    #===========================================================================================
    # MAIN Loop - parses schema then calc row_size based on charset
    #===========================================================================================
    while IFS= read -r LINE
    do
    [ $DEBUG -gt 1 ] && echo "DEBUG2: Read: $LINE"
    # Are we within a CREATE TABLE statement?
    if [ $INSIDE_CREATE_TABLE_STATEMENT == "false" ]
    then
    # Nope, is the current line a 'CREATE TABLE' statement?
    if [[ $LINE =~ ^"CREATE TABLE \`"([^\`]+) ]]
    then
    [ $DEBUG -gt 0 ] && echo "CREATE TABLE FOUND!: $TABLE_NAME"
    TABLE_NAME=${BASH_REMATCH[1]} # What has been caught between pattern parenthesis
    INSIDE_CREATE_TABLE_STATEMENT='true'
    FIELDS=()
    fi
    continue # Ok, next line
    fi
    # Is this a create table field definition line?
    if [[ $LINE =~ ^' '+'`'([^'`']+)'` '([a-z]+)'('([^')']+) ]]
    then
    FIELD_NAME=${BASH_REMATCH[1]}
    FIELD_TYPE=${BASH_REMATCH[2]}
    FIELD_SIZE=${BASH_REMATCH[3]}
    FIELDS+=( "$FIELD_NAME|$FIELD_TYPE|$FIELD_SIZE" )
    continue
    fi
    # Have we reached the end of the CREATE TABLE statement?
    if [[ $LINE =~ ^") ENGINE=InnoDB DEFAULT CHARSET="([^ ]+) ]]
    then
    CHARSET=${BASH_REMATCH[1]}
    [ $DEBUG -gt 0 ] && echo "End of CREATE TABLE statement"
    calc_row_size FIELDS CHARSET
    ROW_SIZE=$RETVAL
    if [ $ROW_SIZE -gt $MAX_SIZE ]
    then
    echo "Table: $TABLE_NAME has a row size: $ROW_SIZE Bytes > $MAX_SIZE Bytes Charset: $CHARSET"
    # and is going to cause problem if the we upgrade to tables in ROW_FORMAT compact. See https://mariadb.com/kb/en/library/troubleshooting-row-size-too-large-errors-with-innodb/ for more details."
    fi
    INSIDE_CREATE_TABLE_STATEMENT='false'
    fi
    done