Skip to content

Instantly share code, notes, and snippets.

@Slach
Last active November 19, 2023 09:13
Show Gist options
  • Save Slach/dab0dd73c5a8a8cc35ca3fda10e79619 to your computer and use it in GitHub Desktop.
Save Slach/dab0dd73c5a8a8cc35ca3fda10e79619 to your computer and use it in GitHub Desktop.

Revisions

  1. Slach revised this gist Nov 19, 2023. 1 changed file with 7 additions and 7 deletions.
    14 changes: 7 additions & 7 deletions checksum_parser.sh
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,8 @@
    CHECKSUM_FILE=$1
    if [[ "ENC" == $(dd if="${CHECKSUM_FILE}" bs=1 skip="0" count="3" 2>/dev/null) ]]; then
    echo "ENCRYPTED FILES don't supported"
    exit 0
    fi
    FORMAT_VERSION=$(head -n +1 "${CHECKSUM_FILE}" | sed 's/checksums format version: //g')

    log() { printf '%s\n' "$*"; }
    @@ -53,17 +57,15 @@ function read_uvarint {
    echo $CURRENT_OFFSET > $CURRENT_OFFSET_FIFO
    if [ -z "$byte_value" ]; then
    if [ $i -gt 0 ]; then
    echo "Error: unexpected end of file" >&2
    exit 1
    fatal "Error: unexpected end of file" >&2
    fi
    echo "$x"
    return
    fi

    if [ $byte_value -lt $const0x80 ]; then
    if [ $i -eq $((MaxVarintLen64-1)) ] && [ "$byte_value" -gt 1 ]; then
    echo "Error: overflow" >&2
    exit 1
    fatal "Error: overflow" >&2
    fi
    x=$((x | (byte_value << s)))
    echo "$x"
    @@ -75,8 +77,7 @@ function read_uvarint {
    done

    echo "$x" >&2
    echo "Error: overflow" >&2
    exit 1
    fatal "Error: overflow" >&2
    }

    TEMP_CHECKSUM_BODY=$(mktemp)
    @@ -112,4 +113,3 @@ for ((i=1; i<=$ITEMS_COUNT; i++)); do
    fi
    echo "$NAME=$FILE_HASH"
    done

  2. Slach created this gist Nov 18, 2023.
    115 changes: 115 additions & 0 deletions checksum_parser.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,115 @@
    # First positional argument: path of the checksums file to decode.
    CHECKSUM_FILE="${1}"
    # The first line is expected to read "checksums format version: N";
    # stripping that fixed prefix leaves only N.
    FORMAT_VERSION="$(head -n +1 "${CHECKSUM_FILE}" |
      sed 's/checksums format version: //g')"

    # log: write all arguments, joined into a single line, to stdout.
    log() {
      printf '%s\n' "$*"
    }
    # error: same as log, prefixed with "ERROR: " and sent to stderr.
    error() {
      log "ERROR: $*" 1>&2
    }
    # fatal: report the arguments through error, then exit the shell with 1.
    fatal() {
      error "$@"
      exit 1
    }

    #######################################
    # Appends a command to the handler of one or more traps, keeping whatever
    # handler is already installed instead of replacing it.
    # Arguments:
    #   $1    - code to add
    #   $2... - names of the traps to modify (e.g. EXIT)
    # Exits (through fatal) when called without arguments or when `trap`
    # rejects the combined handler.
    #######################################
    trap_add() {
      # Working variables are local so they neither leak into nor clobber
      # same-named variables of the caller.
      local trap_add_cmd trap_add_name
      trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
      for trap_add_name in "$@"; do
        trap -- "$(
          # helper fn to get existing trap command from output
          # of trap -p (fields: "trap" "--" "<command>" "<name>")
          extract_trap_cmd() { printf '%s\n' "$3"; }
          # print existing trap command with newline; eval is needed because
          # trap -p prints the command in shell-quoted form
          eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
          # print the new trap command
          printf '%s\n' "${trap_add_cmd}"
        )" "${trap_add_name}" \
          || fatal "unable to add to trap ${trap_add_name}"
      done
    }

    # Writes the checksums body (everything after the first line of
    # CHECKSUM_FILE) to stdout. When FORMAT_VERSION is exactly "4" the body is
    # piped through `clickhouse-compressor -d`; any other version is emitted
    # unchanged.
    function checksums_body_cmd {
      case "${FORMAT_VERSION}" in
        4)
          tail -n +2 "${CHECKSUM_FILE}" | clickhouse-compressor -d
          ;;
        *)
          tail -n +2 "${CHECKSUM_FILE}"
          ;;
      esac
    }


    # 1-based position of the next unread byte of the checksums body.
    declare -g CURRENT_OFFSET=1
    # Despite the name this is a regular file, not a FIFO: it carries the
    # updated offset back from read_uvarint, which the script runs inside
    # pipeline subshells. mktemp creates the file atomically; the previous
    # `mktemp -u` + `touch` pair left a window in which another process could
    # claim the generated name.
    CURRENT_OFFSET_FIFO=$(mktemp) || fatal "unable to create offset temp file"
    trap_add 'rm -f -- "${CURRENT_OFFSET_FIFO}"' EXIT

    #######################################
    # Decodes one unsigned varint from stdin: 7 payload bits per byte,
    # least-significant group first, high bit set on every byte but the last.
    # Stdin:   one decimal byte value per line, e.g. the output of
    #          hexdump -v -e '/1 "%u\n"'.
    # Globals: CURRENT_OFFSET      - incremented once per byte consumed
    #          CURRENT_OFFSET_FIFO - path of a file that receives the current
    #                                offset, so a caller can recover it when
    #                                this function ran in a subshell
    # Outputs: the decoded value as an unsigned decimal on stdout;
    #          diagnostics on stderr.
    # Returns: 0 on success (0 is also printed when stdin is empty).
    # Exits:   with status 1 when input ends in the middle of a value or the
    #          value does not fit in 64 bits.
    #######################################
    function read_uvarint {
      # local -r instead of readonly: readonly would create global read-only
      # variables and make every later call in the same shell print errors.
      local -r MaxVarintLen64=10
      local -r const0x80=$((0x80))
      local -r const0x7f=$((0x7f))
      local x=0
      local s=0
      local i byte_value

      for ((i = 0; i < MaxVarintLen64; i++)); do
        read -r byte_value
        if [ -z "$byte_value" ]; then
          # End of input: no byte was consumed, so publish the offset unchanged.
          printf '%s\n' "${CURRENT_OFFSET}" > "${CURRENT_OFFSET_FIFO}"
          if [ "$i" -gt 0 ]; then
            echo "Error: unexpected end of file" >&2
            exit 1
          fi
          printf '%u\n' "$x"
          return
        fi
        ((CURRENT_OFFSET += 1))
        printf '%s\n' "${CURRENT_OFFSET}" > "${CURRENT_OFFSET_FIFO}"

        # High bit clear: this is the final byte of the value.
        if [ "$byte_value" -lt "$const0x80" ]; then
          # The 10th byte may contribute only bit 63; anything larger overflows.
          if [ "$i" -eq $((MaxVarintLen64 - 1)) ] && [ "$byte_value" -gt 1 ]; then
            echo "Error: overflow" >&2
            exit 1
          fi
          x=$((x | (byte_value << s)))
          # %u: shell arithmetic is signed, so values >= 2^63 would otherwise
          # be printed as negative numbers.
          printf '%u\n' "$x"
          return
        fi

        x=$((x | ((byte_value & const0x7f) << s)))
        s=$((s + 7))
      done

      # Ten bytes in a row carried the continuation bit.
      echo "$x" >&2
      echo "Error: overflow" >&2
      exit 1
    }

    # Scratch copy of the checksums body; the decoding steps below address it
    # by byte offset, which needs a seekable file rather than a stream.
    TEMP_CHECKSUM_BODY=$(mktemp) || fatal "unable to create temporary file"
    trap_add 'rm -f "${TEMP_CHECKSUM_BODY}"' EXIT

    # Stop early if the body cannot be produced (for example when the
    # decompressor is missing) instead of decoding an empty or partial file.
    checksums_body_cmd > "${TEMP_CHECKSUM_BODY}" \
      || fatal "unable to extract checksums body from ${CHECKSUM_FILE}"

    # The body starts with a uvarint holding the number of entries.
    ITEMS_COUNT=$(hexdump -v -e '/1 "%u\n"' "${TEMP_CHECKSUM_BODY}" | read_uvarint) \
      || fatal "unable to read items count"
    # read_uvarint ran in a subshell; pick the advanced offset up from its file.
    read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"

    # Decode ITEMS_COUNT entries and print one "<name>=<file hash hex>" line per
    # entry. Each entry is laid out as:
    #   uvarint name length, name bytes, uvarint file size, 16-byte file hash,
    #   1-byte compressed flag and, when that flag is non-zero,
    #   uvarint uncompressed size plus a 16-byte uncompressed hash.
    # Fields that are not printed are still decoded because their lengths are
    # needed to find the start of the next field.
    # After every read_uvarint call (which runs in a pipeline subshell) the
    # advanced offset is read back from CURRENT_OFFSET_FIFO.
    for ((i = 1; i <= ITEMS_COUNT; i++)); do
      NAME_LENGTH=$(tail -c "+${CURRENT_OFFSET}" "${TEMP_CHECKSUM_BODY}" | hexdump -v -e '/1 "%u\n"' | read_uvarint) \
        || fatal "item ${i}: unable to read name length"
      read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"

      # CURRENT_OFFSET is 1-based (tail -c +N) while dd's skip is 0-based.
      NAME=$(dd if="${TEMP_CHECKSUM_BODY}" bs=1 skip="$((CURRENT_OFFSET - 1))" count="${NAME_LENGTH}" 2>/dev/null)
      ((CURRENT_OFFSET += NAME_LENGTH))

      FILE_SIZE=$(tail -c "+${CURRENT_OFFSET}" "${TEMP_CHECKSUM_BODY}" | hexdump -v -e '/1 "%u\n"' | read_uvarint) \
        || fatal "item ${i}: unable to read file size"
      read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"

      FILE_HASH=$(dd if="${TEMP_CHECKSUM_BODY}" bs=1 skip="$((CURRENT_OFFSET - 1))" count="16" 2>/dev/null | xxd -ps -c 32)
      ((CURRENT_OFFSET += 16))

      IS_COMPRESSED=$(dd if="${TEMP_CHECKSUM_BODY}" bs=1 skip="$((CURRENT_OFFSET - 1))" count="1" 2>/dev/null | xxd -p)
      ((CURRENT_OFFSET += 1))

      if [[ "00" != "${IS_COMPRESSED}" ]]; then
        UNCOMPRESSED_SIZE=$(tail -c "+${CURRENT_OFFSET}" "${TEMP_CHECKSUM_BODY}" | hexdump -v -e '/1 "%u\n"' | read_uvarint) \
          || fatal "item ${i}: unable to read uncompressed size"
        read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"

        UNCOMPRESSED_HASH=$(dd if="${TEMP_CHECKSUM_BODY}" bs=1 skip="$((CURRENT_OFFSET - 1))" count="16" 2>/dev/null | xxd -ps -c 32)
        ((CURRENT_OFFSET += 16))
      fi
      echo "$NAME=$FILE_HASH"
    done