Skip to content

Instantly share code, notes, and snippets.

@byteshiva
Forked from jart/rename-pictures.sh
Created December 15, 2023 11:43
Show Gist options
  • Save byteshiva/fc3511b77d005b5a59092f2d51079ac1 to your computer and use it in GitHub Desktop.
Save byteshiva/fc3511b77d005b5a59092f2d51079ac1 to your computer and use it in GitHub Desktop.

Revisions

  1. @jart jart created this gist Dec 12, 2023.
    159 changes: 159 additions & 0 deletions rename-pictures.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,159 @@
    #!/bin/sh
    # rename-pictures.sh
    # Author: Justine Tunney <[email protected]>
    # License: Apache 2.0
    #
    # This shell script can be used to ensure all the images in a folder
    # have good descriptive filenames that are written in English. It's
    # based on the Mistral 7b and LLaVA v1.5 models.
    #
    # For example, the following command:
    #
    # ./rename-pictures.sh ~/Pictures
    #
    # Will iterate recursively through the specified directories. For each
    # file, it'll ask the Mistral model if the filename looks reasonable. If
    # Mistral doesn't like the filename, then this script will ask LLaVA to
    # analyze the picture and generate a new filename with lowercase letters
    # and underscores. Most image formats are supported (e.g. png/jpg/gif)
    # and newer more exotic ones (e.g. webp) are also supported if Image
    # Magick is installed.
    #
    # You need to have a system with at minimum 8gb of RAM. This will work
    # even on older computers without GPUs; just let it run overnight!

    # Announce on stderr that the renaming run is being cut short, then
    # terminate the script with exit status 1.
    abort() {
      printf 'renaming terminated.\n' 1>&2
      exit 1
    }

    # Locate the LLaVA llamafile on the system path; it is the model that
    # looks at each picture. Without it the script cannot do anything.
    if ! LLAVA=$(command -v llava-v1.5-7b-q4-main.llamafile); then
      printf '%s\n' "llava-v1.5-7b-q4-main.llamafile: fatal error: update this script with the path of your llava llamafile" >&2
      printf '%s\n' "please download https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/main/llava-v1.5-7b-q4-main.llamafile and put it on the system path" >&2
      abort
    fi

    # Locate the Mistral llamafile on the system path; it is the model that
    # judges whether an existing filename is already good enough.
    if ! MISTRAL=$(command -v mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile); then
      printf '%s\n' "mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile: fatal error: update this script with the path of your mistral llamafile" >&2
      printf '%s\n' "please download https://huggingface.co/jartine/mistral-7b.llamafile/resolve/main/mistral-7b-instruct-v0.1-Q4_K_M-main.llamafile and put it on the system path" >&2
      abort
    fi

    # Locate ImageMagick (optional). ImageMagick 7 installs its converter
    # as `magick` and treats `convert` as a legacy name, so prefer `magick`
    # and fall back to `convert` for ImageMagick 6. Both accept the same
    # "INPUT OUTPUT" operands. CONVERT is left empty when neither exists,
    # in which case only a warning is printed and the script carries on.
    if ! CONVERT=$(command -v magick || command -v convert); then
      printf '%s\n' "${0##*/}: warning: convert command not found (please install imagemagick so we can analyze image formats like webp)" >&2
    fi

    # Ask the Mistral llamafile ($MISTRAL) whether the basename of the
    # path in $1 looks like readable English. The grammar handed to the
    # model only admits the words "yes" and "no"; whatever the model
    # prints goes to stdout, its stderr is discarded, and its exit status
    # becomes the status of this function.
    isgood() {
      # keep only the part after the last slash; the directory is not
      # part of the question
      set -- "${1##*/}"
      "$MISTRAL" --temp 0 -ngl 35 \
        --grammar 'root ::= "yes" | "no"' \
        -p "[INST]Does the filename '$1' look like readable english text?[/INST]" \
        --silent-prompt \
        2>/dev/null
    }

    # Ask the LLaVA llamafile ($LLAVA) to describe the image file $1.
    # The grammar handed to the model admits two or more lowercase words
    # separated by single spaces, and -n 10 is passed to cap the output.
    # The description goes to stdout, the model's stderr is discarded,
    # and its exit status becomes the status of this function.
    pickname() {
      # $2 holds the two-line prompt that the model is asked to complete
      set -- "$1" '### User: The image has...
    ### Assistant:'
      "$LLAVA" --image "$1" \
        --temp 0.3 -ngl 35 \
        --grammar 'root ::= [a-z]+ (" " [a-z]+)+' \
        -n 10 \
        -p "$2" \
        --silent-prompt \
        2>/dev/null
    }

    # https://stackoverflow.com/a/30133294/1653720
    #
    # Stand-in for shuf(1) built from awk, sort and cut: emits the lines
    # of the files named in "$@" (or of stdin when there are none) in
    # random order. Each line is tagged with a random number, the lines
    # are sorted on that tag, and the tag is stripped off again.
    shuf() {
      awk 'BEGIN { srand() } { printf "%.17f %s\n", rand(), $0 }' "$@" \
        | sort -n -k 1,1 \
        | cut -d ' ' -f 2-
    }


    # At least one PATH operand is required; without one, complain on
    # stderr and stop through abort.
    [ "$#" -ne 0 ] || {
      printf '%s\n' "${0##*/}: fatal error: missing operand" >&2
      abort
    }

    # A leading --help prints the one-line usage summary on stdout and
    # ends the script right away.
    case $1 in
      --help)
        printf '%s\n' "usage: ${0##*/} PATH..."
        exit
        ;;
    esac

    # Print the path that file $1 should be renamed to, given the
    # space-separated description $2. Spaces in the description become
    # underscores, the extension of the original basename (if any) is
    # appended, and the result stays in the original directory. If that
    # path already exists, -2, -3, ... is inserted before the extension
    # until an unused path is found. The body runs in a subshell so its
    # working variables never leak into the caller.
    target_path() (
      base=${1##*/}

      # directory prefix including its trailing slash (empty if $1 has
      # no directory part)
      case $1 in
        */*) dir=${1%/*}/ ;;
        *) dir= ;;
      esac

      # only a dot inside the basename separates an extension; a dot in
      # a parent directory name (./photo, /home/j.doe/IMG1) must not be
      # mistaken for one
      case $base in
        *.*) ext=.${base##*.} ;;
        *) ext= ;;
      esac

      # replace spaces with underscores
      stem=$(printf '%s\n' "$2" | tr ' ' '_')

      # ensure new name is unique
      candidate=$dir$stem$ext
      i=2
      while [ -e "$candidate" ]; do
        candidate=$dir$stem-$i$ext
        i=$((i + 1))
      done

      printf '%s\n' "$candidate"
    )

    # Process the single regular file $1: ask mistral (isgood) whether
    # its name is already fine, otherwise ask llava (pickname) for a
    # description and rename the file accordingly. Returns 0 when the
    # file was renamed or deliberately skipped; unrecoverable problems
    # end the whole script through abort.
    rename_picture() {
      path=$1

      # ask mistral if filename needs renaming
      if ! answer=$(isgood "$path"); then
        printf '%s\n' "$path: fatal error: failed to ask mistral if file needs renaming" >&2
        abort
      fi

      if [ "$answer" = "yes" ]; then
        printf '%s\n' "skipping $path (mistral says it's good)" >&2
        return 0
      fi

      # ask llm to generate new filename. if it's a format like webp that
      # our stb library doesn't support yet, then we'll ask imagemagick to
      # convert it to png and then try again.
      if ! newname=$(pickname "$path"); then
        if [ -z "$CONVERT" ]; then
          printf '%s\n' "$path: warning: llava failed to describe image (probably due to unsupported file format)" >&2
          return 0
        fi

        # convert inside a private, freshly created directory rather than
        # a predictable /tmp/<pid>.png that another user could pre-create
        if ! tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/rename-pictures.XXXXXX"); then
          printf '%s\n' "$path: fatal error: failed to create temporary directory" >&2
          abort
        fi
        png=$tmpdir/image.png

        if ! "$CONVERT" "$path" "$png" 2>/dev/null; then
          # a failed conversion may still leave a partial file behind
          rm -rf -- "$tmpdir"
          printf '%s\n' "skipping $path (not an image)" >&2
          return 0
        fi

        if newname=$(pickname "$png"); then
          rm -rf -- "$tmpdir"
        else
          rm -rf -- "$tmpdir"
          printf '%s\n' "$path: warning: llava llm failed" >&2
          return 0
        fi
      fi

      # turn the description into a unique path next to the original
      newname=$(target_path "$path" "$newname")

      # rename the file
      printf '%s\n' "renaming $path to $newname"
      if ! mv -n -- "$path" "$newname"; then
        printf '%s\n' "$newname: fatal error: failed to rename file" >&2
        abort
      fi
    }

    # Split the file list produced below on newlines only, so paths that
    # contain spaces or tabs stay in one piece. The newline is generated
    # with printf (the trailing x protects it from being stripped by the
    # command substitution) instead of a literal line break, so the
    # indentation of this file can never end up inside IFS.
    OIFS=$IFS
    IFS=$(printf '\nx')
    IFS=${IFS%x}

    # The unquoted $(find ...) below is also subject to pathname
    # expansion; switch it off so names containing *, ? or [ are taken
    # literally.
    set -f

    for arg; do

      # ensure argument is a file or directory
      if [ ! -e "$arg" ]; then
        printf '%s\n' "$arg: fatal error: file not found" >&2
        abort
      fi

      # visit all regular files under path argument, in random order
      for path in $(find "$arg" -type f | shuf); do
        rename_picture "$path"
      done
    done

    set +f
    IFS=$OIFS