Skip to content

Instantly share code, notes, and snippets.

@vijinho
Created January 7, 2025 16:25
Show Gist options
  • Save vijinho/50553202da24cfa73347b65f7df74057 to your computer and use it in GitHub Desktop.
Save vijinho/50553202da24cfa73347b65f7df74057 to your computer and use it in GitHub Desktop.

Revisions

  1. vijinho created this gist Jan 7, 2025.
    113 changes: 113 additions & 0 deletions text_to_speech.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,113 @@
    #!/bin/bash

    # Lookup table: numeric model ID -> model name.
    declare -A model_ids
    model_ids[12]="tts_models/en/ljspeech/tacotron2-DDC_ph"
    model_ids[15]="tts_models/en/ljspeech/tacotron2-DCA"

    # Lookup table: numeric vocoder ID -> vocoder name.
    declare -A vocoder_ids
    vocoder_ids[4]="vocoder_models/en/ljspeech/multiband-melgan"
    vocoder_ids[6]="vocoder_models/en/ljspeech/univnet"

    #######################################
    # Print the command-line help and terminate the script.
    # Globals:   none (reads $0 for the program name)
    # Arguments: none
    # Outputs:   help text on stdout
    # Returns:   never returns; always exits with status 1
    #######################################
    usage() {
      local prog=$0
      # printf is used instead of a here-document so the help text does not
      # depend on the terminator line sitting in column 0.
      # -m and -v are shown as mandatory because the script rejects a missing
      # or unknown ID; the second example only uses options that exist.
      printf '%s\n' \
        "Usage: $prog -m <model_id> -v <vocoder_id> [-t <text>] [-f <output_file>] [-p]" \
        "Options:" \
        "-t <text> : The text to speak" \
        "-m <model_id> : Model ID (12 or 15)." \
        "-v <vocoder_id> : Vocoder ID (4 or 6)." \
        "-f <output_file>: Path to save the output file. Default: /tmp/tts_m<model_id>_v<vocoder_id>-YYYYMMDD-HHMMSS.wav" \
        "-p : Play the generated sound file with aplay (for audio playback)." \
        "-h, --help : Display this help message." \
        "Examples:" \
        "$prog -m 12 -v 4 -f /home/user/output.wav" \
        "$prog -t \"Hello world\" -m 15 -v 6 -p"
      exit 1
    }

    # Defaults. Every variable the options can set is initialised here so a
    # same-named variable inherited from the environment cannot leak in.
    model_id=""
    vocoder_id=""
    text="TESTING. This is a test because no text was specified."
    out_path=""
    pipe_out=false

    #######################################
    # Parse the command-line options into the global settings.
    # Globals:   text, model_id, vocoder_id, out_path, pipe_out (written)
    # Arguments: the script's command-line arguments
    # Outputs:   a diagnostic on stderr for an unknown option or a missing
    #            option argument
    # Returns:   0 on success; on -h/--help or a bad option it calls usage,
    #            which is expected to terminate the script
    #######################################
    parse_args() {
      local opt OPTARG
      local OPTIND=1
      # Leading ':' selects getopts' silent mode: bash stays quiet, OPTARG
      # holds the offending option letter, and a missing argument is reported
      # as ':' (without it the ':' arm could never run).
      # '-:' makes "--name" arrive as option '-' with OPTARG=name, which is
      # how the documented --help is recognised.
      while getopts ":t:m:v:f:ph-:" opt; do
        case "$opt" in
          t) text=$OPTARG ;;
          m) model_id=$OPTARG ;;
          v) vocoder_id=$OPTARG ;;
          f) out_path=$OPTARG ;;
          p) pipe_out=true ;;
          h) usage ;;
          -)
            if [[ "$OPTARG" == "help" ]]; then
              usage
            fi
            echo "Invalid option: --$OPTARG" 1>&2
            usage
            ;;
          \?)
            echo "Invalid option: -$OPTARG" 1>&2
            usage
            ;;
          :)
            echo "Invalid option: -$OPTARG requires an argument" 1>&2
            usage
            ;;
        esac
      done
    }

    parse_args "$@"

    # Validate model_id and vocoder_id against the lookup tables.
    # The empty-string test comes first (and short-circuits) because an empty
    # string is not a usable associative-array subscript in bash, which is
    # exactly what happens when -m or -v was not given.
    # Diagnostics go to stderr, like the option parser's.
    if [[ -z "$model_id" || -z "${model_ids[$model_id]:-}" ]]; then
      echo "Invalid model ID: $model_id. Valid models: ${!model_ids[*]}" >&2
      usage
    fi

    if [[ -z "$vocoder_id" || -z "${vocoder_ids[$vocoder_id]:-}" ]]; then
      echo "Invalid vocoder ID: $vocoder_id. Valid vocoders: ${!vocoder_ids[*]}" >&2
      usage
    fi

    # Assign out_path if not provided.
    if [[ -z "$out_path" ]]; then
      # $TEMP is honoured when set (previous behaviour), but it is usually
      # unset outside Windows-like environments, which used to put the file
      # in "/". Fall back to $TMPDIR and finally /tmp, the documented default.
      tmp_root="${TEMP:-${TMPDIR:-/tmp}}"
      tmp_root="${tmp_root%/}" # avoid a doubled slash when the value ends in '/'
      out_path="${tmp_root}/tts_m${model_id}_v${vocoder_id}-$(date "+%Y%m%d-%H%M%S").wav"
    else
      # Ensure the directory of the provided path exists and is writable.
      out_dir=$(dirname -- "$out_path")
      if [[ ! -d "$out_dir" || ! -w "$out_dir" ]]; then
        echo "Invalid output directory: $out_dir" >&2
        usage
      fi
    fi

    # Retrieve the corresponding model and vocoder names
    model_name="${model_ids[$model_id]}"
    vocoder_name="${vocoder_ids[$vocoder_id]}"

    # Build the command as an argument array and run it directly.
    # The previous string + eval form re-parsed the user's text as shell
    # code, so quotes, '$' or backticks in -t could break the command or
    # execute arbitrary commands.
    tts_command=(
      tts
      --text "$text"
      --model_name "$model_name"
      --vocoder_name "$vocoder_name"
      --out_path "$out_path"
    )

    # %q prints each argument shell-quoted, so the logged line is copy-pasteable.
    printf 'Executing:'
    printf ' %q' "${tts_command[@]}"
    printf '\n'

    # Stop here on failure: the later steps need the generated WAV file.
    if ! "${tts_command[@]}"; then
      echo "Error: 'tts' failed; no audio was generated at $out_path." >&2
      exit 1
    fi

    # Convert the WAV file to MP3, optionally play it, then delete the WAV.
    if ! command -v ffmpeg &>/dev/null; then
      echo "Error: 'ffmpeg' is not installed. Please install it to convert the audio wav file to mp3." >&2
      exit 1
    fi

    # Swap the extension for .mp3. Only the file name is inspected, so a dot
    # in a directory name (e.g. /data/v1.2/out) is never mistaken for one.
    case "${out_path##*/}" in
      *.*) OUTPUT_FILE="${out_path%.*}.mp3" ;;
      *) OUTPUT_FILE="${out_path}.mp3" ;;
    esac

    # ffmpeg cannot read from and write to the same file.
    if [[ "$OUTPUT_FILE" == "$out_path" ]]; then
      echo "Error: the output path must not end in .mp3; the WAV audio was left at $out_path." >&2
      exit 1
    fi

    if ! ffmpeg -i "$out_path" -b:a 128k "$OUTPUT_FILE"; then
      echo "Error: ffmpeg failed to convert $out_path to mp3; the WAV file was kept." >&2
      exit 1
    fi

    # Playback uses the WAV (before it is removed) because aplay plays
    # WAV/raw audio and does not decode mp3. An 'if' whose body was only a
    # comment is a bash syntax error, so the branch now does real work.
    if [ "$pipe_out" = true ]; then
      if command -v aplay &>/dev/null; then
        aplay "$out_path"
      else
        echo "Warning: 'aplay' is not installed; skipping playback." >&2
      fi
    fi

    rm -- "$out_path"
    echo "File converted to mp3 and original WAV file deleted. Output file: $OUTPUT_FILE"