Skip to content

Instantly share code, notes, and snippets.

@rahulunair
Last active November 5, 2024 21:11
Show Gist options
  • Save rahulunair/46f7f8d6633f18b3f64ec80e6f5f1e41 to your computer and use it in GitHub Desktop.

Revisions

  1. rahulunair revised this gist Nov 5, 2024. 1 changed file with 19 additions and 2 deletions.
    21 changes: 19 additions & 2 deletions tgi_start.sh
    Original file line number Diff line number Diff line change
    @@ -5,8 +5,23 @@ volume="$PWD/data"
    tgi_version="2.0.0" # https://github.com/huggingface/tgi-gaudi/releases/tag/v2.0.0
    max_input_token=16000
    max_total_token=32000
    container_name="tgi-container"

    docker run -p 8080:80 \
    kill_existing_container() {
    if [ "$(docker ps -q -f name=$container_name)" ]; then
    echo "Stopping and removing existing container: $container_name"
    docker stop "$container_name"
    docker rm "$container_name"
    fi
    }

    echo "Pulling Docker image: ghcr.io/huggingface/tgi-gaudi:$tgi_version"
    docker pull ghcr.io/huggingface/tgi-gaudi:"$tgi_version"

    kill_existing_container

    echo "Starting Docker container..."
    docker run -d --name "$container_name" -p 8080:80 \
    -v "$volume:/data" \
    --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    @@ -20,4 +35,6 @@ docker run -p 8080:80 \
    ghcr.io/huggingface/tgi-gaudi:"$tgi_version" \
    --model-id "$model" \
    --max-input-tokens "$max_input_token" \
    --max-total-tokens "$max_total_token"
    --max-total-tokens "$max_total_token"

    echo "Docker container $container_name is up and running."
  2. rahulunair created this gist Nov 5, 2024.
    23 changes: 23 additions & 0 deletions tgi_start.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,23 @@
    #!/bin/bash

    model="NousResearch/Nous-Hermes-Llama2-13b"
    volume="$PWD/data"
    tgi_version="2.0.0" # https://github.com/huggingface/tgi-gaudi/releases/tag/v2.0.0
    max_input_token=16000
    max_total_token=32000

    docker run -p 8080:80 \
    -v "$volume:/data" \
    --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
    -e ENABLE_HPU_GRAPH=true \
    -e LIMIT_HPU_GRAPH=true \
    -e USE_FLASH_ATTENTION=true \
    -e FLASH_ATTENTION_RECOMPUTE=true \
    --cap-add=sys_nice \
    --ipc=host \
    ghcr.io/huggingface/tgi-gaudi:"$tgi_version" \
    --model-id "$model" \
    --max-input-tokens "$max_input_token" \
    --max-total-tokens "$max_total_token"