Last active: November 5, 2024, 21:11
Revisions
rahulunair revised this gist
Nov 5, 2024. 1 changed file with 19 additions and 2 deletions.
The script after this revision:

```bash
#!/bin/bash
# Serve a model with Hugging Face TGI (Gaudi build) in Docker: remove any old
# container, pull the image, then start the server detached on host port 8080.

model="NousResearch/Nous-Hermes-Llama2-13b"
volume="$PWD/data"
tgi_version="2.0.0"  # https://github.com/huggingface/tgi-gaudi/releases/tag/v2.0.0
max_input_token=16000
max_total_token=32000
container_name="tgi-container"

# Stop and remove any container left over from a previous run.
kill_existing_container() {
  if [ "$(docker ps -q -f name=$container_name)" ]; then
    echo "Stopping and removing existing container: $container_name"
    docker stop "$container_name"
    docker rm "$container_name"
  fi
}

echo "Pulling Docker image: ghcr.io/huggingface/tgi-gaudi:$tgi_version"
docker pull ghcr.io/huggingface/tgi-gaudi:"$tgi_version"

kill_existing_container

echo "Starting Docker container..."
docker run -d --name "$container_name" -p 8080:80 \
  -v "$volume:/data" \
  --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
  -e ENABLE_HPU_GRAPH=true \
  -e LIMIT_HPU_GRAPH=true \
  -e USE_FLASH_ATTENTION=true \
  -e FLASH_ATTENTION_RECOMPUTE=true \
  --cap-add=sys_nice \
  --ipc=host \
  ghcr.io/huggingface/tgi-gaudi:"$tgi_version" \
  --model-id "$model" \
  --max-input-tokens "$max_input_token" \
  --max-total-tokens "$max_total_token"

echo "Docker container $container_name is up and running."
```
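Once the container is up, a quick way to confirm the server actually works is to tail the container logs until the model finishes loading, then hit TGI's `/generate` endpoint on the mapped port. A minimal smoke-test sketch, assuming the defaults above (container name `tgi-container`, host port 8080); the prompt text and `max_new_tokens` value are arbitrary examples:

```bash
# Follow the server logs until TGI reports the model is loaded (Ctrl+C to detach).
docker logs -f tgi-container

# Send a test generation request; TGI listens on container port 80, mapped to host port 8080.
curl http://127.0.0.1:8080/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 64}}'
```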
rahulunair created this gist
Nov 5, 2024.
```bash
#!/bin/bash
# Launch Hugging Face TGI (Gaudi build) in Docker, serving the model on host port 8080.

model="NousResearch/Nous-Hermes-Llama2-13b"
volume="$PWD/data"
tgi_version="2.0.0"  # https://github.com/huggingface/tgi-gaudi/releases/tag/v2.0.0
max_input_token=16000
max_total_token=32000

docker run -p 8080:80 \
  -v "$volume:/data" \
  --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
  -e ENABLE_HPU_GRAPH=true \
  -e LIMIT_HPU_GRAPH=true \
  -e USE_FLASH_ATTENTION=true \
  -e FLASH_ATTENTION_RECOMPUTE=true \
  --cap-add=sys_nice \
  --ipc=host \
  ghcr.io/huggingface/tgi-gaudi:"$tgi_version" \
  --model-id "$model" \
  --max-input-tokens "$max_input_token" \
  --max-total-tokens "$max_total_token"
```