Skip to content

Instantly share code, notes, and snippets.

@0xBigBoss
Last active January 31, 2025 02:57
Show Gist options
  • Select an option

  • Save 0xBigBoss/896c84ec02e8b27963ea16c5a2ec5692 to your computer and use it in GitHub Desktop.

Select an option

Save 0xBigBoss/896c84ec02e8b27963ea16c5a2ec5692 to your computer and use it in GitHub Desktop.

Revisions

  1. 0xBigBoss revised this gist Jan 31, 2025. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion ray-start.sh
    Original file line number Diff line number Diff line change
    @@ -115,6 +115,5 @@ docker run \
    --ipc=host \
    --gpus all \
    -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
    ${NETWORK_ARG} \
    ${DOCKER_EXTRA_ARGS} \
    "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
  2. 0xBigBoss revised this gist Jan 31, 2025. No changes.
  3. 0xBigBoss revised this gist Jan 31, 2025. 1 changed file with 81 additions and 92 deletions.
    173 changes: 81 additions & 92 deletions ray-start.sh
    Original file line number Diff line number Diff line change
    @@ -2,26 +2,25 @@

    # Help function to display usage
    show_help() {
    echo "Usage: $0 [OPTIONS]"
    echo
    echo "Options:"
    echo " --image IMAGE Docker image to use (required)"
    echo " --address IP Head node IP address (required)"
    echo " --mode MODE Either 'head' or 'worker' (required)"
    echo " --hf-path PATH Path to Hugging Face cache directory (required)"
    echo " --node-name NAME Custom name for the Ray node (default: ray-node)"
    echo " --port PORT Port for Ray head node (default: 6379)"
    echo " --network-interface IF Network interface to use (e.g., eth0)"
    echo " --docker-args ARGS Additional Docker arguments (quoted if multiple)"
    echo " --ray-args ARGS Additional Ray arguments (quoted if multiple)"
    echo
    echo "Example for head node:"
    echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode head \\"
    echo " --hf-path ~/.cache/huggingface --network-interface eth0"
    echo
    echo "Example for worker node:"
    echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode worker \\"
    echo " --hf-path ~/.cache/huggingface --network-interface eth0"
    echo "Usage: $0 [OPTIONS]"
    echo
    echo "Options:"
    echo " --image IMAGE Docker image to use (required)"
    echo " --address IP Head node IP address (required)"
    echo " --mode MODE Either 'head' or 'worker' (required)"
    echo " --hf-path PATH Path to Hugging Face cache directory (required)"
    echo " --node-name NAME Custom name for the Ray node (default: ray-node)"
    echo " --port PORT Port for Ray head node (default: 6379)"
    echo " --docker-args ARGS Additional Docker arguments (quoted if multiple)"
    echo " --ray-args ARGS Additional Ray arguments (quoted if multiple)"
    echo
    echo "Example for head node:"
    echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode head \\"
    echo " --hf-path ~/.cache/huggingface --network-interface eth0"
    echo
    echo "Example for worker node:"
    echo " $0 --image vllm/vllm-openai:latest --address 10.0.0.12 --mode worker \\"
    echo " --hf-path ~/.cache/huggingface --network-interface eth0"
    }

    # Default values
    @@ -30,102 +29,92 @@ RAY_PORT="6379"

    # Parse command line arguments
    while [[ $# -gt 0 ]]; do
    case $1 in
    --help)
    show_help
    exit 0
    ;;
    --image)
    DOCKER_IMAGE="$2"
    shift 2
    ;;
    --address)
    HEAD_NODE_ADDRESS="$2"
    shift 2
    ;;
    --mode)
    NODE_MODE="$2"
    shift 2
    ;;
    --hf-path)
    PATH_TO_HF_HOME="$2"
    shift 2
    ;;
    --node-name)
    NODE_NAME="$2"
    shift 2
    ;;
    --port)
    RAY_PORT="$2"
    shift 2
    ;;
    --network-interface)
    NETWORK_INTERFACE="$2"
    shift 2
    ;;
    --docker-args)
    DOCKER_EXTRA_ARGS="$2"
    shift 2
    ;;
    --ray-args)
    RAY_EXTRA_ARGS="$2"
    shift 2
    ;;
    *)
    echo "Unknown option: $1"
    show_help
    exit 1
    ;;
    esac
    case $1 in
    --help)
    show_help
    exit 0
    ;;
    --image)
    DOCKER_IMAGE="$2"
    shift 2
    ;;
    --address)
    HEAD_NODE_ADDRESS="$2"
    shift 2
    ;;
    --mode)
    NODE_MODE="$2"
    shift 2
    ;;
    --hf-path)
    PATH_TO_HF_HOME="$2"
    shift 2
    ;;
    --node-name)
    NODE_NAME="$2"
    shift 2
    ;;
    --port)
    RAY_PORT="$2"
    shift 2
    ;;
    --docker-args)
    DOCKER_EXTRA_ARGS="$2"
    shift 2
    ;;
    --ray-args)
    RAY_EXTRA_ARGS="$2"
    shift 2
    ;;
    *)
    echo "Unknown option: $1"
    show_help
    exit 1
    ;;
    esac
    done

    # Validate required arguments
    if [ -z "$DOCKER_IMAGE" ] || [ -z "$HEAD_NODE_ADDRESS" ] || [ -z "$NODE_MODE" ] || [ -z "$PATH_TO_HF_HOME" ]; then
    echo "Error: Missing required arguments"
    show_help
    exit 1
    echo "Error: Missing required arguments"
    show_help
    exit 1
    fi

    # Validate node mode
    if [ "${NODE_MODE}" != "head" ] && [ "${NODE_MODE}" != "worker" ]; then
    echo "Error: Mode must be 'head' or 'worker'"
    exit 1
    echo "Error: Mode must be 'head' or 'worker'"
    exit 1
    fi

    # Define a function to cleanup on EXIT signal
    cleanup() {
    docker stop "${NODE_NAME}"
    docker rm "${NODE_NAME}"
    docker stop "${NODE_NAME}"
    docker rm "${NODE_NAME}"
    }
    trap cleanup EXIT

    # Command setup for head or worker node
    RAY_START_CMD="ray start --block"
    if [ "${NODE_MODE}" == "head" ]; then
    RAY_START_CMD+=" --head --port=${RAY_PORT}"
    RAY_START_CMD+=" --head --port=${RAY_PORT}"
    else
    RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:${RAY_PORT}"
    RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:${RAY_PORT}"
    fi

    # Add any extra Ray arguments
    if [ -n "$RAY_EXTRA_ARGS" ]; then
    RAY_START_CMD+=" ${RAY_EXTRA_ARGS}"
    fi

    # Build network interface argument if specified
    NETWORK_ARG=""
    if [ -n "$NETWORK_INTERFACE" ]; then
    NETWORK_ARG="-e NCCL_SOCKET_IFNAME=${NETWORK_INTERFACE}"
    RAY_START_CMD+=" ${RAY_EXTRA_ARGS}"
    fi

    # Run the docker command
    docker run \
    --entrypoint /bin/bash \
    --network host \
    --name "${NODE_NAME}" \
    --ipc=host \
    --gpus all \
    -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
    ${NETWORK_ARG} \
    ${DOCKER_EXTRA_ARGS} \
    "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
    --entrypoint /bin/bash \
    --network host \
    --name "${NODE_NAME}" \
    --ipc=host \
    --gpus all \
    -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
    ${NETWORK_ARG} \
    ${DOCKER_EXTRA_ARGS} \
    "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
  4. 0xBigBoss revised this gist Jan 31, 2025. 1 changed file with 106 additions and 24 deletions.
    130 changes: 106 additions & 24 deletions ray-start.sh
    Original file line number Diff line number Diff line change
    @@ -1,49 +1,131 @@
    #!/bin/bash

    # Check for minimum number of required arguments
    if [ $# -lt 4 ]; then
    echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
    exit 1
    fi
    # Print the usage text to stdout: the option summary followed by one
    # example invocation for each mode. The script name is taken from $0.
    show_help() {
      local self="$0"
      printf '%s\n' \
        "Usage: ${self} [OPTIONS]" \
        "" \
        "Options:" \
        " --image IMAGE Docker image to use (required)" \
        " --address IP Head node IP address (required)" \
        " --mode MODE Either 'head' or 'worker' (required)" \
        " --hf-path PATH Path to Hugging Face cache directory (required)" \
        " --node-name NAME Custom name for the Ray node (default: ray-node)" \
        " --port PORT Port for Ray head node (default: 6379)" \
        " --network-interface IF Network interface to use (e.g., eth0)" \
        " --docker-args ARGS Additional Docker arguments (quoted if multiple)" \
        " --ray-args ARGS Additional Ray arguments (quoted if multiple)" \
        "" \
        "Example for head node:" \
        " ${self} --image vllm/vllm-openai:latest --address 10.0.0.12 --mode head \\" \
        " --hf-path ~/.cache/huggingface --network-interface eth0" \
        "" \
        "Example for worker node:" \
        " ${self} --image vllm/vllm-openai:latest --address 10.0.0.12 --mode worker \\" \
        " --hf-path ~/.cache/huggingface --network-interface eth0"
    }

    # Assign the first four arguments and shift them away
    DOCKER_IMAGE="$1"
    HEAD_NODE_ADDRESS="$2"
    NODE_TYPE="$3" # Should be --head or --worker
    PATH_TO_HF_HOME="$4"
    shift 4
    # Default values
    NODE_NAME="ray-node"
    RAY_PORT="6379"

    # Additional arguments are passed directly to the Docker command
    ADDITIONAL_ARGS=("$@")
    # Parse command line arguments.
    #
    # Each recognised option stores its value in a global:
    #   --image             -> DOCKER_IMAGE
    #   --address           -> HEAD_NODE_ADDRESS
    #   --mode              -> NODE_MODE
    #   --hf-path           -> PATH_TO_HF_HOME
    #   --node-name         -> NODE_NAME
    #   --port              -> RAY_PORT
    #   --network-interface -> NETWORK_INTERFACE
    #   --docker-args       -> DOCKER_EXTRA_ARGS
    #   --ray-args          -> RAY_EXTRA_ARGS
    # --help prints the usage text and exits 0. An unknown option prints the
    # usage text and exits 1.
    #
    # A value-taking option given as the last word (no value after it) is
    # rejected with exit 1. Without that check `shift 2` fails when only one
    # argument is left, nothing is shifted, and the loop never terminates.
    parse_args() {
      while [[ $# -gt 0 ]]; do
        case "$1" in
          --help)
            show_help
            exit 0
            ;;
          --image | --address | --mode | --hf-path | --node-name | --port | \
            --network-interface | --docker-args | --ray-args)
            if [[ $# -lt 2 ]]; then
              echo "Error: Option $1 requires a value"
              show_help
              exit 1
            fi
            case "$1" in
              --image) DOCKER_IMAGE="$2" ;;
              --address) HEAD_NODE_ADDRESS="$2" ;;
              --mode) NODE_MODE="$2" ;;
              --hf-path) PATH_TO_HF_HOME="$2" ;;
              --node-name) NODE_NAME="$2" ;;
              --port) RAY_PORT="$2" ;;
              --network-interface) NETWORK_INTERFACE="$2" ;;
              --docker-args) DOCKER_EXTRA_ARGS="$2" ;;
              --ray-args) RAY_EXTRA_ARGS="$2" ;;
            esac
            shift 2
            ;;
          *)
            echo "Unknown option: $1"
            show_help
            exit 1
            ;;
        esac
      done
    }
    parse_args "$@"

    # Validate required arguments
    if [ -z "$DOCKER_IMAGE" ] || [ -z "$HEAD_NODE_ADDRESS" ] || [ -z "$NODE_MODE" ] || [ -z "$PATH_TO_HF_HOME" ]; then
    echo "Error: Missing required arguments"
    show_help
    exit 1
    fi

    # Validate node type
    if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
    echo "Error: Node type must be --head or --worker"
    # Validate node mode
    if [ "${NODE_MODE}" != "head" ] && [ "${NODE_MODE}" != "worker" ]; then
    echo "Error: Mode must be 'head' or 'worker'"
    exit 1
    fi

    # Define a function to cleanup on EXIT signal
    cleanup() {
    docker stop node
    docker rm node
    docker stop "${NODE_NAME}"
    docker rm "${NODE_NAME}"
    }
    trap cleanup EXIT

    # Command setup for head or worker node
    RAY_START_CMD="ray start --block"
    if [ "${NODE_TYPE}" == "--head" ]; then
    RAY_START_CMD+=" --head --port=6379"
    if [ "${NODE_MODE}" == "head" ]; then
    RAY_START_CMD+=" --head --port=${RAY_PORT}"
    else
    RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
    RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:${RAY_PORT}"
    fi

    # Append caller-supplied Ray flags to the start command when any were given.
    [[ -z "${RAY_EXTRA_ARGS}" ]] || RAY_START_CMD+=" ${RAY_EXTRA_ARGS}"

    # NETWORK_ARG carries the docker `-e NCCL_SOCKET_IFNAME=<interface>` flag,
    # or stays empty when no network interface was specified.
    NETWORK_ARG="${NETWORK_INTERFACE:+-e NCCL_SOCKET_IFNAME=${NETWORK_INTERFACE}}"

    # Run the docker command with the user specified parameters and additional arguments
    # Run the docker command
    docker run \
    --entrypoint /bin/bash \
    --network host \
    --name ray-node \
    --name "${NODE_NAME}" \
    --ipc=host \
    --gpus all \
    -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
    "${ADDITIONAL_ARGS[@]}" \
    ${NETWORK_ARG} \
    ${DOCKER_EXTRA_ARGS} \
    "${DOCKER_IMAGE}" -c "${RAY_START_CMD}"
  5. 0xBigBoss created this gist Jan 31, 2025.
    49 changes: 49 additions & 0 deletions ray-start.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    #!/bin/bash

    # Check for minimum number of required arguments
    if [ $# -lt 4 ]; then
    echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
    exit 1
    fi

    # Assign the first four arguments and shift them away
    DOCKER_IMAGE="$1"
    HEAD_NODE_ADDRESS="$2"
    NODE_TYPE="$3" # Should be --head or --worker
    PATH_TO_HF_HOME="$4"
    shift 4

    # Additional arguments are passed directly to the Docker command
    ADDITIONAL_ARGS=("$@")

    # Validate node type
    if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
    echo "Error: Node type must be --head or --worker"
    exit 1
    fi

    # Stop and remove the Ray container; installed as the EXIT trap handler.
    # The container is started with `--name ray-node`, so that is the name
    # targeted here. The previous name `node` did not match it, so the
    # container this script starts was never stopped or removed by the handler.
    cleanup() {
      docker stop ray-node
      docker rm ray-node
    }
    trap cleanup EXIT

    # Assemble the `ray start` command line. `--head` gets `--head --port=6379`;
    # any other node type gets `--address=<HEAD_NODE_ADDRESS>:6379`.
    case "${NODE_TYPE}" in
      --head)
        RAY_START_CMD="ray start --block --head --port=6379"
        ;;
      *)
        RAY_START_CMD="ray start --block --address=${HEAD_NODE_ADDRESS}:6379"
        ;;
    esac

    # Launch the container. The argument list is collected in an array first:
    # fixed flags, the Hugging Face cache bind mount, any caller-supplied
    # additional arguments, then the image. /bin/bash is the entrypoint and
    # receives the assembled Ray command through -c.
    docker_argv=(
      run
      --entrypoint /bin/bash
      --network host
      --name ray-node
      --ipc=host
      --gpus all
      -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface"
      "${ADDITIONAL_ARGS[@]}"
      "${DOCKER_IMAGE}"
      -c "${RAY_START_CMD}"
    )
    docker "${docker_argv[@]}"