- Version: 1.1
- Date: September 14, 2025
- Status: Implementation-Ready
- Infrastructure: GitHub Actions with Self-Hosted Linux Runners
- Purpose: Generate Python dependency wheels and system installation scripts for Databricks Runtime environments

This specification defines a build system that produces Python wheels containing exact DBR dependency specifications and installation scripts for system-level requirements. The system uses GitHub Actions with self-hosted Linux runners for CI/CD, enabling users to recreate Databricks Runtime environments through standard pip installation combined with pre/post setup scripts. The specification includes tested reference implementations from production Dockerfiles.
- Python Wheels - Meta-packages declaring exact DBR dependencies (not bundling)
- Pre-installation Script - System dependencies installer
- Post-installation Script - Binary tools installer
- Constraints Files - Locked dependency versions with SHA256 hashes
- Validation Script - Environment verification tool
- CI/CD: GitHub Actions with self-hosted Linux runners
- Build Environment: Linux (Ubuntu 20.04/22.04)
- Python Versions: 3.11 (DBR 15), 3.12 (DBR 16)
- Container Runtime: Docker/Podman on runners
dbr-env-setup/
├── wheels/
│   ├── dbr-env-core/
│   │   ├── pyproject.toml
│   │   └── src/
│   │       └── dbr_env_core/
│   │           └── __init__.py       # Version metadata only
│   ├── dbr-env-ml/
│   ├── dbr-env-cloud/
│   └── dbr-env-all/                  # Meta-package
├── scripts/
│   ├── dbr-setup-pre                 # System dependencies
│   └── dbr-setup-post                # Binary tools
│   # Note: dbr-validate is provided by the dbr-env-all Python package
├── constraints/
│   ├── dbr15-constraints.txt
│   ├── dbr16-constraints.txt
│   └── generate.py
├── checksums/
│   ├── binaries-dbr15.json
│   └── binaries-dbr16.json
├── requirements/                     # Source requirements from DBR
│   ├── dbr15/
│   │   ├── core.txt
│   │   ├── ml.txt
│   │   └── cloud.txt
│   └── dbr16/
│       ├── core.txt
│       ├── ml.txt
│       └── cloud.txt
├── reference/                        # Tested reference implementations
│   ├── dockerfiles/
│   │   ├── dbr15.Dockerfile
│   │   └── dbr16.Dockerfile
│   └── requirements/
│       ├── dbr15-full.txt
│       └── dbr16-full.txt
├── .github/
│   └── workflows/
│       ├── build-wheels.yml
│       ├── test-installation.yml
│       └── release.yml
└── build/
    └── scripts/
        ├── build-wheels.sh
        └── test-local.sh
# reference/dockerfiles/dbr15.Dockerfile
#
# Reference image for the DBR 15 toolchain: Python 3.11 with the pinned package
# set from reference/requirements/dbr15-full.txt, OpenJDK 17, and the
# Databricks CLI, AWS CLI v2, Terraform and Terragrunt binaries.
FROM python:3.11-bullseye

# Architecture suffix used in the release-asset URLs below; the declared
# default is amd64.
ARG TARGETARCH=amd64

USER root
WORKDIR /tmp

# Install certificates (make configurable for generic use)
# COPY build-images/common/install_certs.sh install_certs.sh
# RUN bash /tmp/install_certs.sh

# Setup UV for fast package installation
ENV UV_TOOL_BIN_DIR=/bin
COPY --from=ghcr.io/astral-sh/uv:0.6.12 /uv /uvx /bin/

# Install Python packages
COPY reference/requirements/dbr15-full.txt /tmp/requirements.txt
RUN uv pip install -r /tmp/requirements.txt --system && uv cache clean

# Install Java & System tools
# curl is listed explicitly because the AWS CLI, Terraform and Terragrunt steps
# below depend on it. The final rm also clears /tmp, including requirements.txt.
RUN apt-get update && \
    apt-get install -y -f -m \
        curl \
        jq \
        openjdk-17-jdk-headless \
        unzip \
        wget \
        zip && \
    /var/lib/dpkg/info/ca-certificates-java.postinst configure && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install Databricks CLI
ARG DBX_CLI_VERSION=0.245.0
RUN mkdir -p /tmp/dbx-cli && \
    wget -q -O /tmp/dbx-cli/databricks_cli.zip \
        "https://github.com/databricks/cli/releases/download/v${DBX_CLI_VERSION}/databricks_cli_${DBX_CLI_VERSION}_linux_${TARGETARCH}.zip" && \
    unzip -q /tmp/dbx-cli/databricks_cli.zip -d /tmp/dbx-cli && \
    mv /tmp/dbx-cli/databricks /usr/local/bin/databricks && \
    chmod +x /usr/local/bin/databricks && \
    rm -rf /tmp/dbx-cli && \
    databricks --help

# Install AWS CLI
# AWS names its archives by kernel architecture (x86_64 / aarch64), not by the
# amd64 / arm64 values carried in TARGETARCH.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the archive.
RUN if [ "${TARGETARCH}" = "arm64" ]; then \
        AWS_CLI_PKG="aarch64"; \
    else \
        AWS_CLI_PKG="x86_64"; \
    fi \
    && curl --fail -L "https://awscli.amazonaws.com/awscli-exe-linux-${AWS_CLI_PKG}.zip" -o "awscliv2.zip" \
    && unzip -q awscliv2.zip \
    && ./aws/install \
    && rm -rf awscliv2.zip aws \
    && aws --version

# Install Terraform
ARG TERRAFORM_VERSION=1.11.2
RUN curl --fail --remote-name --location "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && unzip -q "terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && mv terraform /usr/bin \
    && rm -f LICENSE.txt "terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && terraform version

# Install Terragrunt
# The version value carries the leading "v" because it is used verbatim as the
# release tag in the download URL.
ARG TERRAGRUNT_VERSION="v0.77.0"
RUN curl --fail -sSL "https://github.com/gruntwork-io/terragrunt/releases/download/${TERRAGRUNT_VERSION}/terragrunt_linux_${TARGETARCH}" -o /usr/local/bin/terragrunt \
    && chmod +x /usr/local/bin/terragrunt \
    && terragrunt --version

# reference/dockerfiles/dbr16.Dockerfile
# Reference image for the DBR 16 toolchain: Python 3.12 with the pinned package
# set from reference/requirements/dbr16-full.txt, OpenJDK 17, and the
# Databricks CLI, AWS CLI v2, Terraform and Terragrunt binaries.
FROM python:3.12-bullseye

# Architecture suffix used in the release-asset URLs below; the declared
# default is amd64.
ARG TARGETARCH=amd64

USER root
WORKDIR /tmp

# Setup UV for fast package installation
ENV UV_TOOL_BIN_DIR=/bin
COPY --from=ghcr.io/astral-sh/uv:0.7.14 /uv /uvx /bin/

# Install Python packages
# NOTE(review): the pinned pandas==1.5.3 predates Python 3.12 — confirm that a
# compatible build is available to this step.
COPY reference/requirements/dbr16-full.txt /tmp/requirements.txt
RUN uv pip install -r /tmp/requirements.txt --system && uv cache clean

# Install Java & System tools
# curl is listed explicitly because the AWS CLI, Terraform and Terragrunt steps
# below depend on it. The final rm also clears /tmp, including requirements.txt.
RUN apt-get update && \
    apt-get install -y -f -m \
        curl \
        jq \
        openjdk-17-jdk-headless \
        unzip \
        wget \
        zip && \
    /var/lib/dpkg/info/ca-certificates-java.postinst configure && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install Databricks CLI
ARG DBX_CLI_VERSION=0.256.0
RUN mkdir -p /tmp/dbx-cli && \
    wget -q -O /tmp/dbx-cli/databricks_cli.zip \
        "https://github.com/databricks/cli/releases/download/v${DBX_CLI_VERSION}/databricks_cli_${DBX_CLI_VERSION}_linux_${TARGETARCH}.zip" && \
    unzip -q /tmp/dbx-cli/databricks_cli.zip -d /tmp/dbx-cli && \
    mv /tmp/dbx-cli/databricks /usr/local/bin/databricks && \
    chmod +x /usr/local/bin/databricks && \
    rm -rf /tmp/dbx-cli && \
    databricks --help

# Install AWS CLI
# AWS names its archives by kernel architecture (x86_64 / aarch64), not by the
# amd64 / arm64 values carried in TARGETARCH.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the archive.
RUN if [ "${TARGETARCH}" = "arm64" ]; then \
        AWS_CLI_PKG="aarch64"; \
    else \
        AWS_CLI_PKG="x86_64"; \
    fi \
    && curl --fail -L "https://awscli.amazonaws.com/awscli-exe-linux-${AWS_CLI_PKG}.zip" -o "awscliv2.zip" \
    && unzip -q awscliv2.zip \
    && ./aws/install \
    && rm -rf awscliv2.zip aws \
    && aws --version

# Install Terraform
ARG TERRAFORM_VERSION=1.12.2
RUN curl --fail --remote-name --location "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && unzip -q "terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && mv terraform /usr/bin \
    && rm -f LICENSE.txt "terraform_${TERRAFORM_VERSION}_linux_${TARGETARCH}.zip" \
    && terraform version

# Install Terragrunt
# The version value carries the leading "v" because it is used verbatim as the
# release tag in the download URL.
ARG TERRAGRUNT_VERSION="v0.81.10"
RUN curl --fail -sSL "https://github.com/gruntwork-io/terragrunt/releases/download/${TERRAGRUNT_VERSION}/terragrunt_linux_${TARGETARCH}" -o /usr/local/bin/terragrunt \
    && chmod +x /usr/local/bin/terragrunt \
    && terragrunt --version

# reference/requirements/dbr15-full.txt
# Packages from https://docs.databricks.com/aws/en/release-notes/runtime/15.4lts#installed-python-libraries
# Tested and verified in production
asttokens==2.0.5
astunparse==1.6.3
azure-core==1.30.2
azure-storage-blob==12.19.1
azure-storage-file-datalake==12.14.0
backcall==0.2.0
black==23.3.0
blinker==1.4
boto3==1.34.39
botocore==1.34.39
cachetools==5.3.3
certifi==2023.7.22
cffi==1.15.1
chardet==4.0.0
charset-normalizer==2.0.4
click==8.0.4
cloudpickle==2.2.1
comm==0.1.2
contourpy==1.0.5
cryptography==41.0.3
cycler==0.11.0
Cython==0.29.32
databricks-sdk==0.20.0
debugpy==1.6.7
decorator==5.1.1
delta-spark==3.2.0
distlib==0.3.8
entrypoints==0.4
executing==0.8.3
facets-overview==1.1.1
filelock==3.13.4
fonttools==4.25.0
gitdb==4.0.11
GitPython==3.1.43
google-api-core==2.18.0
google-auth==2.31.0
google-cloud-core==2.4.1
google-cloud-storage==2.17.0
google-crc32c==1.5.0
google-resumable-media==2.7.1
googleapis-common-protos==1.63.2
grpcio==1.60.0
grpcio-status==1.60.0
httplib2==0.20.2
idna==3.4
importlib-metadata==6.0.0
ipyflow-core==0.0.198
ipython==8.15.0
ipython-genutils==0.2.0
ipywidgets==7.7.2
isodate==0.6.1
jedi==0.18.1
jeepney==0.7.1
jmespath==0.10.0
joblib==1.2.0
jupyter_client==7.4.9
jupyter_core==5.3.0
keyring==23.5.0
kiwisolver==1.4.4
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
matplotlib==3.7.2
matplotlib-inline==0.1.6
mlflow-skinny==2.11.4
more-itertools==8.10.0
mypy-extensions==0.4.3
nest-asyncio==1.5.6
numpy==1.23.5
oauthlib==3.2.0
packaging==23.2
pandas==1.5.3
parso==0.8.3
pathspec==0.10.3
patsy==0.5.3
pexpect==4.8.0
pickleshare==0.7.5
Pillow==9.4.0
pip==23.2.1
platformdirs==3.10.0
plotly==5.9.0
prompt-toolkit==3.0.36
proto-plus==1.24.0
protobuf==4.24.1
psutil==5.9.0
psycopg2==2.9.3
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==14.0.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyccolo==0.0.52
pycparser==2.21
pydantic==1.10.6
Pygments==2.15.1
PyJWT==2.3.0
pyodbc==4.0.39
pyparsing==3.0.9
pyspark==3.5.0
python-dateutil==2.8.2
python-lsp-jsonrpc==1.1.1
pytz==2022.7
PyYAML==6.0
pyzmq==23.2.0
requests==2.31.0
rsa==4.9
s3transfer==0.10.2
scikit-learn==1.3.0
scipy==1.11.1
seaborn==0.12.2
SecretStorage==3.3.1
setuptools==68.0.0
six==1.16.0
smmap==5.0.1
sqlparse==0.5.0
ssh-import-id==5.11
stack-data==0.2.0
statsmodels==0.14.0
tenacity==8.2.2
threadpoolctl==2.2.0
tokenize-rt==4.2.1
tornado==6.3.2
traitlets==5.7.1
typing_extensions==4.10.0
tzdata==2022.1
ujson==5.4.0
urllib3==1.26.16
virtualenv==20.24.2
wadllib==1.3.6
wcwidth==0.2.5
wheel==0.38.4
zipp==3.11.0
# reference/requirements/dbr16-full.txt
# Packages from https://docs.databricks.com/aws/en/release-notes/runtime/16.4lts#installed-python-libraries
# Tested and verified in production
annotated-types==0.7.0
asttokens==2.0.5
astunparse==1.6.3
autocommand==2.2.2
azure-core==1.31.0
azure-storage-blob==12.23.0
azure-storage-file-datalake==12.17.0
backports.tarfile==1.2.0
black==24.4.2
blinker==1.7.0
boto3==1.34.69
botocore==1.34.69
cachetools==5.3.3
certifi==2024.6.2
cffi==1.16.0
chardet==4.0.0
charset-normalizer==2.0.4
click==8.1.7
cloudpickle==2.2.1
comm==0.2.1
contourpy==1.2.0
cryptography==42.0.5
cycler==0.11.0
Cython==3.0.11
databricks-sdk==0.30.0
debugpy==1.6.7
decorator==5.1.1
Deprecated==1.2.14
distlib==0.3.8
docstring-to-markdown==0.11
executing==0.8.3
facets-overview==1.1.1
filelock==3.15.4
fonttools==4.51.0
gitdb==4.0.11
GitPython==3.1.37
google-api-core==2.20.0
google-auth==2.35.0
google-cloud-core==2.4.1
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-resumable-media==2.7.2
googleapis-common-protos==1.65.0
grpcio==1.60.0
grpcio-status==1.60.0
httplib2==0.20.4
idna==3.7
importlib-metadata==6.0.0
importlib_resources==6.4.0
inflect==7.3.1
ipyflow-core==0.0.201
ipykernel==6.28.0
ipython==8.25.0
ipython-genutils==0.2.0
ipywidgets==7.7.2
isodate==0.6.1
jaraco.context==5.3.0
jaraco.functools==4.0.1
jaraco.text==3.12.1
jedi==0.19.1
jmespath==1.0.1
joblib==1.4.2
jupyter_client==8.6.0
jupyter_core==5.7.2
kiwisolver==1.4.4
launchpadlib==1.11.0
lazr.restfulclient==0.14.6
lazr.uri==1.0.6
matplotlib==3.8.4
matplotlib-inline==0.1.6
mccabe==0.7.0
mlflow-skinny==2.19.0
more-itertools==10.3.0
mypy==1.10.0
mypy-extensions==1.0.0
nest-asyncio==1.6.0
nodeenv==1.9.1
numpy==1.26.4
oauthlib==3.2.2
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
packaging==24.1
pandas==1.5.3
parso==0.8.3
pathspec==0.10.3
patsy==0.5.6
pexpect==4.8.0
pillow==10.3.0
pip==24.2
platformdirs==3.10.0
plotly==5.22.0
pluggy==1.0.0
prompt-toolkit==3.0.43
proto-plus==1.24.0
protobuf==4.24.1
psutil==5.9.0
psycopg2==2.9.10
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==15.0.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyccolo==0.0.65
pycparser==2.21
pydantic==2.8.2
pydantic_core==2.20.1
pyflakes==3.2.0
Pygments==2.15.1
PyJWT==2.7.0
pyodbc==5.0.1
pyparsing==3.0.9
pyright==1.1.294
python-dateutil==2.9.0.post0
python-lsp-jsonrpc==1.1.2
python-lsp-server==1.10.0
pytoolconfig==1.2.6
pytz==2024.1
PyYAML==6.0.1
pyzmq==25.1.2
requests==2.32.2
rope==1.12.0
rsa==4.9
s3transfer==0.10.2
scikit-learn==1.4.2
scipy==1.13.1
seaborn==0.13.2
setuptools==74.0.0
six==1.16.0
smmap==5.0.0
sqlparse==0.5.1
ssh-import-id==5.11
stack-data==0.2.0
statsmodels==0.14.2
tenacity==8.2.2
threadpoolctl==2.2.0
tokenize-rt==4.2.1
tomli==2.0.1
tornado==6.4.1
traitlets==5.14.3
typeguard==4.3.0
types-protobuf==3.20.3
types-psutil==5.9.0
types-pytz==2023.3.1.1
types-PyYAML==6.0.0
types-requests==2.31.0.0
types-setuptools==68.0.0.0
types-six==1.16.0
types-urllib3==1.26.25.14
typing_extensions==4.11.0
ujson==5.10.0
urllib3==1.26.16
virtualenv==20.26.2
wadllib==1.3.6
wcwidth==0.2.5
whatthepatch==1.0.2
wheel==0.43.0
wrapt==1.14.1
zipp==3.17.0
Based on the tested requirements, packages are categorized as follows:
# requirements/dbr15/core.txt
pandas==1.5.3
numpy==1.23.5
pyarrow==14.0.1
pyspark==3.5.0
delta-spark==3.2.0
databricks-sdk==0.20.0
requests==2.31.0
urllib3==1.26.16
certifi==2023.7.22
click==8.0.4
packaging==23.2
setuptools==68.0.0
pip==23.2.1
wheel==0.38.4
six==1.16.0
python-dateutil==2.8.2
pytz==2022.7
typing_extensions==4.10.0

# requirements/dbr16/core.txt
pandas==1.5.3
numpy==1.26.4
pyarrow==15.0.2
pyspark==3.5.0
delta-spark==3.2.0
databricks-sdk==0.30.0
requests==2.32.2
urllib3==1.26.16
certifi==2024.6.2
click==8.1.7
packaging==24.1
setuptools==74.0.0
pip==24.2
wheel==0.43.0
six==1.16.0
python-dateutil==2.9.0.post0
pytz==2024.1
typing_extensions==4.11.0

# requirements/dbr15/ml.txt
scikit-learn==1.3.0
scipy==1.11.1
statsmodels==0.14.0
matplotlib==3.7.2
seaborn==0.12.2
plotly==5.9.0
mlflow-skinny==2.11.4
joblib==1.2.0
threadpoolctl==2.2.0
patsy==0.5.3
contourpy==1.0.5
cycler==0.11.0
fonttools==4.25.0
kiwisolver==1.4.4
Pillow==9.4.0

# requirements/dbr16/ml.txt
scikit-learn==1.4.2
scipy==1.13.1
statsmodels==0.14.2
matplotlib==3.8.4
seaborn==0.13.2
plotly==5.22.0
mlflow-skinny==2.19.0
joblib==1.4.2
threadpoolctl==2.2.0
patsy==0.5.6
contourpy==1.2.0
cycler==0.11.0
fonttools==4.51.0
kiwisolver==1.4.4
pillow==10.3.0

# requirements/dbr15/cloud.txt
boto3==1.34.39
botocore==1.34.39
s3transfer==0.10.2
azure-core==1.30.2
azure-storage-blob==12.19.1
azure-storage-file-datalake==12.14.0
google-cloud-storage==2.17.0
google-cloud-core==2.4.1
google-auth==2.31.0
google-api-core==2.18.0
google-crc32c==1.5.0
google-resumable-media==2.7.1
googleapis-common-protos==1.63.2

# requirements/dbr16/cloud.txt
boto3==1.34.69
botocore==1.34.69
s3transfer==0.10.2
azure-core==1.31.0
azure-storage-blob==12.23.0
azure-storage-file-datalake==12.17.0
google-cloud-storage==2.18.2
google-cloud-core==2.4.1
google-auth==2.35.0
google-api-core==2.20.0
google-crc32c==1.6.0
google-resumable-media==2.7.2
googleapis-common-protos==1.65.0

# wheels/dbr-env-core/pyproject.toml
# Build configuration for the dbr-env-core meta-package. The package ships no
# dependencies by default; each runtime's pins are exposed as an extra
# (dbr15 / dbr16).
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "dbr-env-core"
version = "1.0.0"
description = "Databricks Runtime core Python dependencies"
readme = "README.md"
requires-python = ">=3.11"
license = {text = "Apache-2.0"}

[project.optional-dependencies]
# Mirrors requirements/dbr15/core.txt
dbr15 = [
    "pandas==1.5.3",
    "numpy==1.23.5",
    "pyarrow==14.0.1",
    "pyspark==3.5.0",
    "delta-spark==3.2.0",
    "databricks-sdk==0.20.0",
    "requests==2.31.0",
    "urllib3==1.26.16",
    "certifi==2023.7.22",
    "click==8.0.4",
    "packaging==23.2",
    "setuptools==68.0.0",
    "pip==23.2.1",
    "wheel==0.38.4",
    "six==1.16.0",
    "python-dateutil==2.8.2",
    "pytz==2022.7",
    "typing_extensions==4.10.0",
]
# Mirrors requirements/dbr16/core.txt
dbr16 = [
    "pandas==1.5.3",
    "numpy==1.26.4",
    "pyarrow==15.0.2",
    "pyspark==3.5.0",
    "delta-spark==3.2.0",
    "databricks-sdk==0.30.0",
    "requests==2.32.2",
    "urllib3==1.26.16",
    "certifi==2024.6.2",
    "click==8.1.7",
    "packaging==24.1",
    "setuptools==74.0.0",
    "pip==24.2",
    "wheel==0.43.0",
    "six==1.16.0",
    "python-dateutil==2.9.0.post0",
    "pytz==2024.1",
    "typing_extensions==4.11.0",
]

#!/bin/bash
# scripts/dbr-setup-pre
#
# Installs the system-level prerequisites for a DBR environment: Java 17 and a
# few command-line utilities.
#
# Usage:
#   dbr-setup-pre --version <dbr15|dbr16> [--platform <linux|darwin>] [--non-interactive]
set -euo pipefail

VERSION="1.0.0"
DBR_VERSION=""
PLATFORM=""
NON_INTERACTIVE=false

# Abort with a clear message when a value-taking option is the last argument.
# Without this, `set -u` aborts on "$2" with a bare "unbound variable" error.
#   $1 - option name, $2 - number of arguments still unparsed
require_value() {
    if [[ $2 -lt 2 ]]; then
        echo "Option $1 requires a value" >&2
        exit 1
    fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --version)
            require_value "$1" "$#"
            DBR_VERSION="$2"
            shift 2
            ;;
        --platform)
            require_value "$1" "$#"
            PLATFORM="$2"
            shift 2
            ;;
        --non-interactive)
            NON_INTERACTIVE=true
            shift
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# The version is only echoed back by this script, so an unexpected value is
# reported but does not stop the installation.
case "$DBR_VERSION" in
    dbr15|dbr16) ;;
    *)
        echo "Warning: --version should be dbr15 or dbr16 (got '${DBR_VERSION}')" >&2
        ;;
esac

# Honour --non-interactive: keep apt/dpkg from prompting during installs.
if [ "$NON_INTERACTIVE" = true ]; then
    export DEBIAN_FRONTEND=noninteractive
fi

# Detect platform if not specified
if [ -z "$PLATFORM" ]; then
    PLATFORM=$(uname -s | tr '[:upper:]' '[:lower:]')
fi

echo "Installing system dependencies for $DBR_VERSION on $PLATFORM"
# Install Java 17 (required for PySpark)
# Uses apt-get or yum on Linux and Homebrew on macOS. When no supported package
# manager or platform is found, a warning is printed and nothing is installed.
install_java() {
    echo "Installing Java 17..."
    if [ "$PLATFORM" = "linux" ]; then
        if command -v apt-get &> /dev/null; then
            apt-get update
            apt-get install -y openjdk-17-jdk-headless
            # Configure certificates for Java; failure here is tolerated
            /var/lib/dpkg/info/ca-certificates-java.postinst configure || true
        elif command -v yum &> /dev/null; then
            yum install -y java-17-openjdk-headless
        else
            echo "Warning: neither apt-get nor yum found; Java 17 was not installed" >&2
        fi
    elif [ "$PLATFORM" = "darwin" ]; then
        if command -v brew &> /dev/null; then
            brew install openjdk@17
        else
            echo "Warning: Homebrew not found; Java 17 was not installed" >&2
        fi
    else
        echo "Warning: unsupported platform '$PLATFORM'; Java 17 was not installed" >&2
    fi
}
# Install system packages
# Installs wget, curl, unzip, zip and jq on Linux (apt-get or yum) and wget,
# curl and jq via Homebrew on macOS. When no supported package manager or
# platform is found, a warning is printed and nothing is installed.
install_system_packages() {
    echo "Installing system packages..."
    if [ "$PLATFORM" = "linux" ]; then
        if command -v apt-get &> /dev/null; then
            # Refresh the index here so this function does not depend on an
            # earlier step having run `apt-get update`.
            apt-get update
            apt-get install -y wget curl unzip zip jq
        elif command -v yum &> /dev/null; then
            yum install -y wget curl unzip zip jq
        else
            echo "Warning: neither apt-get nor yum found; system packages were not installed" >&2
        fi
    elif [ "$PLATFORM" = "darwin" ]; then
        if command -v brew &> /dev/null; then
            brew install wget curl jq
        else
            echo "Warning: Homebrew not found; system packages were not installed" >&2
        fi
    else
        echo "Warning: unsupported platform '$PLATFORM'; system packages were not installed" >&2
    fi
}
# Main execution
# Runs the Java installer, then the system-package installer, and prints the
# follow-up pip command for the selected runtime.
main() {
    install_java
    install_system_packages
    echo "System dependencies installation complete"
    echo "Next: pip install dbr-env-all[$DBR_VERSION]"
}

main

#!/bin/bash
# scripts/dbr-setup-post
#
# Installs the binary tools for a DBR environment (Databricks CLI, Terraform,
# Terragrunt and AWS CLI) into INSTALL_DIR.
#
# Usage:
#   dbr-setup-post --version <dbr15|dbr16> [--install-dir <dir>] [--skip-checksums]
set -euo pipefail

VERSION="1.0.0"
DBR_VERSION=""
INSTALL_DIR="/usr/local/bin"
# NOTE(review): SKIP_CHECKSUMS is parsed below, but none of the installer
# functions in this script read it — checksum verification is not wired in yet.
SKIP_CHECKSUMS=false

# Version mappings from reference implementations
# (associative arrays require bash 4 or newer)
declare -A TOOL_VERSIONS_DBR15=(
    ["databricks-cli"]="0.245.0"
    ["terraform"]="1.11.2"
    ["terragrunt"]="0.77.0"
)
declare -A TOOL_VERSIONS_DBR16=(
    ["databricks-cli"]="0.256.0"
    ["terraform"]="1.12.2"
    ["terragrunt"]="0.81.10"
)

# Abort with a clear message when a value-taking option is the last argument.
# Without this, `set -u` aborts on "$2" with a bare "unbound variable" error.
#   $1 - option name, $2 - number of arguments still unparsed
require_value() {
    if [[ $2 -lt 2 ]]; then
        echo "Option $1 requires a value" >&2
        exit 1
    fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --version)
            require_value "$1" "$#"
            DBR_VERSION="$2"
            shift 2
            ;;
        --install-dir)
            require_value "$1" "$#"
            INSTALL_DIR="$2"
            shift 2
            ;;
        --skip-checksums)
            SKIP_CHECKSUMS=true
            shift
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# The installers use the DBR15 versions unless DBR_VERSION is exactly "dbr16".
# Make that default explicit when --version is omitted, and reject any other
# value rather than silently installing the DBR15 tool set for it.
case "$DBR_VERSION" in
    dbr15|dbr16) ;;
    "")
        echo "No --version given; defaulting to dbr15" >&2
        DBR_VERSION="dbr15"
        ;;
    *)
        echo "Unsupported DBR version: $DBR_VERSION (expected dbr15 or dbr16)" >&2
        exit 1
        ;;
esac
# Install Databricks CLI
# Downloads the release zip matching DBR_VERSION and the host OS/architecture,
# then places the `databricks` binary in INSTALL_DIR.
install_databricks_cli() {
    local version="${TOOL_VERSIONS_DBR15[databricks-cli]}"
    if [ "$DBR_VERSION" = "dbr16" ]; then
        version="${TOOL_VERSIONS_DBR16[databricks-cli]}"
    fi
    echo "Installing Databricks CLI v${version}..."

    # Declared separately from the assignments so a failing command
    # substitution is not masked by `local`.
    local platform arch
    platform=$(uname -s | tr '[:upper:]' '[:lower:]')
    arch=$(uname -m)
    # Map kernel architecture names onto the ones used in the asset names.
    case "$arch" in
        x86_64) arch="amd64" ;;
        aarch64) arch="arm64" ;;
    esac

    local url="https://github.com/databricks/cli/releases/download/v${version}/databricks_cli_${version}_${platform}_${arch}.zip"

    # Use a private scratch directory instead of a fixed path in /tmp.
    local workdir
    workdir=$(mktemp -d)
    wget -q "$url" -O "$workdir/databricks_cli.zip"
    # Extract only the binary so other archive members do not land in INSTALL_DIR.
    unzip -q -o "$workdir/databricks_cli.zip" databricks -d "$INSTALL_DIR"
    chmod +x "$INSTALL_DIR/databricks"
    rm -rf "$workdir"
    echo "Databricks CLI installed"
}
# Install Terraform
# Downloads the release zip matching DBR_VERSION and the host OS/architecture,
# then extracts the `terraform` binary into INSTALL_DIR.
install_terraform() {
    local version="${TOOL_VERSIONS_DBR15[terraform]}"
    if [ "$DBR_VERSION" = "dbr16" ]; then
        version="${TOOL_VERSIONS_DBR16[terraform]}"
    fi
    echo "Installing Terraform v${version}..."

    # Declared separately from the assignments so a failing command
    # substitution is not masked by `local`.
    local platform arch
    platform=$(uname -s | tr '[:upper:]' '[:lower:]')
    arch=$(uname -m)
    # Map kernel architecture names onto the ones used in the asset names.
    case "$arch" in
        x86_64) arch="amd64" ;;
        aarch64) arch="arm64" ;;
    esac

    local url="https://releases.hashicorp.com/terraform/${version}/terraform_${version}_${platform}_${arch}.zip"

    # Use a private scratch directory instead of a fixed path in /tmp.
    local workdir
    workdir=$(mktemp -d)
    # --fail: exit non-zero on an HTTP error instead of saving the error page.
    curl --fail -L "$url" -o "$workdir/terraform.zip"
    unzip -q -o "$workdir/terraform.zip" terraform -d "$INSTALL_DIR"
    chmod +x "$INSTALL_DIR/terraform"
    rm -rf "$workdir"
    echo "Terraform installed"
}
# Install Terragrunt
# Downloads the release binary matching DBR_VERSION and the host
# OS/architecture and installs it as INSTALL_DIR/terragrunt.
install_terragrunt() {
    local version="${TOOL_VERSIONS_DBR15[terragrunt]}"
    if [ "$DBR_VERSION" = "dbr16" ]; then
        version="${TOOL_VERSIONS_DBR16[terragrunt]}"
    fi
    # The version tables store bare numbers ("0.77.0"), but the reference
    # Dockerfiles download from "v"-prefixed tags ("v0.77.0"). Normalise so
    # either form produces the tagged URL.
    local tag="v${version#v}"
    echo "Installing Terragrunt ${tag}..."

    # Declared separately from the assignments so a failing command
    # substitution is not masked by `local`.
    local platform arch
    platform=$(uname -s | tr '[:upper:]' '[:lower:]')
    arch=$(uname -m)
    # Map kernel architecture names onto the ones used in the asset names.
    case "$arch" in
        x86_64) arch="amd64" ;;
        aarch64) arch="arm64" ;;
    esac

    local url="https://github.com/gruntwork-io/terragrunt/releases/download/${tag}/terragrunt_${platform}_${arch}"

    # Download to a scratch file first so a failed transfer never leaves a
    # broken executable in INSTALL_DIR. --fail makes HTTP errors fatal.
    local download
    download=$(mktemp)
    curl --fail -sSL "$url" -o "$download"
    mv "$download" "$INSTALL_DIR/terragrunt"
    # 755 rather than +x: mktemp creates the file readable by its owner only.
    chmod 755 "$INSTALL_DIR/terragrunt"
    echo "Terragrunt installed"
}
# Install AWS CLI
# Installs AWS CLI v2 from the official Linux installer archive. On any other
# platform nothing is installed and a notice is printed instead.
install_aws_cli() {
    echo "Installing AWS CLI..."

    # Declared separately from the assignments so a failing command
    # substitution is not masked by `local`.
    local platform arch
    platform=$(uname -s | tr '[:upper:]' '[:lower:]')
    arch=$(uname -m)

    if [ "$platform" != "linux" ]; then
        # Only the Linux installer is handled here; do not claim success.
        echo "AWS CLI installation skipped: unsupported platform '$platform'" >&2
        return 0
    fi

    # AWS publishes x86_64 and aarch64 archives; anything else falls back to x86_64.
    local aws_arch="x86_64"
    if [ "$arch" = "aarch64" ]; then
        aws_arch="aarch64"
    fi

    # Use a private scratch directory instead of fixed paths in /tmp.
    local workdir
    workdir=$(mktemp -d)
    # --fail: exit non-zero on an HTTP error instead of saving the error page.
    curl --fail -L "https://awscli.amazonaws.com/awscli-exe-linux-${aws_arch}.zip" -o "$workdir/awscliv2.zip"
    unzip -q -o "$workdir/awscliv2.zip" -d "$workdir"
    # --update lets the installer replace an existing installation, so the
    # script can be re-run.
    "$workdir/aws/install" --install-dir /usr/local/aws-cli --bin-dir "$INSTALL_DIR" --update
    rm -rf "$workdir"
    echo "AWS CLI installed"
}
# Main execution
# Creates INSTALL_DIR if needed, runs each installer in turn, and prints the
# validation command for the selected runtime.
main() {
    mkdir -p "$INSTALL_DIR"
    install_databricks_cli
    install_terraform
    install_terragrunt
    install_aws_cli
    echo "Binary tools installation complete"
    echo "Run 'dbr-validate --version $DBR_VERSION' to verify (provided by dbr-env-all package)"
}

main

# .github/workflows/build-wheels.yml
name: Build DBR Environment Wheels

# Runs on pushes to main that touch the wheel sources, requirements, reference
# implementations or this workflow, and on every pull request targeting main.
# NOTE(review): pull_request jobs run on the self-hosted runners below; if the
# repository accepts pull requests from forks, confirm that is acceptable.
on:
  push:
    branches: [main]
    paths:
      - 'wheels/**'
      - 'requirements/**'
      - 'reference/**'
      - '.github/workflows/build-wheels.yml'
  pull_request:
    branches: [main]

# Both jobs only read the repository, so grant nothing beyond that.
permissions:
  contents: read

jobs:
  # Builds and validates one wheel per package in the matrix.
  build-wheels:
    runs-on: [self-hosted, linux, dbr-builder]
    strategy:
      matrix:
        package: [core, ml, cloud, all]
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install build tools
        run: |
          pip install --upgrade pip
          pip install build twine

      - name: Build wheel for ${{ matrix.package }}
        run: |
          cd wheels/dbr-env-${{ matrix.package }}
          python -m build --wheel

      - name: Validate wheel
        run: |
          cd wheels/dbr-env-${{ matrix.package }}
          twine check dist/*.whl

      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.package }}
          path: wheels/dbr-env-${{ matrix.package }}/dist/*.whl
          retention-days: 7
          # A build that produced no wheel should fail here, not downstream.
          if-no-files-found: error

  # Builds each reference Dockerfile and smoke-tests the installed tools.
  test-reference-dockerfiles:
    needs: build-wheels
    runs-on: [self-hosted, linux, dbr-builder]
    strategy:
      matrix:
        dbr-version: [15, 16]
    steps:
      - uses: actions/checkout@v4

      - name: Download wheels
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-*
          path: dist/
          merge-multiple: true

      - name: Build reference Dockerfile
        run: |
          docker build -f reference/dockerfiles/dbr${{ matrix.dbr-version }}.Dockerfile \
            -t dbr-test:${{ matrix.dbr-version }} .

      - name: Test container
        run: |
          # Test Python version
          docker run --rm dbr-test:${{ matrix.dbr-version }} python --version
          # Test PySpark
          docker run --rm dbr-test:${{ matrix.dbr-version }} python -c "import pyspark; print(pyspark.__version__)"
          # Test tools
          docker run --rm dbr-test:${{ matrix.dbr-version }} databricks --version
          docker run --rm dbr-test:${{ matrix.dbr-version }} terraform version
          docker run --rm dbr-test:${{ matrix.dbr-version }} terragrunt --version
          docker run --rm dbr-test:${{ matrix.dbr-version }} aws --version

| Component | DBR 15 | DBR 16 |
|---|---|---|
| Python | 3.11 | 3.12 |
| UV | 0.6.12 | 0.7.14 |
| Databricks CLI | 0.245.0 | 0.256.0 |
| Terraform | 1.11.2 | 1.12.2 |
| Terragrunt | v0.77.0 | v0.81.10 |
| Java | 17 | 17 |
| Base Image | python:3.11-bullseye | python:3.12-bullseye |
# 1. Install system dependencies (requires sudo)
sudo ./scripts/dbr-setup-pre --version dbr15

# 2. Install Python packages
#    (quoted so the shell does not treat the [dbr15] extra as a glob pattern)
pip install "dbr-env-all[dbr15]"

# 3. Install binary tools
#    (sudo because the default install directory is /usr/local/bin)
sudo ./scripts/dbr-setup-post --version dbr15

# 4. Validate (command provided by dbr-env-all package)
dbr-validate --version dbr15

# Use reference Dockerfile as base
FROM python:3.11-bullseye

# Copy and run installation scripts
COPY scripts/ /tmp/scripts/
COPY dist/ /tmp/dist/

# The wheel is located by its on-disk name: wheel filenames use underscores
# (dbr_env_all-...), and a trailing "[dbr15]" inside an unquoted glob would be
# read by the shell as a character class. The extra is therefore appended to
# the resolved path inside quotes. --find-links lets pip pick up any other
# wheels placed in /tmp/dist.
RUN /tmp/scripts/dbr-setup-pre --version dbr15 --non-interactive && \
    wheel="$(ls /tmp/dist/dbr_env_all-*.whl)" && \
    pip install --no-cache-dir --find-links /tmp/dist "${wheel}[dbr15]" && \
    /tmp/scripts/dbr-setup-post --version dbr15 && \
    rm -rf /var/lib/apt/lists/* /tmp/scripts /tmp/dist

# Validate (command provided by dbr-env-all package)
RUN dbr-validate --version dbr15

This specification includes tested reference implementations from production Dockerfiles and requirements files, providing a reliable foundation for building DBR environment setup tools.