#!/usr/bin/bash
warning() {
printf '\033[1;33;40m%b\033[0m\n' "$1";
}
# if no arguments, then print help message
if [ $# -eq 0 ]; then
echo "This script is used in development enviroment (with source code), not tested in production enviroment (pypi package)."
echo ""
warnning "NOTE: place this script in the rd-agent root directory, as the script need to know the path of the source code."
warnning "NOTE: this script will only try to add dependencies that do not need root permission to the current environment."
warnning "NOTE: only tested on MacOS Tahoe 26.0.1 (M4)."
echo ""
echo "Usage: <competition_name> <loops> [<log_path>]"
echo ""
echo "Example 1 - start a new competition experiment with max loop number: bash $0 store-sales-time-series-forecasting 50"
echo "Example 2 - resume a stopped experiment with a new max loop number: bash $0 store-sales-time-series-forecasting 60 /path/to/previous/experiment/log"
exit 1
fi
# enable alias expansion (aliases are off by default in non-interactive shells)
shopt -s expand_aliases
competition_name="$1" # such as "store-sales-time-series-forecasting"
loops="$2" # max number of loops to run; if not specified, rd-agent keeps running until an exception occurs or CTRL+C is pressed.
log_path="$3" # if provided, ask rd-agent to try to resume from this path
if [ -n "$log_path" ]; then
warnning "Provided log path, try to resume from the logs."
# make sure log_path exists and is a directory
if [ ! -d "$log_path" ]; then
warnning "Log path $log_path does not exist or is not a directory."
exit 1
fi
fi
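# (for reference: a log path produced by a previous run of this script looks like
# log/<competition_name>/<YYYY-MM-DD-HH-MM>, matching LOG_TRACE_PATH below.)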
os=$(uname -s)
cpu_arch=$(uname -m)
# check for chromedriver (hard-coded path) for selenium, as rd-agent uses it to grab the competition description.
# if it does not exist, download it and place it in the current folder; this way we do not need root permission to run the script.
if [ ! -e "/usr/local/bin/chromedriver" ] && [ ! -e "./chromedriver" ]; then
warnning "chromedriver is not installed, try to install it."
if [ "$os" = "Linux" ]; then
# if not x86_64, then exit with error
if [ "$cpu_arch" != "x86_64" ]; then
warnning "this script only supported x86_64 on linux, but your cpu arch is $cpu_arch"
exit 1
fi
wget https://storage.googleapis.com/chrome-for-testing-public/141.0.7390.76/linux64/chromedriver-linux64.zip
unzip chromedriver-linux64.zip
mv chromedriver-linux64/chromedriver ./chromedriver
# installing the dependencies needs root permission, so ask the user to install them.
warning 'chromedriver is ready, make sure you have its dependencies installed before using it.'
warning 'Just run ./chromedriver to see which dependencies are needed. Rerun this script after you are ready.'
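# (assumption: the exact packages vary by distro; on Debian/Ubuntu the missing libraries are
# commonly things like libnss3, e.g. sudo apt-get install libnss3.
# Running ./chromedriver will report whatever is still missing.)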
rm -r chromedriver-linux64
rm chromedriver-linux64.zip
# on the first chromedriver install, the user needs to install its dependencies, so we exit here.
exit 1
elif [ "$os" = "Darwin" ]; then
# if is not arm64 mac, then exit with error
if [ "$(uname -m)" != "arm64" ]; then
warning 'this script does not support this CPU; only arm64 Macs are supported.'
exit 1
fi
wget https://storage.googleapis.com/chrome-for-testing-public/141.0.7390.76/mac-arm64/chromedriver-mac-arm64.zip
unzip chromedriver-mac-arm64.zip
mv chromedriver-mac-arm64/chromedriver ./chromedriver
# remove the quarantine attribute so macOS trusts the binary
xattr -d com.apple.quarantine ./chromedriver
rm chromedriver-mac-arm64.zip
rm -r chromedriver-mac-arm64
fi
fi
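# optional sanity check: the driver should print its version, e.g.
# ./chromedriver --version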
# root of the rd-agent
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# check if we are under the rd-agent root directory using pyproject.toml
if [ ! -f "${SCRIPT_DIR}/pyproject.toml" ]; then
echo "Please place this script under the root of rd-agent directory."
exit 1
fi
# if rd-agent is not installed with -e (editable mode), we need to set PYTHONPATH so the source tree is importable
export PYTHONPATH=$SCRIPT_DIR
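# (alternative: an editable install from the repo root makes setting PYTHONPATH unnecessary, e.g.
# pip install -e "$SCRIPT_DIR")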
# prepare for uv
if command -v uv &> /dev/null
then
# I am using uv :)
echo "Using uv"
alias pip='uv pip'
alias python='uv run python'
fi
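# (if uv is not installed, the official installer is typically:
# curl -LsSf https://astral.sh/uv/install.sh | sh
# see https://docs.astral.sh/uv/ for details)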
case $os in
Darwin*)
# check if brew is installed
if ! command -v brew &> /dev/null
then
warnning "brew is not installed, please install it first to install dependencies on macOS"
exit 1
fi
# if gtimeout is not installed, we need to install it
if ! command -v gtimeout &> /dev/null
then
warnning "gtimeout is not installed, now try to install coreutils"
brew install coreutils
fi
;;
esac
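# optional sanity check on macOS: gtimeout should now be available, e.g.
# gtimeout --version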
now_t=$(date +%Y-%m-%d-%H-%M)
#################################################################################
#
# settings from .devcontainer, let's apply them first,
# then we can override them on demand
#
#################################################################################
export BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
export ENABLE_CACHE=False
export MAX_RETRY=12000
export RETRY_WAIT_SECONDS=5
export CHAT_STREAM=False
export CHAT_TEMPERATURE=1
export CHAT_MODEL=o1-preview
export SYSTEM_PROMPT_ROLE=user
export EMBEDDING_MODEL=text-embedding-ada-002
export DUMP_CHAT_CACHE=True
export USE_CHAT_CACHE=False
export DUMP_EMBEDDING_CACHE=True
export USE_EMBEDDING_CACHE=False
export LOG_LLM_CHAT_CONTENT=True
export DS_LOCAL_DATA_PATH=/tmp/kaggle
export DS_IF_USING_MLE_DATA=True
export PICKLE_CACHE_FOLDER_PATH_STR=./log/pickle_cache
export CACHE_WITH_PICKLE=False
export ENABLE_CACHE=False
export PROMPT_CACHE_PATH=./log/prompt_cache.db
export DS_CODER_COSTEER_ENV_TYPE=conda
export DS_CODER_ON_WHOLE_PIPELINE=True
export COSTEER_V2_QUERY_FORMER_TRACE_LIMIT=3
export OPENAI_API_KEY=sk-1234
export OPENAI_API_BASE=http://ep14.213428.xyz:38881
# end of default settings
#################################################################################
################################################################################################
#
# customized settings: we configure the program via environment variables to meet our needs
#
################################################################################################
# kaggle settings: enable and update these if you do not use ~/.kaggle to store credentials;
# when starting a new competition from scratch, rd-agent needs them to download the competition data.
# export KAGGLE_USERNAME="YOUR KAGGLE USER NAME"
# export KAGGLE_KEY="YOUR KAGGLE KEY"
# export KAGGLE_PROXY="$HTTPS_PROXY"
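# (for reference, the kaggle CLI usually reads credentials from ~/.kaggle/kaggle.json, which
# looks like: {"username": "your_kaggle_username", "key": "your_kaggle_key"})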
# disable this so the kaggle CLI is used to download data
export DS_IF_USING_MLE_DATA=False
# environment type used to execute LLM-generated code; we use local here
export DS_CODER_COSTEER_ENV_TYPE="local"
# OPENAI settings
export CHAT_MODEL=gpt-5
# export OPENAI_API_KEY=<your_openai_api_key>
# export OPENAI_API_BASE=<your_openai_api_base>
# or use deepseek
# CHAT_MODEL=deepseek/deepseek-chat
# DEEPSEEK_API_KEY=<replace_with_your_deepseek_api_key>
# OPTIONAL: where the kaggle competition data is placed.
# rd-agent saves the competition description and extracts the data files into this folder
export DS_LOCAL_DATA_PATH="$SCRIPT_DIR/data"
# OPTIONAL: if your competition is complex and the generated code takes much longer to execute,
# enable these settings so runs can complete instead of being killed by the timeout.
# export DS_FULL_TIMEOUT=10800
# export DS_FULL_RECOMMEND_TIMEOUT=10800
# export DS_DEBUG_TIMEOUT=1800
# export DS_DEBUG_RECOMMEND_TIMEOUT=1800
# which folder is used as the current competition workspace.
# we build the default path from the competition name and a timestamp so each run is unique and easy to find
export WORKSPACE_PATH="git_ignore_folder/$competition_name/$now_t"
# where to save the logs; the ui toolkit can be used to view the details.
# we build the default path from the competition name and a timestamp to make it easier to find
export LOG_TRACE_PATH="log/$competition_name/$now_t"
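# (the logs can typically be browsed later with the rd-agent UI toolkit, e.g.
# rdagent ui --port 19899 --log_dir "$LOG_TRACE_PATH"
# exact flags may differ between rd-agent versions.)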
echo "Working dir: $SCRIPT_DIR"
echo "Competition: $competition_name"
echo "Loops: $loops"
# try to resume from the log path if provided
if [ -n "$log_path" ]; then
echo "Resume from log path: $log_path"
# override log path setting
export LOG_TRACE_PATH="$log_path"
python -u rdagent/app/data_science/loop.py --competition "$competition_name" --loop_n "$loops" --path "$log_path"
exit 0
fi
mkdir -p "$WORKSPACE_PATH"
mkdir -p "$LOG_TRACE_PATH"
if [ "$DS_CODER_COSTEER_ENV_TYPE" = "local" ]; then
# dependencies for code execution, as we share the same environment locally
pip install lightgbm keras tensorflow accelerate transformers torch opencv-python scikit-learn vtk pydicom xgboost
fi
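# (assumption: the package list above covers the libraries the generated code commonly imports;
# extend it if your competition's code needs additional packages.)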
python -u rdagent/app/data_science/loop.py --competition "$competition_name" --loop_n "$loops"