Last active
October 14, 2025 06:38
-
-
Save chaosddp/2aa5237f3292834a5a0b80ff614f68ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# NOTE: bash is resolved through env because /usr/bin/bash does not exist on
# macOS (bash lives in /bin there), while other systems place it elsewhere.

#######################################
# Print a highlighted warning line.
# Arguments:
#   $1 - message text; backslash escapes inside it are expanded (%b).
# Outputs:
#   The message wrapped in the ANSI SGR sequence 1;33;40 (bold, yellow
#   foreground, black background) followed by a reset and a newline, on stdout.
#######################################
warnning() {
  printf '\033[1;33;40m%b\033[0m\n' "$1"
}
# Called without any argument: show the help text and stop with status 1.
if (( $# == 0 )); then
  printf '%s\n' \
    "This script is used in development enviroment (with source code), not tested in production enviroment (pypi package)." \
    ""

  # Caveats are highlighted through the warnning helper.
  for note in \
    "NOTE: place this script in the rd-agent root directory, as the script need to know the path of the source code." \
    "NOTE: this script will only try to add dependencies that do not need root permission to the current environment." \
    "NOTE: only tested on MacOS Tahoe 26.0.1 (M4)."; do
    warnning "$note"
  done

  printf '%s\n' \
    "" \
    "Usage: <competition_name> <loops> [<log_path>]" \
    "" \
    "Example 1 - start a new competition experiment with max loop number: bash $0 store-sales-time-series-forecasting 50" \
    "Example 2 - resume a stopped experiment with a new max loop number: bash $0 store-sales-time-series-forecasting 60 /path/to/previous/experiment/log"
  exit 1
fi
# Aliases are not expanded in non-interactive shells unless this is switched on.
shopt -s expand_aliases

# Positional arguments:
#   1 - competition name, such as "store-sales-time-series-forecasting"
#   2 - maximum number of loops to run
#   3 - optional log directory of an earlier run to resume from
competition_name="${1}"
loops="${2}"
log_path="${3}"

# A resume was requested: the given log path has to be an existing directory.
if [[ -n "${log_path}" ]]; then
  warnning "Provided log path, try to resume from the logs."
  [[ -d "${log_path}" ]] || {
    warnning "Log path $log_path does not exist or is not a directory."
    exit 1
  }
fi
os=$(uname -s)
cpu_arch=$(uname -m)

# Chrome for Testing release the chromedriver binary is downloaded from.
chromedriver_version="141.0.7390.76"

#######################################
# Download a URL into a file, using wget when present and curl otherwise
# (macOS ships curl but not wget).
# Arguments:
#   $1 - URL to download
#   $2 - destination file
# Returns:
#   0 on success, non-zero when the download fails or no downloader exists.
#######################################
fetch_file() {
  local url=$1
  local dest=$2
  if command -v wget > /dev/null 2>&1; then
    wget -O "$dest" "$url"
  elif command -v curl > /dev/null 2>&1; then
    # -f: fail on HTTP errors instead of saving the error page; -L: follow redirects.
    curl -fL -o "$dest" "$url"
  else
    warnning "neither wget nor curl is installed, can not download $url"
    return 1
  fi
}

#######################################
# Download the chromedriver archive for one platform and place the binary at
# ./chromedriver. The archive and the unpacked folder are removed afterwards,
# whether or not the installation succeeded.
# Globals:
#   chromedriver_version (read)
# Arguments:
#   $1 - Chrome for Testing platform name, e.g. linux64 or mac-arm64
# Returns:
#   0 on success, 1 when downloading, unpacking or moving the binary failed.
#######################################
install_chromedriver() {
  local platform=$1
  local archive="chromedriver-${platform}.zip"
  local unpacked="chromedriver-${platform}"
  local url="https://storage.googleapis.com/chrome-for-testing-public/${chromedriver_version}/${platform}/${archive}"
  local status=0

  if ! fetch_file "$url" "$archive"; then
    status=1
  elif ! unzip -o "$archive"; then
    status=1
  elif ! mv "${unpacked}/chromedriver" ./chromedriver; then
    status=1
  fi

  rm -rf -- "$unpacked" "$archive"
  return "$status"
}

# check chromedriver (hard coded) for selenium, as rd-agent uses it to grab the competition description.
# If it does not exist, download it into the current folder, so no root permission is needed.
if [ ! -e "/usr/local/bin/chromedriver" ] && [ ! -e "./chromedriver" ]; then
  warnning "chromedriver is not installed, try to install it."
  case "$os" in
    Linux)
      # only x86_64 is handled on Linux
      if [ "$cpu_arch" != "x86_64" ]; then
        warnning "this script only supported x86_64 on linux, but your cpu arch is $cpu_arch"
        exit 1
      fi
      if ! install_chromedriver "linux64"; then
        warnning "failed to download or unpack chromedriver."
        exit 1
      fi
      # Installing the runtime dependencies needs permission, so the user is asked to do it.
      warnning 'chromedriver is ready, make sure you have dependencies installed before using it.'
      warnning 'Just run ./chromedriver to see what dependencies we need. Retun this script after you are ready.'
      # First installation: stop here so the user can install the dependencies.
      exit 1
      ;;
    Darwin)
      # only arm64 Macs are handled
      if [ "$cpu_arch" != "arm64" ]; then
        warnning 'this script not support this cpu, please use the script for arm64 mac.'
        exit 1
      fi
      if ! install_chromedriver "mac-arm64"; then
        warnning "failed to download or unpack chromedriver."
        exit 1
      fi
      # Trust the binary to run. xattr -d fails when the attribute is absent,
      # which is harmless here, so that failure is deliberately ignored.
      xattr -d com.apple.quarantine ./chromedriver 2> /dev/null || true
      ;;
    *)
      # Unknown platform: nothing can be downloaded automatically; keep going as before.
      warnning "chromedriver can not be installed automatically on $os, please install it manually."
      ;;
  esac
fi
# Root of the rd-agent checkout: the absolute directory this script lives in.
# Every expansion is quoted so a path containing whitespace survives, and the
# output of cd is discarded because cd prints the target when CDPATH is in use.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)
# pyproject.toml next to this script is taken as the sign that the script
# sits in the rd-agent root directory; stop otherwise.
[[ -f "${SCRIPT_DIR}/pyproject.toml" ]] || {
  echo "Please place this script under the root of rd-agent directory."
  exit 1
}
# if rd-agent is not installed with -e, PYTHONPATH has to point at the sources
export PYTHONPATH="$SCRIPT_DIR"

# When uv is available, route pip and python through it. Shell functions are
# used instead of aliases: they do not depend on the expand_aliases option and
# take effect regardless of when the calling command was parsed.
if command -v uv > /dev/null 2>&1; then
  echo "Using uv"
  pip() { uv pip "$@"; }
  python() { uv run python "$@"; }
fi
# macOS only: Homebrew is required, and coreutils is installed through it
# when the gtimeout command is missing.
if [[ "$os" == Darwin* ]]; then
  command -v brew > /dev/null 2>&1 || {
    warnning "brew is not installed, please install it first to install dependencies on macOS"
    exit 1
  }

  command -v gtimeout > /dev/null 2>&1 || {
    warnning "gtimeout is not installed, now try to install coreutils"
    brew install coreutils
  }
fi
# Timestamp (minute resolution) of this run.
now_t=$(date +%Y-%m-%d-%H-%M)

#################################################################################
#
# settings from .devcontainer, lets apply them first,
# then we can override them on demand
#
#################################################################################

#######################################
# Export the default rd-agent configuration.
# Globals:
#   OPENAI_API_KEY, OPENAI_API_BASE (read; written only when both are empty)
#   every other variable exported below (written)
# Arguments:
#   None
#######################################
apply_default_settings() {
  export BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
  export ENABLE_CACHE=False
  export MAX_RETRY=12000
  export RETRY_WAIT_SECONDS=5
  export CHAT_STREAM=False
  export CHAT_TEMPERATURE=1
  export CHAT_MODEL=o1-preview
  export SYSTEM_PROMPT_ROLE=user
  export EMBEDDING_MODEL=text-embedding-ada-002
  export DUMP_CHAT_CACHE=True
  export USE_CHAT_CACHE=False
  export DUMP_EMBEDDING_CACHE=True
  export USE_EMBEDDING_CACHE=False
  export LOG_LLM_CHAT_CONTENT=True
  export DS_LOCAL_DATA_PATH=/tmp/kaggle
  export DS_IF_USING_MLE_DATA=True
  export PICKLE_CACHE_FOLDER_PATH_STR=./log/pickle_cache
  export CACHE_WITH_PICKLE=False
  export PROMPT_CACHE_PATH=./log/prompt_cache.db
  export DS_CODER_COSTEER_ENV_TYPE=conda
  export DS_CODER_ON_WHOLE_PIPELINE=True
  export COSTEER_V2_QUERY_FORMER_TRACE_LIMIT=3

  # The placeholder key and endpoint are applied as a pair, and only when the
  # caller configured neither: an existing key must not be replaced, and must
  # never be combined with (and so sent to) the placeholder endpoint.
  if [ -z "${OPENAI_API_KEY:-}" ] && [ -z "${OPENAI_API_BASE:-}" ]; then
    export OPENAI_API_KEY=sk-1234
    export OPENAI_API_BASE=http://ep14.213428.xyz:38881
  fi
}

apply_default_settings
# end of default settings
################################################################################################
#
# Customized settings: the program is configured through environment variables,
# so the defaults are overridden here to fit our needs.
#
################################################################################################

# Kaggle credentials: uncomment and fill in if you do not keep them in ~/.kaggle.
# rd-agent needs them to download the competition data when a new competition is started from scratch.
# export KAGGLE_USERNAME="YOUR KAGGLE USER NAME"
# export KAGGLE_KEY="YOUR KAGGLE KEY"
# export KAGGLE_PROXY="$HTTPS_PROXY"

# Switched off so that the kaggle CLI is used to download the data.
export DS_IF_USING_MLE_DATA="False"

# Environment type used to execute the code generated by the LLM: local.
export DS_CODER_COSTEER_ENV_TYPE="local"

# OpenAI settings.
export CHAT_MODEL="gpt-5"
# export OPENAI_API_KEY=<your_openai_api_key>
# export OPENAI_API_BASE=<your_openai_api_base>

# Alternatively, use deepseek:
# CHAT_MODEL=deepseek/deepseek-chat
# DEEPSEEK_API_KEY=<replace_with_your_deepseek_api_key>

# OPTIONAL: folder holding the kaggle competition data.
# rd-agent saves the competition description and extracts the data files into it.
export DS_LOCAL_DATA_PATH="${SCRIPT_DIR}/data"

# OPTIONAL: if the competition is complex and the generated code needs much more time to run,
# enable these settings so that the execution is not killed by the timeout.
# export DS_FULL_TIMEOUT=10800
# export DS_FULL_RECOMMEND_TIMEOUT=10800
# export DS_DEBUG_TIMEOUT=1800
# export DS_DEBUG_RECOMMEND_TIMEOUT=1800

# Workspace folder of this run; competition name plus timestamp keep it unique
# per run and easy to find.
export WORKSPACE_PATH="git_ignore_folder/${competition_name}/${now_t}"

# Folder for the logs of this run (the ui toolkit can be used to view the details);
# named the same way as the workspace.
export LOG_TRACE_PATH="log/${competition_name}/${now_t}"
echo "Working dir: $SCRIPT_DIR"
echo "Competition: $competition_name"
echo "Loops: $loops"

# Entry point of the data science loop, anchored to the rd-agent root so the
# script also works when it is started from another directory.
loop_script="${SCRIPT_DIR:-.}/rdagent/app/data_science/loop.py"

# Arguments shared by a fresh run and a resumed run. --loop_n is only passed
# when a loop count was given; an empty value is never handed to rd-agent.
loop_args=(--competition "$competition_name")
if [ -n "$loops" ]; then
  loop_args+=(--loop_n "$loops")
fi

# try to resume from the log path if provided
if [ -n "$log_path" ]; then
  echo "Resume from log path: $log_path"
  # override log path setting
  export LOG_TRACE_PATH="$log_path"
  python -u "$loop_script" "${loop_args[@]}" --path "$log_path"
  # hand the exit status of rd-agent back to the caller instead of always 0
  exit $?
fi

mkdir -p "$WORKSPACE_PATH"
mkdir -p "$LOG_TRACE_PATH"

if [ "$DS_CODER_COSTEER_ENV_TYPE" = "local" ]; then
  # dependencies for code execution, as the generated code shares this environment
  if ! pip install lightgbm keras tensorflow accelerate transformers torch opencv-python scikit-learn vtk pydicom xgboost; then
    # best effort: the run is still attempted, as it was before
    warnning "failed to install some dependencies for code execution, the generated code may not run."
  fi
fi

python -u "$loop_script" "${loop_args[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment