chaosddp · October 14, 2025 06:38
diff --git a/run_rdagent_competition_local.sh b/run_rdagent_competition_local.sh
 #!/usr/bin/env bash
 # bash is resolved through env because /usr/bin/bash is not present on every
 # system (stock macOS, for example, only provides /bin/bash).

 #######################################
 # Print a message using the ANSI attributes 1;33;40 (bold, yellow foreground,
 # black background) and reset the attributes afterwards.
 # Arguments: $1 - message; backslash escapes inside it are expanded (printf %b)
 # Outputs:   the decorated message followed by a newline, on stdout
 #######################################
 warnning() {
   printf '\033[1;33;40m%b\033[0m\n' "$1"
 }

 # Called without any argument: show what the script is for, how to call it,
 # and leave with a non-zero status.
 if [[ $# -eq 0 ]]; then
   printf '%s\n' \
     "This script is used in development enviroment (with source code), not tested in production enviroment (pypi package)." \
     ""
   warnning "NOTE: place this script in the rd-agent root directory, as the script need to know the path of the source code."
   warnning "NOTE: this script will only try to add dependencies that do not need root permission to the current environment."
   warnning "NOTE: only tested on MacOS Tahoe 26.0.1 (M4)."
   printf '%s\n' \
     "" \
     "Usage: <competition_name> <loops> [<log_path>]" \
     "" \
     "Example 1 - start a new competition experiment with max loop number: bash $0 store-sales-time-series-forecasting 50" \
     "Example 2 - resume a stopped experiment with a new max loop number: bash $0 store-sales-time-series-forecasting 60 /path/to/previous/experiment/log"

   exit 1
 fi

 # Non-interactive bash only expands aliases when this option is switched on;
 # the script defines pip/python aliases further down.
 shopt -s expand_aliases

 # Positional arguments:
 #   $1 competition name, such as "store-sales-time-series-forecasting"
 #   $2 max loop to run
 #   $3 optional log directory of a previous run to resume from
 competition_name=${1}
 loops=${2}
 log_path=${3}

 if [[ -n "$log_path" ]]; then
   warnning "Provided log path, try to resume from the logs."

   # resuming only makes sense from an existing directory
   [[ -d "$log_path" ]] || {
     warnning "Log path $log_path does not exist or is not a directory."
     exit 1
   }
 fi

 # Platform identification; $os is consulted again later for macOS-only setup.
 os=$(uname -s)
 cpu_arch=$(uname -m)

 # check chromedriver (hard coded) for selenium, as rd-agent uses it to grab the competition description.
 # If it is found neither in /usr/local/bin nor in the current folder, download it into
 # the current folder, so that no root permission is needed to run the script.
 if [ ! -e "/usr/local/bin/chromedriver" ] && [ ! -e "./chromedriver" ]; then
   warnning "chromedriver is not installed, try to install it."

   chromedriver_url_base="https://storage.googleapis.com/chrome-for-testing-public/141.0.7390.76"
   chromedriver_platform=""

   # Select the archive for this platform; unsupported CPUs abort.
   if [ "$os" = "Linux" ]; then
     if [ "$cpu_arch" != "x86_64" ]; then
       warnning "this script only supported x86_64 on linux, but your cpu arch is $cpu_arch"
       exit 1
     fi
     chromedriver_platform="linux64"
   elif [ "$os" = "Darwin" ]; then
     if [ "$cpu_arch" != "arm64" ]; then
       warnning 'this script not support this cpu, please use the script for arm64 mac.'
       exit 1
     fi
     chromedriver_platform="mac-arm64"
   fi

   if [ -z "$chromedriver_platform" ]; then
     # Other operating systems were silently skipped before; keep going but say so.
     warnning "no chromedriver download is configured for $os, please install chromedriver manually."
   else
     chromedriver_pkg="chromedriver-$chromedriver_platform"
     chromedriver_zip="$chromedriver_pkg.zip"
     chromedriver_url="$chromedriver_url_base/$chromedriver_platform/$chromedriver_zip"

     # Prefer wget, fall back to curl; a failed transfer must stop the script
     # instead of continuing with a missing or partial archive.
     download_ok=false
     if command -v wget > /dev/null 2>&1; then
       wget -O "$chromedriver_zip" "$chromedriver_url" && download_ok=true
     elif command -v curl > /dev/null 2>&1; then
       curl -fL -o "$chromedriver_zip" "$chromedriver_url" && download_ok=true
     else
       warnning "neither wget nor curl is installed, cannot download chromedriver."
     fi

     if [ "$download_ok" != true ]; then
       rm -f -- "$chromedriver_zip"
       warnning "failed to download $chromedriver_url"
       exit 1
     fi

     if ! unzip -o "$chromedriver_zip" || ! mv "$chromedriver_pkg/chromedriver" ./chromedriver; then
       rm -rf -- "$chromedriver_pkg" "$chromedriver_zip"
       warnning "failed to unpack $chromedriver_zip"
       exit 1
     fi

     rm -r -- "$chromedriver_pkg"
     rm -- "$chromedriver_zip"

     if [ "$os" = "Linux" ]; then
       # install dependencies need permission, so ask user to install dependencies.
       warnning 'chromedriver is ready, make sure you have dependencies installed before using it.'
       warnning 'Just run ./chromedriver to see what dependencies we need. Retun this script after you are ready.'

       # first time installing the chromedriver, the user has to install its dependencies, so exit here.
       exit 1
     else
       # trust the binary to run; best effort, because xattr fails when the
       # quarantine attribute is not present on the file.
       xattr -d com.apple.quarantine ./chromedriver 2> /dev/null || true
     fi
   fi
 fi


 # root of the rd-agent: absolute path of the directory that contains this script.
 # The inner substitution is quoted so directories containing whitespace survive,
 # and cd's own output is discarded because cd prints the target directory when it
 # is resolved through CDPATH, which would otherwise end up in the value.
 SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)

 # The rd-agent root is recognised by its pyproject.toml; stop when it is absent.
 [[ -f "${SCRIPT_DIR}/pyproject.toml" ]] || {
   echo "Please place  this script under the root of rd-agent directory."
   exit 1
 }

 # Needed when rd-agent was not installed with -e: expose the sources via PYTHONPATH.
 export PYTHONPATH="${SCRIPT_DIR}"

 # When uv is on PATH, route pip and python through it.
 if command -v uv > /dev/null 2>&1; then
   echo "Using uv"

   alias pip='uv pip'
   alias python='uv run python'
 fi


 # macOS only: brew must be available, and coreutils is installed through it
 # when the gtimeout command cannot be found.
 if [[ "$os" == Darwin* ]]; then
   if ! command -v brew > /dev/null 2>&1; then
     warnning "brew is not installed, please install it first to install dependencies on macOS"
     exit 1
   fi

   command -v gtimeout > /dev/null 2>&1 || {
     warnning "gtimeout is not installed, now try to install coreutils"
     brew install coreutils
   }
 fi

 # Minute-resolution timestamp, used below to build per-run folder names.
 now_t=$(date '+%Y-%m-%d-%H-%M')


 #################################################################################
 #
 # Baseline environment taken from .devcontainer. It is applied first, so that
 # the customized section that follows can override single values on demand.
 # Variables are grouped by name only; their meaning is defined by rd-agent.
 #
 #################################################################################

 # backend and retry
 export \
   BACKEND=rdagent.oai.backend.LiteLLMAPIBackend \
   MAX_RETRY=12000 \
   RETRY_WAIT_SECONDS=5

 # chat / embedding
 export \
   CHAT_STREAM=False \
   CHAT_TEMPERATURE=1 \
   CHAT_MODEL=o1-preview \
   SYSTEM_PROMPT_ROLE=user \
   EMBEDDING_MODEL=text-embedding-ada-002

 # caches (ENABLE_CACHE was exported twice with the same value; once is enough)
 export \
   ENABLE_CACHE=False \
   DUMP_CHAT_CACHE=True \
   USE_CHAT_CACHE=False \
   DUMP_EMBEDDING_CACHE=True \
   USE_EMBEDDING_CACHE=False \
   PICKLE_CACHE_FOLDER_PATH_STR=./log/pickle_cache \
   CACHE_WITH_PICKLE=False \
   PROMPT_CACHE_PATH=./log/prompt_cache.db

 # logging
 export LOG_LLM_CHAT_CONTENT=True

 # data science scenario
 export \
   DS_LOCAL_DATA_PATH=/tmp/kaggle \
   DS_IF_USING_MLE_DATA=True \
   DS_CODER_COSTEER_ENV_TYPE=conda \
   DS_CODER_ON_WHOLE_PIPELINE=True \
   COSTEER_V2_QUERY_FORMER_TRACE_LIMIT=3

 # NOTE(review): key and endpoint are hard-coded here and overwrite whatever the
 # caller already exported; the key looks like a placeholder - confirm.
 export \
   OPENAI_API_KEY=sk-1234 \
   OPENAI_API_BASE=http://ep14.213428.xyz:38881

 # end of default settings
 #################################################################################


 ################################################################################################
 #
 # Customized settings: the program is configured through environment variables,
 # and the values below override the defaults to match our needs.
 #
 ################################################################################################

 # Kaggle credentials: enable and fill these in when ~/.kaggle is not used to store them.
 # When a new competition is started from scratch, rd-agent needs them to download the competition data.
 # export KAGGLE_USERNAME="YOUR KAGGLE USER NAME"
 # export KAGGLE_KEY="YOUR KAGGLE KEY"
 # export KAGGLE_PROXY="$HTTPS_PROXY"

 # Switched off so that the kaggle cli is used to download the data.
 export DS_IF_USING_MLE_DATA="False"

 # Environment type used to execute LLM generated code; "local" is used here.
 export DS_CODER_COSTEER_ENV_TYPE="local"

 # OpenAI settings
 export CHAT_MODEL="gpt-5"
 # export OPENAI_API_KEY=<your_openai_api_key>
 # export OPENAI_API_BASE=<your_openai_api_base>

 # ... or use deepseek instead
 # export CHAT_MODEL=deepseek/deepseek-chat
 # export DEEPSEEK_API_KEY=<replace_with_your_deepseek_api_key>


 # OPTIONAL: location of the kaggle competition data.
 # rd-agent saves the competition description and extracts the data files into this folder.
 export DS_LOCAL_DATA_PATH="${SCRIPT_DIR}/data"

 # OPTIONAL: if the competition is complex and the generated code needs much more time to run,
 # enable these settings so the run can complete; otherwise the execution is killed by a timeout.
 # export DS_FULL_TIMEOUT=10800
 # export DS_FULL_RECOMMEND_TIMEOUT=10800
 # export DS_DEBUG_TIMEOUT=1800
 # export DS_DEBUG_RECOMMEND_TIMEOUT=1800

 # Competition name plus timestamp: unique for every run and easy to find again.
 run_tag="${competition_name}/${now_t}"

 # Folder used as the workspace of the current competition run.
 export WORKSPACE_PATH="git_ignore_folder/${run_tag}"

 # Folder the logs are written to; the ui toolkit can be used to view the details.
 export LOG_TRACE_PATH="log/${run_tag}"

 echo "Working dir: $SCRIPT_DIR"
 echo "Competition: $competition_name"
 echo "Loops: $loops"

 # Arguments shared by both invocations of the loop. --loop_n is only passed
 # when a loop count was given, so that an omitted count is not forwarded as an
 # empty string.
 loop_args=(--competition "$competition_name")
 if [ -n "$loops" ]; then
   loop_args+=(--loop_n "$loops")
 fi

 # try to resume from the log path if provided
 if [ -n "$log_path" ]; then
   echo "Resume from log path: $log_path"

   # override log path setting
   export LOG_TRACE_PATH="$log_path"

   python -u rdagent/app/data_science/loop.py "${loop_args[@]}" --path "$log_path"

   # hand the exit status of the resumed run to the caller instead of always reporting success
   exit $?
 fi

 # a fresh run needs its workspace and log folders; stop if they cannot be created
 mkdir -p "$WORKSPACE_PATH" "$LOG_TRACE_PATH" || exit 1

 if [ "$DS_CODER_COSTEER_ENV_TYPE" = "local" ]; then
   # dependencies for code execution, as we are sharing same env locally
   pip install lightgbm keras tensorflow accelerate transformers torch opencv-python scikit-learn vtk pydicom xgboost
 fi

 python -u rdagent/app/data_science/loop.py "${loop_args[@]}"
	#!/usr/bin/env bash
	# bash is resolved through env because /usr/bin/bash is not present on every
	# system (stock macOS, for example, only provides /bin/bash).

	#######################################
	# Print a message using the ANSI attributes 1;33;40 (bold, yellow foreground,
	# black background) and reset the attributes afterwards.
	# Arguments: $1 - message; backslash escapes inside it are expanded (printf %b)
	# Outputs:   the decorated message followed by a newline, on stdout
	#######################################
	warnning() {
	  printf '\033[1;33;40m%b\033[0m\n' "$1"
	}

	# Called without any argument: show what the script is for, how to call it,
	# and leave with a non-zero status.
	if [[ $# -eq 0 ]]; then
	  printf '%s\n' \
	    "This script is used in development enviroment (with source code), not tested in production enviroment (pypi package)." \
	    ""
	  warnning "NOTE: place this script in the rd-agent root directory, as the script need to know the path of the source code."
	  warnning "NOTE: this script will only try to add dependencies that do not need root permission to the current environment."
	  warnning "NOTE: only tested on MacOS Tahoe 26.0.1 (M4)."
	  printf '%s\n' \
	    "" \
	    "Usage: <competition_name> <loops> [<log_path>]" \
	    "" \
	    "Example 1 - start a new competition experiment with max loop number: bash $0 store-sales-time-series-forecasting 50" \
	    "Example 2 - resume a stopped experiment with a new max loop number: bash $0 store-sales-time-series-forecasting 60 /path/to/previous/experiment/log"

	  exit 1
	fi

	# Non-interactive bash only expands aliases when this option is switched on;
	# the script defines pip/python aliases further down.
	shopt -s expand_aliases

	# Positional arguments:
	#   $1 competition name, such as "store-sales-time-series-forecasting"
	#   $2 max loop to run
	#   $3 optional log directory of a previous run to resume from
	competition_name=${1}
	loops=${2}
	log_path=${3}

	if [[ -n "$log_path" ]]; then
	  warnning "Provided log path, try to resume from the logs."

	  # resuming only makes sense from an existing directory
	  [[ -d "$log_path" ]] || {
	    warnning "Log path $log_path does not exist or is not a directory."
	    exit 1
	  }
	fi

	# Platform identification; $os is consulted again later for macOS-only setup.
	os=$(uname -s)
	cpu_arch=$(uname -m)

	# check chromedriver (hard coded) for selenium, as rd-agent uses it to grab the competition description.
	# If it is found neither in /usr/local/bin nor in the current folder, download it into
	# the current folder, so that no root permission is needed to run the script.
	if [ ! -e "/usr/local/bin/chromedriver" ] && [ ! -e "./chromedriver" ]; then
	  warnning "chromedriver is not installed, try to install it."

	  chromedriver_url_base="https://storage.googleapis.com/chrome-for-testing-public/141.0.7390.76"
	  chromedriver_platform=""

	  # Select the archive for this platform; unsupported CPUs abort.
	  if [ "$os" = "Linux" ]; then
	    if [ "$cpu_arch" != "x86_64" ]; then
	      warnning "this script only supported x86_64 on linux, but your cpu arch is $cpu_arch"
	      exit 1
	    fi
	    chromedriver_platform="linux64"
	  elif [ "$os" = "Darwin" ]; then
	    if [ "$cpu_arch" != "arm64" ]; then
	      warnning 'this script not support this cpu, please use the script for arm64 mac.'
	      exit 1
	    fi
	    chromedriver_platform="mac-arm64"
	  fi

	  if [ -z "$chromedriver_platform" ]; then
	    # Other operating systems were silently skipped before; keep going but say so.
	    warnning "no chromedriver download is configured for $os, please install chromedriver manually."
	  else
	    chromedriver_pkg="chromedriver-$chromedriver_platform"
	    chromedriver_zip="$chromedriver_pkg.zip"
	    chromedriver_url="$chromedriver_url_base/$chromedriver_platform/$chromedriver_zip"

	    # Prefer wget, fall back to curl; a failed transfer must stop the script
	    # instead of continuing with a missing or partial archive.
	    download_ok=false
	    if command -v wget > /dev/null 2>&1; then
	      wget -O "$chromedriver_zip" "$chromedriver_url" && download_ok=true
	    elif command -v curl > /dev/null 2>&1; then
	      curl -fL -o "$chromedriver_zip" "$chromedriver_url" && download_ok=true
	    else
	      warnning "neither wget nor curl is installed, cannot download chromedriver."
	    fi

	    if [ "$download_ok" != true ]; then
	      rm -f -- "$chromedriver_zip"
	      warnning "failed to download $chromedriver_url"
	      exit 1
	    fi

	    if ! unzip -o "$chromedriver_zip" || ! mv "$chromedriver_pkg/chromedriver" ./chromedriver; then
	      rm -rf -- "$chromedriver_pkg" "$chromedriver_zip"
	      warnning "failed to unpack $chromedriver_zip"
	      exit 1
	    fi

	    rm -r -- "$chromedriver_pkg"
	    rm -- "$chromedriver_zip"

	    if [ "$os" = "Linux" ]; then
	      # install dependencies need permission, so ask user to install dependencies.
	      warnning 'chromedriver is ready, make sure you have dependencies installed before using it.'
	      warnning 'Just run ./chromedriver to see what dependencies we need. Retun this script after you are ready.'

	      # first time installing the chromedriver, the user has to install its dependencies, so exit here.
	      exit 1
	    else
	      # trust the binary to run; best effort, because xattr fails when the
	      # quarantine attribute is not present on the file.
	      xattr -d com.apple.quarantine ./chromedriver 2> /dev/null || true
	    fi
	  fi
	fi


	# root of the rd-agent: absolute path of the directory that contains this script.
	# The inner substitution is quoted so directories containing whitespace survive,
	# and cd's own output is discarded because cd prints the target directory when it
	# is resolved through CDPATH, which would otherwise end up in the value.
	SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)

	# The rd-agent root is recognised by its pyproject.toml; stop when it is absent.
	[[ -f "${SCRIPT_DIR}/pyproject.toml" ]] || {
	  echo "Please place this script under the root of rd-agent directory."
	  exit 1
	}

	# Needed when rd-agent was not installed with -e: expose the sources via PYTHONPATH.
	export PYTHONPATH="${SCRIPT_DIR}"

	# When uv is on PATH, route pip and python through it.
	if command -v uv > /dev/null 2>&1; then
	  echo "Using uv"

	  alias pip='uv pip'
	  alias python='uv run python'
	fi


	# macOS only: brew must be available, and coreutils is installed through it
	# when the gtimeout command cannot be found.
	if [[ "$os" == Darwin* ]]; then
	  if ! command -v brew > /dev/null 2>&1; then
	    warnning "brew is not installed, please install it first to install dependencies on macOS"
	    exit 1
	  fi

	  command -v gtimeout > /dev/null 2>&1 || {
	    warnning "gtimeout is not installed, now try to install coreutils"
	    brew install coreutils
	  }
	fi

	# Minute-resolution timestamp, used below to build per-run folder names.
	now_t=$(date '+%Y-%m-%d-%H-%M')


	#################################################################################
	#
	# Baseline environment taken from .devcontainer. It is applied first, so that
	# the customized section that follows can override single values on demand.
	# Variables are grouped by name only; their meaning is defined by rd-agent.
	#
	#################################################################################

	# backend and retry
	export \
	  BACKEND=rdagent.oai.backend.LiteLLMAPIBackend \
	  MAX_RETRY=12000 \
	  RETRY_WAIT_SECONDS=5

	# chat / embedding
	export \
	  CHAT_STREAM=False \
	  CHAT_TEMPERATURE=1 \
	  CHAT_MODEL=o1-preview \
	  SYSTEM_PROMPT_ROLE=user \
	  EMBEDDING_MODEL=text-embedding-ada-002

	# caches (ENABLE_CACHE was exported twice with the same value; once is enough)
	export \
	  ENABLE_CACHE=False \
	  DUMP_CHAT_CACHE=True \
	  USE_CHAT_CACHE=False \
	  DUMP_EMBEDDING_CACHE=True \
	  USE_EMBEDDING_CACHE=False \
	  PICKLE_CACHE_FOLDER_PATH_STR=./log/pickle_cache \
	  CACHE_WITH_PICKLE=False \
	  PROMPT_CACHE_PATH=./log/prompt_cache.db

	# logging
	export LOG_LLM_CHAT_CONTENT=True

	# data science scenario
	export \
	  DS_LOCAL_DATA_PATH=/tmp/kaggle \
	  DS_IF_USING_MLE_DATA=True \
	  DS_CODER_COSTEER_ENV_TYPE=conda \
	  DS_CODER_ON_WHOLE_PIPELINE=True \
	  COSTEER_V2_QUERY_FORMER_TRACE_LIMIT=3

	# NOTE(review): key and endpoint are hard-coded here and overwrite whatever the
	# caller already exported; the key looks like a placeholder - confirm.
	export \
	  OPENAI_API_KEY=sk-1234 \
	  OPENAI_API_BASE=http://ep14.213428.xyz:38881

	# end of default settings
	#################################################################################


	################################################################################################
	#
	# Customized settings: the program is configured through environment variables,
	# and the values below override the defaults to match our needs.
	#
	################################################################################################

	# Kaggle credentials: enable and fill these in when ~/.kaggle is not used to store them.
	# When a new competition is started from scratch, rd-agent needs them to download the competition data.
	# export KAGGLE_USERNAME="YOUR KAGGLE USER NAME"
	# export KAGGLE_KEY="YOUR KAGGLE KEY"
	# export KAGGLE_PROXY="$HTTPS_PROXY"

	# Switched off so that the kaggle cli is used to download the data.
	export DS_IF_USING_MLE_DATA="False"

	# Environment type used to execute LLM generated code; "local" is used here.
	export DS_CODER_COSTEER_ENV_TYPE="local"

	# OpenAI settings
	export CHAT_MODEL="gpt-5"
	# export OPENAI_API_KEY=<your_openai_api_key>
	# export OPENAI_API_BASE=<your_openai_api_base>

	# ... or use deepseek instead
	# export CHAT_MODEL=deepseek/deepseek-chat
	# export DEEPSEEK_API_KEY=<replace_with_your_deepseek_api_key>


	# OPTIONAL: location of the kaggle competition data.
	# rd-agent saves the competition description and extracts the data files into this folder.
	export DS_LOCAL_DATA_PATH="${SCRIPT_DIR}/data"

	# OPTIONAL: if the competition is complex and the generated code needs much more time to run,
	# enable these settings so the run can complete; otherwise the execution is killed by a timeout.
	# export DS_FULL_TIMEOUT=10800
	# export DS_FULL_RECOMMEND_TIMEOUT=10800
	# export DS_DEBUG_TIMEOUT=1800
	# export DS_DEBUG_RECOMMEND_TIMEOUT=1800

	# Competition name plus timestamp: unique for every run and easy to find again.
	run_tag="${competition_name}/${now_t}"

	# Folder used as the workspace of the current competition run.
	export WORKSPACE_PATH="git_ignore_folder/${run_tag}"

	# Folder the logs are written to; the ui toolkit can be used to view the details.
	export LOG_TRACE_PATH="log/${run_tag}"

	echo "Working dir: $SCRIPT_DIR"
	echo "Competition: $competition_name"
	echo "Loops: $loops"

	# Arguments shared by both invocations of the loop. --loop_n is only passed
	# when a loop count was given, so that an omitted count is not forwarded as an
	# empty string.
	loop_args=(--competition "$competition_name")
	if [ -n "$loops" ]; then
	  loop_args+=(--loop_n "$loops")
	fi

	# try to resume from the log path if provided
	if [ -n "$log_path" ]; then
	  echo "Resume from log path: $log_path"

	  # override log path setting
	  export LOG_TRACE_PATH="$log_path"

	  python -u rdagent/app/data_science/loop.py "${loop_args[@]}" --path "$log_path"

	  # hand the exit status of the resumed run to the caller instead of always reporting success
	  exit $?
	fi

	# a fresh run needs its workspace and log folders; stop if they cannot be created
	mkdir -p "$WORKSPACE_PATH" "$LOG_TRACE_PATH" || exit 1

	if [ "$DS_CODER_COSTEER_ENV_TYPE" = "local" ]; then
	  # dependencies for code execution, as we are sharing same env locally
	  pip install lightgbm keras tensorflow accelerate transformers torch opencv-python scikit-learn vtk pydicom xgboost
	fi

	python -u rdagent/app/data_science/loop.py "${loop_args[@]}"
No results found