Skip to content

Instantly share code, notes, and snippets.

@Emrys-Hong
Created August 18, 2019 09:27
Show Gist options
  • Select an option

  • Save Emrys-Hong/4b39ea72576c348dc7917c4724a0d430 to your computer and use it in GitHub Desktop.

Select an option

Save Emrys-Hong/4b39ea72576c348dc7917c4724a0d430 to your computer and use it in GitHub Desktop.
Automatically searches hyperparameters for you; it comes with config.py.
"""Peform hyperparemeters search"""
import argparse
import os
from subprocess import check_call
import sys
from multiprocessing import Process
import utils
import torch
# Only these GPU ids are used for the launched jobs (round-robin).
gpu_option = [2]
# Use the same interpreter that is running this script.
PYTHON = sys.executable

parser = argparse.ArgumentParser()
parser.add_argument('--main_dir', required=True, help='Directory containing params.json')
# type=int so CLI-supplied values are ints; without it, a passed --num_process
# arrives as a str and the `len(tasks) == args.num_process` throttle check in
# __main__ can never be true.
parser.add_argument('--start_id', required=True, type=int, help='the number when the epoch starts')
parser.add_argument('--start_cuda', default=0, type=int, help='which cuda id to start from')
parser.add_argument('--num_process', default=3, type=int, help='how many number of process to start at the same time')

# Number of CUDA devices visible to torch (informational; GPU choice comes from gpu_option).
cuda_number = torch.cuda.device_count()
def launch_training_job(main_dir, config_id, cuda_id):
    """Launch one training run of main.py for a single hyperparameter config.

    Args:
        main_dir: (string) directory containing the generated config files
        config_id: (string or int) id of the config file to train with
        cuda_id: (int) CUDA device id to pin the job to via CUDA_VISIBLE_DEVICES

    Raises:
        Exception: if `main_dir` does not exist.
    """
    if not os.path.exists(main_dir):
        # Bug fix: the original raised with the undefined name `model_dir`,
        # which produced a NameError instead of this message.
        raise Exception(main_dir + ' is not created')

    # Launch training with this config, pinned to one GPU.
    cmd = "CUDA_VISIBLE_DEVICES={cuda_id} {python} main.py --config_dir={main_dir} --config_id={config_id}"\
        .format(cuda_id=cuda_id, python=PYTHON, main_dir=main_dir, config_id=config_id)
    print(cmd)
    check_call(cmd, shell=True)
if __name__ == "__main__":
# Load the "reference" parameters from parent_dir json file
args = parser.parse_args()
config_dir = os.path.join(args.main_dir, 'configs')
print(os.listdir(config_dir))
tasks = []
for config in os.listdir(config_dir):
config_id = config.split('_')[-1].split('.')[0]
if int(config_id) >= int(args.start_id):
# p = Process(target=launch_training_job, args=(args.main_dir, config_id, len(tasks)%cuda_number))
p = Process(target=launch_training_job, args=(args.main_dir, config_id,\
gpu_option[len(tasks)%len(gpu_option)]))
tasks.append(p)
p.start()
if len(tasks) == args.num_process:
for p in tasks:
p.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment