Last active
October 24, 2025 06:42
-
-
Save treydock/b964c5599fd057b0aa6a to your computer and use it in GitHub Desktop.
Revisions
-
treydock revised this gist
Jan 14, 2015 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -160,7 +160,7 @@ function slurm_job_submit ( job_desc, part_list, submit_uid ) end end return 0 end function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid ) @@ -193,7 +193,7 @@ function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid ) end end return 0 end --########################################################################-- -
treydock revised this gist
Jan 14, 2015 . 1 changed file with 30 additions and 30 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -78,28 +78,28 @@ end function _build_part_table ( part_list ) -- Create a partition table from SLURM structure local part_rec = {} for i in ipairs(part_list) do part_rec[i] = { part_rec_ptr=part_list[i] } setmetatable (part_rec[i], part_rec_meta) end return part_rec end --========================================================================-- function default_partition(part_rec) -- Return the name of the default partition -- part_rec : list of partitions local i = 1 while part_rec[i] do if part_rec[i].flag_default == 1 then return part_rec[i].name end i = i + 1 end end --========================================================================-- @@ -146,8 +146,8 @@ end --########################################################################-- function slurm_job_submit ( job_desc, part_list, submit_uid ) setmetatable (job_desc, job_req_meta) local part_rec = _build_part_table(part_list) local account = job_desc.account or job_desc.default_account local partition = job_desc.partition or default_partition(part_rec) @@ -212,22 +212,22 @@ log_err = slurm.error log_user = slurm.log_user job_rec_meta = { __index = function (table, key) return _get_job_rec_field(table.job_rec_ptr, key) end } job_req_meta = { __index = function (table, key) return _get_job_req_field(table.job_desc_ptr, key) end, __newindex = function (table, key, value) return _set_job_req_field(table.job_desc_ptr, key, value or "") end } part_rec_meta = { __index = function (table, key) return _get_part_rec_field(table.part_rec_ptr, key) end } log_info("initialized") -
treydock revised this gist
Jan 14, 2015 . 1 changed file with 46 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -104,12 +104,28 @@ end --========================================================================-- function get_partition(part_rec, name) -- Return the part_rec matching name -- part_rec : list of partitions -- name : partition name local i = 1 while part_rec[i] do if part_rec[i].name == name then return part_rec[i] end i = i + 1 end end --========================================================================-- function get_partition_qos(partition) local qos = nil local part = partition or 'NONE' local partT = PARTITION_TO_QOS[part] or PARTITION_TO_QOS["default"] log_debug("slurm_job_submit#get_partition_qos: partition: %s", part) if partT == nil then return qos @@ -118,7 +134,7 @@ function get_partition_qos(partition) end log_debug("slurm_job_submit#get_partition_qos: partT: %s", dump(partT)) log_debug("slurm_job_submit#get_partition_qos: partition: %s matched to qos: %s", part, qos) return qos end @@ -148,9 +164,34 @@ function slurm_job_submit ( job_desc, part_list, submit_uid ) end function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid ) setmetatable (job_desc, job_req_meta) setmetatable (job_rec, job_rec_meta) local part_rec = _build_part_table(part_list) local current_partition = job_rec.partition local new_partition = job_desc.partition or current_partition -- If changing partition if current_partition ~= new_partition then local new_part_rec = get_partition(part_rec, new_partition) -- If qos was not specified if job_desc.qos == nil then local qos = get_partition_qos(new_partition) if qos ~= nil then log_info("slurm_job_modify: for job %u from uid %d, qos value: %s", job_rec.job_id, modify_uid, qos) job_desc.qos = qos end end -- 
If time was not specified -- Instead of nil SLURM returns 4294967294 if (job_desc.time_limit == nil or job_desc.time_limit == 4294967294) then if job_rec.time_limit > new_part_rec.max_time then log_info("slurm_job_modify: for job %u from uid %d, time_limit value: %s", job_rec.job_id, modify_uid, new_part_rec.max_time) job_desc.time_limit = new_part_rec.max_time end end end return 0 end -
treydock revised this gist
Oct 6, 2014 . 1 changed file with 26 additions and 0 deletions.There are no files selected for viewing
-- Smoke-test driver for the partition -> QOS lookup.
--
-- Each entry of `tests` is a single-pair table mapping a default account name
-- to the partition that a job for that account would be submitted to.
-- NOTE(review): `printf` and `get_partition_qos` are not defined here; they
-- are presumably provided by the harness that loads this script -- confirm.
tests = {
    { ["hepx"]  = "hepx" },
    { ["hepx"]  = "serial" },
    { ["hepx"]  = "background" },
    { ["idhmc"] = "serial" },
    { ["idhmc"] = "background" },
    { ["iamcs"] = "serial" },
    { ["iamcs"] = "mpi-core8" },
    { ["iamcs"] = "mpi-core32" },
    { ["iamcs"] = "mpi-core32-4g" },
    { ["iamcs"] = "background" },
    { ["foo"]   = "serial" },
    { ["foo"]   = "mpi-core8" },
    { ["foo"]   = "background" },
    { ["foo"]   = "bar" },
}

-- Run every case in declaration order: announce it, perform the lookup,
-- then print a separator line.
for _, case in ipairs(tests) do
    local account, partition = next(case)
    while account ~= nil do
        printf("TEST: default_account: %s , partition: %s", account, partition)
        -- The result is not printed here; the lookup is called for whatever
        -- output it produces itself.
        local qos = get_partition_qos(partition) or ""
        printf("-------------------------------------------------------------")
        account, partition = next(case, account)
    end
end
treydock revised this gist
Sep 23, 2014 . 1 changed file with 31 additions and 82 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,32 +12,27 @@ Some code and ideas pulled from https://github.com/edf-hpc/slurm-llnl-misc-plugi -- --########################################################################-- PARTITION_TO_QOS = { ["hepx"] = { qos = "hepx" }, ["idhmc"] = { qos = "idhmc" }, ["serial"] = { qos = "general" }, ["serial-long"] = { qos = "long" }, ["mpi-core8"] = { qos = "mpi" }, ["mpi-core32"] = { qos = "mpi" }, ["mpi-core32-4g"] = { qos = "mpi" }, ["background"] = { qos = "background" }, ["background-4g"] = { qos = "background" }, ["grid"] = { qos = "grid" }, ["interactive"] = { qos = "interactive" }, ["default"] = { qos = "general" } } -- UNUSED --[[ PARTITION_ROUTES = { ['something'] = 'something-else', } ]] --########################################################################-- -- @@ -47,19 +42,6 @@ QOS_MAP = { --========================================================================-- function dump(o) if type(o) == 'table' then local s = '{ ' @@ -75,10 +57,11 @@ end --========================================================================-- -- UNUSED --[[ function reroute_job(job_desc, routeT) local partition = routeT["part"] log_info("slurm_job_submit#reroute_job: Setting partition to %s", partition) job_desc.partition = partition @@ -89,6 +72,7 @@ function reroute_job(job_desc, routeT) end end ]] --========================================================================-- @@ -105,20 +89,6 @@ end --========================================================================-- function default_partition(part_rec) -- Return the name of the default partition -- part_rec : list of partitions @@ -134,35 +104,21 @@ end 
--========================================================================-- function get_partition_qos(partition) local qos = nil local part = partition or 'NONE' local partT = PARTITION_TO_QOS[part] or PARTITION_TO_QOS["default"] log_info("slurm_job_submit#get_partition_qos: partition: %s", part) if partT == nil then return qos else qos = partT["qos"] end log_debug("slurm_job_submit#get_partition_qos: partT: %s", dump(partT)) log_info("slurm_job_submit#get_partition_qos: partition: %s matched to qos: %s", part, qos) return qos end @@ -175,19 +131,12 @@ end function slurm_job_submit ( job_desc, part_list, submit_uid ) setmetatable (job_desc, job_req_meta) local part_rec = _build_part_table(part_list) local account = job_desc.account or job_desc.default_account local partition = job_desc.partition or default_partition(part_rec) if job_desc.qos == nil then local qos = get_partition_qos(partition) if qos ~= nil then log_info("slurm_job_submit: job from uid %d, setting qos value: %s", submit_uid, qos) -
treydock created this gist
Aug 1, 2014 .There are no files selected for viewing
--[[

SLURM job submit filter for QOS

Some code and ideas pulled from https://github.com/edf-hpc/slurm-llnl-misc-plugins/blob/master/job_submit.lua

--]]

--########################################################################--
--
-- Define constant
--
--########################################################################--

-- Account -> forced partition/QOS. Only consumed by reroute_job(), whose call
-- site in slurm_job_submit() is currently commented out.
PART_ROUTES = {
    hepx  = { part = "serial",     qos = "hepx"  },
    grid  = { part = "background", qos = "grid"  },
    idhmc = { part = "serial",     qos = "idhmc" },
}

-- Account -> partition -> { qos, [cores] }.
-- `cores` is the number of running CPUs at which jobs stop receiving the
-- account QOS and are given the "default" account's QOS instead (see get_qos).
-- NOTE(review): the default keys use underscores (mpi_core8, mpi_core32). If
-- the real partition names are hyphenated (e.g. "mpi-core8") these entries
-- can never match -- confirm against slurm.conf.
QOS_MAP = {
    hepx = {
        partT = {
            serial = { cores = 12, qos = "hepx" },
        },
    },
    idhmc = {
        partT = {
            serial = { cores = 3, qos = "idhmc" },
        },
    },
    default = {
        partT = {
            serial     = { qos = "general" },
            mpi_core8  = { qos = "mpi" },
            mpi_core32 = { qos = "mpi" },
            background = { qos = "background" },
        },
    },
}

-- Names interpolated into a shell command must match this pattern
-- (letters, digits, '_', '.', '-').
local SAFE_NAME = "^[%w_%.%-]+$"

--########################################################################--
--
-- Define functions
--
--########################################################################--

--========================================================================--

function os.capture(cmd, raw)
    -- Run `cmd` through a shell and return what it wrote to stdout.
    -- cmd : shell command line
    -- raw : when truthy, return the output untouched; otherwise trim
    --       leading/trailing whitespace and collapse newlines into spaces
    -- Raises an error if the pipe cannot be opened or read.
    local f = assert(io.popen(cmd, 'r'))
    local s, read_err = f:read('*a')
    -- Close before asserting so a failed read cannot leak the pipe handle.
    f:close()
    s = assert(s, read_err)
    if raw then return s end
    s = string.gsub(s, '^%s+', '')
    s = string.gsub(s, '%s+$', '')
    s = string.gsub(s, '[\n\r]+', ' ')
    return s
end

--========================================================================--

function dump(o)
    -- Return a debug string for `o`; tables are rendered recursively as
    -- '{ [key] = value,... } ', everything else through tostring().
    if type(o) ~= 'table' then
        return tostring(o)
    end
    local parts = { '{ ' }
    for k, v in pairs(o) do
        local key = k
        if type(key) ~= 'number' then
            -- tostring() keeps boolean/table keys from raising on concatenation.
            key = '"' .. tostring(key) .. '"'
        end
        parts[#parts + 1] = '[' .. key .. '] = ' .. dump(v) .. ','
    end
    parts[#parts + 1] = '} '
    return table.concat(parts)
end

--========================================================================--

function reroute_job(job_desc, routeT)
    -- Force the job into the partition named by routeT, and give it the
    -- route's QOS when the job did not request one.
    -- job_desc : job request (with job_req_meta applied)
    -- routeT   : entry of PART_ROUTES ({ part = ..., qos = ... })
    local partition = routeT["part"]
    local qos = routeT["qos"]

    log_info("slurm_job_submit#reroute_job: Setting partition to %s", partition)
    job_desc.partition = partition

    if job_desc.qos == nil then
        log_info("slurm_job_submit#reroute_job: Setting QOS to %s", qos)
        job_desc.qos = qos
    end
end

--========================================================================--

function _build_part_table ( part_list )
    -- Create a partition table from SLURM structure
    -- part_list : list of partition pointers handed in by slurmctld
    -- Returns a list of wrapper tables whose fields resolve via part_rec_meta.
    local part_rec = {}
    for i in ipairs(part_list) do
        part_rec[i] = setmetatable({ part_rec_ptr = part_list[i] }, part_rec_meta)
    end
    return part_rec
end

--========================================================================--

function get_allocated_cpus(qos, partition)
    -- Return the CPU count reported by squeue for running jobs of `qos` in
    -- `partition`. Returns 0 when nothing is running, when either name is
    -- not safe to pass to a shell, or when the output is not a number.
    -- NOTE(review): this shells out to squeue from inside the job_submit
    -- plugin; confirm that is acceptable for this slurmctld deployment.
    if type(qos) ~= "string" or type(partition) ~= "string"
        or not string.match(qos, SAFE_NAME)
        or not string.match(partition, SAFE_NAME) then
        -- Both values are interpolated into a shell command line below.
        log_err("slurm_job_submit#get_allocated_cpus: refusing unsafe qos/partition: %s / %s",
                tostring(qos), tostring(partition))
        return 0
    end

    local cmd = "squeue --qos=" .. qos .. " --states=R --partition=" .. partition ..
                " --noheader --format='%C' | paste -sd+ | bc"
    local output = os.capture(cmd)
    if output == '' then
        return 0
    end

    local used = tonumber(output)
    if used == nil then
        -- Without this guard the caller would compare nil with a number.
        log_err("slurm_job_submit#get_allocated_cpus: unexpected squeue output: %s", output)
        return 0
    end
    return used
end

--========================================================================--

function default_partition(part_rec)
    -- Return the name of the default partition
    -- part_rec : list of partitions
    -- Returns nil when no partition has flag_default == 1.
    for _, part in ipairs(part_rec) do
        if part.flag_default == 1 then
            return part.name
        end
    end
end

--========================================================================--

function get_qos(account, partition, job_desc)
    -- Resolve the QOS for an account/partition pair from QOS_MAP.
    -- account   : account name (unknown accounts use the "default" entry)
    -- partition : partition name
    -- job_desc  : accepted for interface compatibility; not used
    -- Returns the QOS name, or nil when the partition has no mapping.
    -- tostring() so a nil account/partition cannot break string formatting.
    log_info("slurm_job_submit#get_qos: account: %s, partition: %s",
             tostring(account), tostring(partition))

    local accountT = QOS_MAP[account] or QOS_MAP["default"]
    log_debug("slurm_job_submit#get_qos: accountT: %s", dump(accountT))

    local partT = accountT["partT"][partition] or QOS_MAP["default"]["partT"][partition]
    if partT == nil then
        return nil
    end
    log_debug("slurm_job_submit#get_qos: partT: %s", dump(partT))

    local qos = partT["qos"]
    local cpus = partT["cores"]

    if cpus ~= nil then
        local used_cpus = get_allocated_cpus(qos, partition)
        if used_cpus >= cpus then
            -- Core budget for this account is used up: use the default mapping.
            log_info("slurm_job_submit#get_qos: %s used_cpus >= %s cpus, reassigning to default account",
                     used_cpus, cpus)
            return get_qos("default", partition, job_desc)
        end
    end

    log_info("slurm_job_submit: account: %s, partition: %s, matched to qos: %s",
             tostring(account), tostring(partition), tostring(qos))
    return qos
end

--########################################################################--
--
-- SLURM job_submit/lua interface:
--
--########################################################################--

function slurm_job_submit ( job_desc, part_list, submit_uid )
    -- Submit hook: when the job did not request a QOS, assign the one mapped
    -- to its account and partition. Always returns 0.
    setmetatable (job_desc, job_req_meta)

    local part_rec = _build_part_table (part_list)
    local account = job_desc.account or job_desc.default_account
    local partition = job_desc.partition or default_partition(part_rec)

    -- Partition re-routing is currently disabled.
    --[[
    local routeT = PART_ROUTES[partition]
    if routeT ~= nil then
        reroute_job(job_desc, routeT)
    end
    ]]

    if job_desc.qos == nil then
        local qos = get_qos(account, partition, job_desc)
        if qos ~= nil then
            log_info("slurm_job_submit: job from uid %d, setting qos value: %s", submit_uid, qos)
            job_desc.qos = qos
        end
    end

    return 0
end

function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
    -- Modify hook: attaches the accessor metatables and accepts the change
    -- as-is. Always returns 0.
    setmetatable (job_desc, job_req_meta)
    setmetatable (job_rec, job_rec_meta)

    return 0
end

--########################################################################--
--
-- Initialization code:
--
-- Define functions for logging and accessing slurmctld structures
--
--########################################################################--

log_info = slurm.log_info
log_verbose = slurm.log_verbose
log_debug = slurm.log_debug
log_err = slurm.error
log_user = slurm.log_user

-- Field reads on a job record are forwarded to _get_job_rec_field.
-- (Parameter is named `t`, not `table`, to avoid shadowing the table library.)
job_rec_meta = {
    __index = function (t, key)
        return _get_job_rec_field(t.job_rec_ptr, key)
    end
}

-- Field reads/writes on a job request are forwarded to the _get/_set
-- accessors; a nil (or false) value is written as "".
job_req_meta = {
    __index = function (t, key)
        return _get_job_req_field(t.job_desc_ptr, key)
    end,
    __newindex = function (t, key, value)
        return _set_job_req_field(t.job_desc_ptr, key, value or "")
    end
}

-- Field reads on a partition wrapper are forwarded to _get_part_rec_field.
part_rec_meta = {
    __index = function (t, key)
        return _get_part_rec_field(t.part_rec_ptr, key)
    end
}

log_info("initialized")

return slurm.SUCCESS