Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Manifest.toml
8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name = "SlurmClusterManager"
uuid = "c82cd089-7bf7-41d7-976b-6b5d413cbe0a"
authors = ["Joseph Kleinhenz <kleinhenz.joseph@gmail.com>"]
version = "1.1.0"
authors = ["Joseph Kleinhenz <kleinhenz.joseph@gmail.com>"]

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[compat]
julia = "1.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]

[compat]
julia = "1.0"
6 changes: 4 additions & 2 deletions src/slurmmanager.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,12 @@ function Distributed.launch(manager::SlurmManager, params::Dict, instances_arr::
line = readline(manager.srun_proc)
m = match(r".*:(\d*)#(.*)", line)
m === nothing && error("could not parse $line")
m[1] === nothing && error("could not extract first capture group after parsing $line")
m[2] === nothing && error("could not extract second capture group after parsing $line")

config = WorkerConfig()
config.port = parse(Int, m[1])
config.host = strip(m[2])
config.port = parse(Int, m[1]::AbstractString)
config.host = strip(m[2]::AbstractString)

@debug "Worker $i ready on host $(config.host), port $(config.port)"

Expand Down
4 changes: 4 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
JET = "0.9, 0.10"
6 changes: 6 additions & 0 deletions test/jet.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import SlurmClusterManager

import JET
import Test

JET.test_package(SlurmClusterManager; ignored_modules = (Base,))
120 changes: 62 additions & 58 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,72 +15,76 @@ else
ENV["JULIA_DEBUG"] = original_JULIA_DEBUG * ",SlurmClusterManager"
end

@testset "SlurmClusterManager.jl" begin
# test that slurm is available
@test !(Sys.which("sinfo") === nothing)
# @testset "SlurmClusterManager.jl" begin
# # test that slurm is available
# @test !(Sys.which("sinfo") === nothing)

# submit job
# project should point to top level dir so that SlurmClusterManager is available to script.jl
project_path = abspath(joinpath(@__DIR__, ".."))
@info "" project_path
jobid = withenv("JULIA_PROJECT"=>project_path) do
strip(read(`sbatch --export=ALL --parsable -n 4 -o test.out script.bash`, String))
end
@info "" jobid
# # submit job
# # project should point to top level dir so that SlurmClusterManager is available to script.jl
# project_path = abspath(joinpath(@__DIR__, ".."))
# @info "" project_path
# jobid = withenv("JULIA_PROJECT"=>project_path) do
# strip(read(`sbatch --export=ALL --parsable -n 4 -o test.out script.bash`, String))
# end
# @info "" jobid

# get job state from jobid
getjobstate = jobid -> begin
cmd = Cmd(`scontrol show jobid=$jobid`, ignorestatus=true)
info = read(cmd, String)
state = match(r"JobState=(\S*)", info)
return state === nothing ? nothing : state.captures[1]
end
# # get job state from jobid
# getjobstate = jobid -> begin
# cmd = Cmd(`scontrol show jobid=$jobid`, ignorestatus=true)
# info = read(cmd, String)
# state = match(r"JobState=(\S*)", info)
# return state === nothing ? nothing : state.captures[1]
# end

# wait for job to complete
default_timeout_seconds = 600 # 10 minutes
timeout_seconds = parse(Float64, strip(get(ENV, "JULIA_SLURMCLUSTERMANAGER_TEST_TIMEOUT_SECONDS", "$(default_timeout_seconds)")))
pollint = 1.0 # 1 second
status = timedwait(timeout_seconds, pollint=pollint) do
state = getjobstate(jobid)
state == nothing && return false
@info "jobstate=$(state)"
return state == "COMPLETED" || state == "FAILED"
end
# # wait for job to complete
# default_timeout_seconds = 600 # 10 minutes
# timeout_seconds = parse(Float64, strip(get(ENV, "JULIA_SLURMCLUSTERMANAGER_TEST_TIMEOUT_SECONDS", "$(default_timeout_seconds)")))
# pollint = 1.0 # 1 second
# status = timedwait(timeout_seconds, pollint=pollint) do
# state = getjobstate(jobid)
# state == nothing && return false
# @info "jobstate=$(state)"
# return state == "COMPLETED" || state == "FAILED"
# end

state = getjobstate(jobid)
# state = getjobstate(jobid)

# check that job finished running within timelimit (either completed or failed)
@test status == :ok
@test state == "COMPLETED"
# # check that job finished running within timelimit (either completed or failed)
# @test status == :ok
# @test state == "COMPLETED"

# print job output
output = read("test.out", String)
println("# BEGIN script output")
println(output)
println("# END script output")
# # print job output
# output = read("test.out", String)
# println("# BEGIN script output")
# println(output)
# println("# END script output")

end # testset "SlurmClusterManager.jl"
# end # testset "SlurmClusterManager.jl"

@testset "warn_if_unexpected_params()" begin
if Base.VERSION >= v"1.6"
# This test is not relevant for Julia 1.6+
else
params = Dict(:env => ["foo" => "bar"])
SlurmClusterManager.warn_if_unexpected_params(params)
@test_logs(
(:warn, "The user provided the `env` kwarg, but SlurmClusterManager.jl's support for the `env` kwarg requires Julia 1.6 or later"),
SlurmClusterManager.warn_if_unexpected_params(params),
)
end
end
# @testset "warn_if_unexpected_params()" begin
# if Base.VERSION >= v"1.6"
# # This test is not relevant for Julia 1.6+
# else
# params = Dict(:env => ["foo" => "bar"])
# SlurmClusterManager.warn_if_unexpected_params(params)
# @test_logs(
# (:warn, "The user provided the `env` kwarg, but SlurmClusterManager.jl's support for the `env` kwarg requires Julia 1.6 or later"),
# SlurmClusterManager.warn_if_unexpected_params(params),
# )
# end
# end

# include("util.jl")

include("util.jl")
# @testset "Test some unhappy paths (error paths)" begin
# @testset "intentionally fail" begin
# include("error_path_intentionally_fail.jl")
# end
# @testset "manager's launch timeout" begin
# include("error_path_manager_timeout.jl")
# end
# end

@testset "Test some unhappy paths (error paths)" begin
@testset "intentionally fail" begin
include("error_path_intentionally_fail.jl")
end
@testset "manager's launch timeout" begin
include("error_path_manager_timeout.jl")
end
@testset "JET" begin
include("jet.jl")
end
Loading