diff --git a/Project.toml b/Project.toml index 79ee376..5c66830 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "MLFlowClient" uuid = "64a0f543-368b-4a9a-827a-e71edb2a0b83" authors = ["@deyandyankov and contributors"] -version = "0.2.0" +version = "0.3.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" @@ -15,6 +16,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" FilePathsBase = "0.9" HTTP = "0.9" JSON = "0.21" +ShowCases = "0.1" URIs = "1" julia = "1" diff --git a/docs/src/reference.md b/docs/src/reference.md index ab8fe72..b31d1c8 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -16,6 +16,8 @@ MLFlowRunInfo MLFlowRunData MLFlowRunDataMetric MLFlowRunStatus +MLFlowArtifactFileInfo +MLFlowArtifactDirInfo ``` # Experiments @@ -36,14 +38,10 @@ getrun updaterun deleterun searchruns -``` - -# Logging - -```@docs logparam logmetric logartifact +listartifacts ``` # Utilities diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 3f0fa6c..ef253e1 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -17,6 +17,7 @@ using UUIDs using HTTP using URIs using JSON +using ShowCases using FilePathsBase: AbstractPath include("types.jl") @@ -31,7 +32,11 @@ export MLFlowRunDataMetric, MLFlowRun, get_info, - get_data + get_data, + MLFlowArtifactFileInfo, + MLFlowArtifactDirInfo, + get_path, + get_size include("utils.jl") export @@ -51,12 +56,10 @@ export getrun, updaterun, deleterun, - searchruns - -include("logging.jl") -export + searchruns, logparam, logmetric, - logartifact + logartifact, + listartifacts end diff --git a/src/logging.jl b/src/logging.jl deleted file mode 100644 index 74b207e..0000000 --- a/src/logging.jl +++ /dev/null 
@@ -1,136 +0,0 @@ -""" - logparam(mlf::MLFlow, run, key, value) - logparam(mlf::MLFlow, run, kv) - -Associates a key/value pair of parameters to the particular run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. -- `key`: parameter key (name). -- `value`: parameter value. - -One could also specify `kv::Dict` instead of separate `key` and `value` arguments. -""" -function logparam(mlf::MLFlow, run_id::String, key, value) - endpoint = "runs/log-parameter" - mlfpost(mlf, endpoint; run_id=run_id, key=key, value=value) -end -logparam(mlf::MLFlow, run_info::MLFlowRunInfo, key, value) = - logparam(mlf, run_info.run_id, key, value) -logparam(mlf::MLFlow, run::MLFlowRun, key, value) = - logparam(mlf, run.info, key, value) -function logparam(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, kv) - for (k, v) in kv - logparam(mlf, run, k, v) - end -end - -""" - logmetric(mlf::MLFlow, run, key, value::T; timestamp, step) where T<:Real - logmetric(mlf::MLFlow, run, key, values::AbstractArray{T}; timestamp, step) where T<:Real - -Logs a metric value (or values) against a particular run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String` -- `key`: metric name. -- `value`: metric value, must be numeric. - -# Keywords -- `timestamp`: if provided, must be a UNIX timestamp in milliseconds. By default, set to current time. -- `step`: step at which the metric value has been taken. 
-""" -function logmetric(mlf::MLFlow, run_id::String, key, value::T; timestamp=missing, step=missing) where T<:Real - endpoint = "runs/log-metric" - if ismissing(timestamp) - timestamp = Int(trunc(datetime2unix(now()) * 1000)) - end - mlfpost(mlf, endpoint; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) -end -logmetric(mlf::MLFlow, run_info::MLFlowRunInfo, key, value::T; timestamp=missing, step=missing) where T<:Real = - logmetric(mlf::MLFlow, run_info.run_id, key, value; timestamp=timestamp, step=step) -logmetric(mlf::MLFlow, run::MLFlowRun, key, value::T; timestamp=missing, step=missing) where T<:Real = - logmetric(mlf, run.info, key, value; timestamp=timestamp, step=step) - -function logmetric(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, key, values::AbstractArray{T}; timestamp=missing, step=missing) where T<:Real - for v in values - logmetric(mlf, run, key, v; timestamp=timestamp, step=step) - end -end - - -""" - logartifact(mlf::MLFlow, run, basefilename, data) - -Stores an artifact (file) in the run's artifact location. - -!!! note - Assumes that artifact_uri is mapped to a local directory. - At the moment, this only works if both MLFlow and the client are running on the same host or they map a directory that leads to the same location over NFS, for example. - -# Arguments -- `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. -- `basefilename`: name of the file to be written. -- `data`: artifact content, an object that can be written directly to a file handle. - -# Throws -- an `ErrorException` if an exception occurs during writing artifact. - -# Returns - -path of the artifact that was created. 
-""" -function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) - mlflowrun = getrun(mlf, run_id) - artifact_uri = mlflowrun.info.artifact_uri - mkpath(artifact_uri) - filepath = joinpath(artifact_uri, basefilename) - try - f = open(filepath, "w") - write(f, data) - close(f) - catch e - error("Unable to create artifact $(filepath): $e") - end - filepath -end -logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data) = - logartifact(mlf, run.info, basefilename, data) -logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, basefilename::AbstractString, data) = - logartifact(mlf, run_info.run_id, basefilename, data) - -""" - logartifact(mlf::MLFlow, run, filepath) - -Stores an artifact (file) in the run's artifact location. -The name of the artifact is calculated using `basename(filepath)`. - -Dispatches on `logartifact(mlf::MLFlow, run, basefilename, data)` where `data` is the contents of `filepath`. - -# Throws -- an `ErrorException` if `filepath` does not exist. -- an exception if such occurs while trying to read the contents of `filepath`. 
- -""" -function logartifact(mlf::MLFlow, run_id::AbstractString, filepath::Union{AbstractPath,AbstractString}) - isfile(filepath) || error("File $filepath does not exist.") - try - f = open(filepath, "r") - data = read(f) - close(f) - return logartifact(mlf, run_id, basename(filepath), data) - catch e - throw(e) - finally - if @isdefined f - close(f) - end - end -end -logartifact(mlf::MLFlow, run::MLFlowRun, filepath::Union{AbstractPath,AbstractString}) = - logartifact(mlf, run.info, filepath) -logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, filepath::Union{AbstractPath,AbstractString}) = - logartifact(mlf, run_info.run_id, filepath) diff --git a/src/runs.jl b/src/runs.jl index ed31224..45c27cb 100644 --- a/src/runs.jl +++ b/src/runs.jl @@ -70,7 +70,7 @@ function updaterun(mlf::MLFlow, run_id::String, status::MLFlowRunStatus; end_tim ) if ismissing(end_time) && status.status == "FINISHED" end_time = Int(trunc(datetime2unix(now()) * 1000)) - kwargs[:end_time] => end_time + kwargs[:end_time] = string(end_time) end result = mlfpost(mlf, endpoint; kwargs...) MLFlowRun(result["run_info"]) @@ -133,7 +133,7 @@ function searchruns(mlf::MLFlow, experiment_ids::AbstractVector{<:Integer}; filter_params::AbstractDict{K,V}=Dict{}(), run_view_type::String="ACTIVE_ONLY", max_results::Int64=50000, - order_by::AbstractVector{<:String}=["attribute.start_time"], + order_by::AbstractVector{<:String}=["attribute.end_time"], page_token::String="" ) where {K,V} endpoint = "runs/search" @@ -184,3 +184,196 @@ searchruns(mlf::MLFlow, exp::MLFlowExperiment; kwargs...) = searchruns(mlf, exp.experiment_id; kwargs...) searchruns(mlf::MLFlow, exps::AbstractVector{MLFlowExperiment}; kwargs...) = searchruns(mlf, [getfield.(exps, :experiment_id)]; kwargs...) + + +""" + logparam(mlf::MLFlow, run, key, value) + logparam(mlf::MLFlow, run, kv) + +Associates a key/value pair of parameters to the particular run. + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. 
+- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. +- `key`: parameter key (name). Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. +- `value`: parameter value. Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. + +One could also specify `kv::Dict` instead of separate `key` and `value` arguments. +""" +function logparam(mlf::MLFlow, run_id::String, key, value) + endpoint = "runs/log-parameter" + mlfpost(mlf, endpoint; run_id=run_id, key=string(key), value=string(value)) +end +logparam(mlf::MLFlow, run_info::MLFlowRunInfo, key, value) = + logparam(mlf, run_info.run_id, key, value) +logparam(mlf::MLFlow, run::MLFlowRun, key, value) = + logparam(mlf, run.info, key, value) +function logparam(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, kv) + for (k, v) in kv + logparam(mlf, run, k, v) + end +end + +""" + logmetric(mlf::MLFlow, run, key, value::T; timestamp, step) where T<:Real + logmetric(mlf::MLFlow, run, key, values::AbstractArray{T}; timestamp, step) where T<:Real + +Logs a metric value (or values) against a particular run. + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. +- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String` +- `key`: metric name. +- `value`: metric value, must be numeric. + +# Keywords +- `timestamp`: if provided, must be a UNIX timestamp in milliseconds. By default, set to current time. +- `step`: step at which the metric value has been taken. 
+""" +function logmetric(mlf::MLFlow, run_id::String, key, value::T; timestamp=missing, step=missing) where T<:Real + endpoint = "runs/log-metric" + if ismissing(timestamp) + timestamp = Int(trunc(datetime2unix(now()) * 1000)) + end + mlfpost(mlf, endpoint; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) +end +logmetric(mlf::MLFlow, run_info::MLFlowRunInfo, key, value::T; timestamp=missing, step=missing) where T<:Real = + logmetric(mlf::MLFlow, run_info.run_id, key, value; timestamp=timestamp, step=step) +logmetric(mlf::MLFlow, run::MLFlowRun, key, value::T; timestamp=missing, step=missing) where T<:Real = + logmetric(mlf, run.info, key, value; timestamp=timestamp, step=step) + +function logmetric(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, key, values::AbstractArray{T}; timestamp=missing, step=missing) where T<:Real + for v in values + logmetric(mlf, run, key, v; timestamp=timestamp, step=step) + end +end + + +""" + logartifact(mlf::MLFlow, run, basefilename, data) + +Stores an artifact (file) in the run's artifact location. + +!!! note + Assumes that artifact_uri is mapped to a local directory. + At the moment, this only works if both MLFlow and the client are running on the same host or they map a directory that leads to the same location over NFS, for example. + +# Arguments +- `mlf::MLFlow`: [`MLFlow`](@ref) configuration. Currently only used to look up the run's `artifact_uri`, but when this method is extended to support `S3`, more information from `mlf` will be needed. +- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. +- `basefilename`: name of the file to be written. +- `data`: artifact content, an object that can be written directly to a file handle. + +# Throws +- an `ErrorException` if an exception occurs during writing artifact. + +# Returns +path of the artifact that was created. 
+""" +function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) + mlflowrun = getrun(mlf, run_id) + artifact_uri = mlflowrun.info.artifact_uri + mkpath(artifact_uri) + filepath = joinpath(artifact_uri, basefilename) + try + f = open(filepath, "w") + write(f, data) + close(f) + catch e + error("Unable to create artifact $(filepath): $e") + end + filepath +end +logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data) = + logartifact(mlf, run.info, basefilename, data) +logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, basefilename::AbstractString, data) = + logartifact(mlf, run_info.run_id, basefilename, data) + +""" + logartifact(mlf::MLFlow, run, filepath) + +Stores an artifact (file) in the run's artifact location. +The name of the artifact is calculated using `basename(filepath)`. + +Dispatches on `logartifact(mlf::MLFlow, run, basefilename, data)` where `data` is the contents of `filepath`. + +# Throws +- an `ErrorException` if `filepath` does not exist. +- an exception if such occurs while trying to read the contents of `filepath`. + +""" +function logartifact(mlf::MLFlow, run_id::AbstractString, filepath::Union{AbstractPath,AbstractString}) + isfile(filepath) || error("File $filepath does not exist.") + try + f = open(filepath, "r") + data = read(f) + close(f) + return logartifact(mlf, run_id, basename(filepath), data) + catch e + throw(e) + finally + if @isdefined f + close(f) + end + end +end +logartifact(mlf::MLFlow, run::MLFlowRun, filepath::Union{AbstractPath,AbstractString}) = + logartifact(mlf, run.info, filepath) +logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, filepath::Union{AbstractPath,AbstractString}) = + logartifact(mlf, run_info.run_id, filepath) + +""" + listartifacts(mlf::MLFlow, run) + +Lists the artifacts associated with an experiment run. 
+According to [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#list-artifacts), this API endpoint should return paged results, similar to [`searchruns`](@ref). +However, after some experimentation, this doesn't seem to be the case. Therefore, the paging functionality is not implemented here. + +# Arguments +- `mlf::MLFlow`: [`MLFlow`](@ref) configuration, used to query the MLFlow REST API. +- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. + +# Keywords +- `path::String`: path of a directory within the artifact location. If set, returns the contents of the directory. By default, this is the root directory of the artifacts. +- `maxdepth::Int64`: depth of listing. Default is 1, which returns only the files/directories directly under `path`. To return all artifact files and directories, use `maxdepth=-1`. + +# Returns +A vector of `Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}`. +""" +function listartifacts(mlf::MLFlow, run_id::String; path::String="", maxdepth::Int64=1) + endpoint = "artifacts/list" + kwargs = ( + run_id=run_id, + ) + kwargs = (; kwargs..., path=path) + httpresult = mlfget(mlf, endpoint; kwargs...) 
+ "files" ∈ keys(httpresult) || return Vector{Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}}() + "root_uri" ∈ keys(httpresult) || error("Malformed response from MLFlow REST API.") + root_uri = httpresult["root_uri"] + result = Vector{Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}}() + maxdepth == 0 && return result + + for resultentry ∈ httpresult["files"] + if resultentry["is_dir"] == false + filepath = joinpath(root_uri, resultentry["path"]) + filesize = parse(Int, resultentry["file_size"]) + push!(result, MLFlowArtifactFileInfo(filepath, filesize)) + elseif resultentry["is_dir"] == true + dirpath = joinpath(root_uri, resultentry["path"]) + push!(result, MLFlowArtifactDirInfo(dirpath)) + if maxdepth != 0 + nextdepthresult = listartifacts(mlf, run_id, path=resultentry["path"], maxdepth=maxdepth-1) + result = vcat(result, nextdepthresult) + end + else + isdirval = resultentry["is_dir"] + @warn "Malformed response from MLFlow REST API is_dir=$isdirval - skipping" + continue + end + end + result +end +listartifacts(mlf::MLFlow, run::MLFlowRun; kwargs...) = + listartifacts(mlf, run.info.run_id; kwargs...) +listartifacts(mlf::MLFlow, run_info::MLFlowRunInfo; kwargs...) = + listartifacts(mlf, run_info.run_id; kwargs...) 
diff --git a/src/types.jl b/src/types.jl index 5d75a6e..24740d9 100644 --- a/src/types.jl +++ b/src/types.jl @@ -25,6 +25,7 @@ struct MLFlow end MLFlow(baseuri; apiversion=2.0) = MLFlow(baseuri, apiversion) MLFlow() = MLFlow("http://localhost:5000", 2.0) +Base.show(io::IO, t::MLFlow) = show(io, ShowCase(t, new_lines=true)) """ MLFlowExperiment @@ -59,7 +60,7 @@ function MLFlowExperiment(exp::Dict{String,Any}) artifact_location = get(exp, "artifact_location", missing) MLFlowExperiment(name, lifecycle_stage, experiment_id, tags, artifact_location) end - +Base.show(io::IO, t::MLFlowExperiment) = show(io, ShowCase(t, new_lines=true)) """ MLFlowRunStatus @@ -81,6 +82,7 @@ struct MLFlowRunStatus new(status) end end +Base.show(io::IO, t::MLFlowRunStatus) = show(io, ShowCase(t, new_lines=true)) """ MLFlowRunInfo @@ -126,6 +128,7 @@ function MLFlowRunInfo(info::Dict{String,Any}) MLFlowRunInfo(run_id, experiment_id, status, start_time, end_time, artifact_uri, lifecycle_stage) end +Base.show(io::IO, t::MLFlowRunInfo) = show(io, ShowCase(t, new_lines=true)) get_run_id(runinfo::MLFlowRunInfo) = runinfo.run_id """ @@ -157,7 +160,7 @@ function MLFlowRunDataMetric(d::Dict{String,Any}) timestamp = parse(Int64, d["timestamp"]) MLFlowRunDataMetric(key, value, step, timestamp) end - +Base.show(io::IO, t::MLFlowRunDataMetric) = show(io, ShowCase(t, new_lines=true)) """ MLFlowRunData @@ -165,7 +168,7 @@ end Represents run data. # Fields -- `metrics::Vector{MLFlowRunDataMetric}`: run metrics. +- `metrics::Dict{String,MLFlowRunDataMetric}`: run metrics. - `params::Dict{String,String}`: run parameters. - `tags`: list of run tags. @@ -175,12 +178,18 @@ Represents run data. """ struct MLFlowRunData - metrics::Vector{MLFlowRunDataMetric} + metrics::Dict{String,MLFlowRunDataMetric} params::Union{Dict{String,String},Missing} tags end function MLFlowRunData(data::Dict{String,Any}) - metrics = haskey(data, "metrics") ? 
MLFlowRunDataMetric.(data["metrics"]) : MLFlowRunDataMetric[] + metrics = Dict{String,MLFlowRunDataMetric}() + if haskey(data, "metrics") + for metric in data["metrics"] + v = MLFlowRunDataMetric(metric) + metrics[v.key] = v + end + end if haskey(data, "params") params = Dict{String,String}() for p in data["params"] @@ -192,6 +201,7 @@ function MLFlowRunData(data::Dict{String,Any}) tags = haskey(data, "tags") ? data["tags"] : missing MLFlowRunData(metrics, params, tags) end +Base.show(io::IO, t::MLFlowRunData) = show(io, ShowCase(t, new_lines=true)) get_params(rundata::MLFlowRunData) = rundata.params """ @@ -223,5 +233,40 @@ MLFlowRun(info::Dict{String,Any}) = MLFlowRun(MLFlowRunInfo(info), missing) MLFlowRun(info::Dict{String,Any}, data::Dict{String,Any}) = MLFlowRun(MLFlowRunInfo(info), MLFlowRunData(data)) +Base.show(io::IO, t::MLFlowRun) = show(io, ShowCase(t, new_lines=true)) get_info(run::MLFlowRun) = run.info get_data(run::MLFlowRun) = run.data +get_run_id(run::MLFlowRun) = get_run_id(run.info) +get_params(run::MLFlowRun) = get_params(run.data) + +""" + MLFlowArtifactFileInfo + +Metadata of a single artifact file -- result of [`listartifacts`](@ref). + +# Fields +- `filepath::String`: File path, including the root artifact directory of a run. +- `filesize::Int64`: Size in bytes. +""" +struct MLFlowArtifactFileInfo + filepath::String + filesize::Int64 +end +Base.show(io::IO, t::MLFlowArtifactFileInfo) = show(io, ShowCase(t, new_lines=true)) +get_path(mlfafi::MLFlowArtifactFileInfo) = mlfafi.filepath +get_size(mlfafi::MLFlowArtifactFileInfo) = mlfafi.filesize + +""" + MLFlowArtifactDirInfo + +Metadata of a single artifact directory -- result of [`listartifacts`](@ref). + +# Fields +- `dirpath::String`: Directory path, including the root artifact directory of a run. 
+""" +struct MLFlowArtifactDirInfo + dirpath::String +end +Base.show(io::IO, t::MLFlowArtifactDirInfo) = show(io, ShowCase(t, new_lines=true)) +get_path(mlfadi::MLFlowArtifactDirInfo) = mlfadi.dirpath +get_size(mlfadi::MLFlowArtifactDirInfo) = 0 diff --git a/src/utils.jl b/src/utils.jl index 9a27870..553bfa8 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -67,6 +67,6 @@ generatefilterfromparams(Dict("paramkey1" => "paramvalue1", "paramkey2" => "para function generatefilterfromparams(filter_params::AbstractDict{K,V}) where {K,V} length(filter_params) > 0 || return "" # NOTE: may have issues with escaping. - filters = ["param.\"$(k)\" = \"$(v)\"" for(k, v) ∈ filter_params ] + filters = ["param.\"$(k)\" = \"$(v)\"" for(k, v) ∈ filter_params] join(filters, " and ") end diff --git a/test/runtests.jl b/test/runtests.jl index 62718db..480faff 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -97,12 +97,85 @@ end @test length(findrun) == 1 r = only(findrun) @test get_run_id(get_info(r)) == exprun.info.run_id + @test get_run_id(r) == get_run_id(get_info(r)) @test sort(collect(keys(get_params(get_data(r))))) == sort(string.(keys(runparams))) @test sort(collect(values(get_params(get_data(r))))) == sort(string.(values(runparams))) - + @test get_params(r) == get_params(get_data(r)) @test deleteexperiment(mlf, exp) end +@testset "artifacts" begin + @ensuremlf + exp = createexperiment(mlf) + @test isa(exp, MLFlowExperiment) + exprun = createrun(mlf, exp) + @test isa(exprun, MLFlowRun) + # only run the below if artifact_uri is a local directory + # i.e. 
when running mlflow server as a separate process next to the testset + # when running mlflow in a container, the below tests will be skipped + # this is what happens in github actions - mlflow runs in a container, the artifact_uri is not immediately available, and tests are skipped + artifact_uri = exprun.info.artifact_uri + if isdir(artifact_uri) + @test_throws SystemError logartifact(mlf, exprun, "/etc/shadow") + + tmpfiletoupload = "sometempfilename.txt" + f = open(tmpfiletoupload, "w") + write(f, "samplecontents") + close(f) + artifactpath = logartifact(mlf, exprun, tmpfiletoupload) + @test isfile(artifactpath) + rm(tmpfiletoupload) + artifactpath = logartifact(mlf, exprun, "randbytes.bin", b"some rand bytes here") + @test isfile(artifactpath) + + mkdir(joinpath(artifact_uri, "newdir")) + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "randbytesindir.bin"), b"bytes here") + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "randbytesindir2.bin"), b"bytes here") + mkdir(joinpath(artifact_uri, "newdir", "new2")) + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "randbytesindir.bin"), b"bytes here") + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "randbytesindir2.bin"), b"bytes here") + mkdir(joinpath(artifact_uri, "newdir", "new2", "new3")) + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "randbytesindir.bin"), b"bytes here") + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "randbytesindir2.bin"), b"bytes here") + mkdir(joinpath(artifact_uri, "newdir", "new2", "new3", "new4")) + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "new4", "randbytesindir.bin"), b"bytes here") + artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "new4", "randbytesindir2.bin"), b"bytes here") + + # artifact tree should now look like this: + # + # ├── newdir + # │   ├── new2 + # │   │   ├── new3 + # │   │   │   ├── new4 + 
# │   │   │   │   ├── randbytesindir2.bin + # │   │   │   │   └── randbytesindir.bin + # │   │   │   ├── randbytesindir2.bin + # │   │   │   └── randbytesindir.bin + # │   │   ├── randbytesindir2.bin + # │   │   └── randbytesindir.bin + # │   ├── randbytesindir2.bin + # │   └── randbytesindir.bin + # ├── randbytes.bin + # └── sometempfilename.txt + + # 4 directories, 10 files + + artifactlist = listartifacts(mlf, exprun) + @test sort(basename.(get_path.(artifactlist))) == ["newdir", "randbytes.bin", "sometempfilename.txt"] + @test sort(get_size.(artifactlist)) == [0, 14, 20] + + ald2 = listartifacts(mlf, exprun, maxdepth=2) + @test length(ald2) == 6 + @test sort(basename.(get_path.(ald2))) == ["new2", "newdir", "randbytes.bin", "randbytesindir.bin", "randbytesindir2.bin", "sometempfilename.txt"] + aldrecursion = listartifacts(mlf, exprun, maxdepth=-1) + @test length(aldrecursion) == 14 # 4 directories, 10 files + @test sum(typeof.(aldrecursion) .== MLFlowArtifactDirInfo) == 4 # 4 directories + @test sum(typeof.(aldrecursion) .== MLFlowArtifactFileInfo) == 10 # 10 files + end + deleterun(mlf, exprun) + deleteexperiment(mlf, exp) +end + @testset "MLFlowClient.jl" begin @ensuremlf exp = createexperiment(mlf) @@ -137,27 +210,13 @@ end retrieved_run = getrun(mlf, exprunid) @test exprun.info == retrieved_run.info - tmpfiletoupload = tempname() - f = open(tmpfiletoupload, "w") - write(f, "samplecontents") - close(f) - artifactpath = logartifact(mlf, retrieved_run, tmpfiletoupload) - @test isfile(artifactpath) - @test_throws SystemError logartifact(mlf, retrieved_run, "/etc/shadow") - rm(tmpfiletoupload) - - artifactpath = logartifact(mlf, retrieved_run, "randbytes.bin", b"some rand bytes here") - @test isfile(artifactpath) - running_run = updaterun(mlf, exprunid, "RUNNING") @test running_run.info.experiment_id == experiment_id @test running_run.info.status == MLFlowRunStatus("RUNNING") finished_run = updaterun(mlf, exprun, MLFlowRunStatus("FINISHED")) finishedrun = 
getrun(mlf, finished_run.info.run_id) - # NOTE: seems like MLFlow API never returns `end_time` as documented in https://mlflow.org/docs/latest/rest-api.html#runinfo - # Consider raising an issue with MLFlow itself. - @test_broken !ismissing(finishedrun.info.end_time) + @test !ismissing(finishedrun.info.end_time) exprun2 = createrun(mlf, experiment_id) exprun2id = exprun.info.run_id @@ -179,5 +238,3 @@ end deleteexperiment(mlf, exp) end - -