From 50004ce680003863469d23def4a8be95645bb4b0 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 3 Nov 2024 03:26:22 -0500 Subject: [PATCH] Fixing documentation --- Project.toml | 2 - README.md | 4 -- docs/Project.toml | 4 +- docs/make.jl | 32 +++------ docs/src/{ => images}/mlflowexp.png | Bin docs/src/{ => images}/mlflowexpmetric1.png | Bin docs/src/{ => images}/withoutmlflow.png | Bin docs/src/reference.md | 59 --------------- docs/src/reference/artifact.md | 4 ++ docs/src/reference/experiment.md | 11 +++ docs/src/reference/loggers.md | 7 ++ docs/src/reference/misc.md | 5 ++ docs/src/reference/run.md | 11 +++ docs/src/reference/types.md | 21 ++++++ docs/src/tutorial.md | 6 +- src/MLFlowClient.jl | 19 +++-- src/services/artifact.jl | 8 +-- src/services/experiment.jl | 80 +++++++++++---------- src/services/loggers.jl | 45 ++++++------ src/services/misc.jl | 35 ++++++--- src/services/run.jl | 70 +++++++++--------- src/types/dataset.jl | 10 +-- src/types/enums.jl | 2 +- src/types/experiment.jl | 2 +- src/types/mlflow.jl | 8 +-- src/types/model_version.jl | 10 +-- src/types/registered_model.jl | 6 +- src/types/run.jl | 13 ++-- src/utils.jl | 4 -- 29 files changed, 239 insertions(+), 239 deletions(-) rename docs/src/{ => images}/mlflowexp.png (100%) rename docs/src/{ => images}/mlflowexpmetric1.png (100%) rename docs/src/{ => images}/withoutmlflow.png (100%) delete mode 100644 docs/src/reference.md create mode 100644 docs/src/reference/artifact.md create mode 100644 docs/src/reference/experiment.md create mode 100644 docs/src/reference/loggers.md create mode 100644 docs/src/reference/misc.md create mode 100644 docs/src/reference/run.md create mode 100644 docs/src/reference/types.md diff --git a/Project.toml b/Project.toml index 59d3b69..00d90f9 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,6 @@ version = "0.5.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -FilePathsBase = 
"48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" @@ -13,7 +12,6 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] -FilePathsBase = "0.9" HTTP = "1.9" JSON = "0.21" ShowCases = "0.1" diff --git a/README.md b/README.md index 51e9194..4cf00a3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,3 @@ Julia client for [MLFlow](https://www.mlflow.org/) - -This package is still under development and interfaces may change. See the documentation for current features and limitations. - -Tested against `mlflow==1.21.0` and `mlflow==1.22.0`. diff --git a/docs/Project.toml b/docs/Project.toml index dee67d4..d364e82 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,8 +1,6 @@ [deps] -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -MLFlowClient = "64a0f543-368b-4a9a-827a-e71edb2a0b83" +ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/docs/make.jl b/docs/make.jl index 3e6244d..264bc6d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,27 +1,15 @@ -using MLFlowClient +push!(LOAD_PATH,"../src/") using Documenter - -DocMeta.setdocmeta!(MLFlowClient, :DocTestSetup, :(using MLFlowClient); recursive=true) +using MLFlowClient makedocs(; - modules=[MLFlowClient], - authors="@deyandyankov and contributors", - repo="https://github.com/JuliaAI.jl/blob/{commit}{path}#{line}", sitename="MLFlowClient.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://juliaai.github.io/MLFlowClient.jl", - assets=String[] - ), - pages=[ - "Home" => "index.md", - "Tutorial" => "tutorial.md", - "Reference" => "reference.md" - 
], - checkdocs=:exports -) + authors="@deyandyankov and contributors", + pages=["Home" => "index.md", "Tutorial" => "tutorial.md", "Reference" => [ + "Types" => "reference/types.md", "Artifact operations" => "reference/artifact.md", + "Experiment operations" => "reference/experiment.md", + "Logging operations" => "reference/loggers.md", + "Miscellaneous operations" => "reference/misc.md", + "Run operations" => "reference/run.md"]]) -deploydocs(; - repo="github.com/JuliaAI/MLFlowClient.jl", - devbranch="main" -) +deploydocs(; repo="github.com/JuliaAI/MLFlowClient.jl", devbranch="main") diff --git a/docs/src/mlflowexp.png b/docs/src/images/mlflowexp.png similarity index 100% rename from docs/src/mlflowexp.png rename to docs/src/images/mlflowexp.png diff --git a/docs/src/mlflowexpmetric1.png b/docs/src/images/mlflowexpmetric1.png similarity index 100% rename from docs/src/mlflowexpmetric1.png rename to docs/src/images/mlflowexpmetric1.png diff --git a/docs/src/withoutmlflow.png b/docs/src/images/withoutmlflow.png similarity index 100% rename from docs/src/withoutmlflow.png rename to docs/src/images/withoutmlflow.png diff --git a/docs/src/reference.md b/docs/src/reference.md deleted file mode 100644 index 7f79daf..0000000 --- a/docs/src/reference.md +++ /dev/null @@ -1,59 +0,0 @@ -# Reference - -```@meta -CurrentModule = MLFlowClient -``` - -# Types - -TODO: Document accessors. 
- -```@docs -MLFlow -MLFlowExperiment -MLFlowRun -MLFlowRunInfo -MLFlowRunData -MLFlowRunDataParam -MLFlowRunDataMetric -MLFlowRunStatus -MLFlowArtifactFileInfo -MLFlowArtifactDirInfo -``` - -# Experiments - -```@docs -createexperiment -getexperiment -getorcreateexperiment -deleteexperiment -searchexperiments -restoreexperiment -``` - -# Runs - -```@docs -createrun -getrun -updaterun -deleterun -searchruns -logparam -logmetric -logbatch -logartifact -listartifacts -``` - -# Utilities - -```@docs -mlfget -mlfpost -uri -generatefilterfromentity_type -generatefilterfromparams -generatefilterfromattributes -``` diff --git a/docs/src/reference/artifact.md b/docs/src/reference/artifact.md new file mode 100644 index 0000000..0e28093 --- /dev/null +++ b/docs/src/reference/artifact.md @@ -0,0 +1,4 @@ +# Artifact operations +```@docs +listartifacts +``` diff --git a/docs/src/reference/experiment.md b/docs/src/reference/experiment.md new file mode 100644 index 0000000..ba98435 --- /dev/null +++ b/docs/src/reference/experiment.md @@ -0,0 +1,11 @@ +# Experiment operations +```@docs +createexperiment +getexperiment +getexperimentbyname +deleteexperiment +restoreexperiment +updateexperiment +searchexperiments +setexperimenttag +``` diff --git a/docs/src/reference/loggers.md b/docs/src/reference/loggers.md new file mode 100644 index 0000000..2bad6ce --- /dev/null +++ b/docs/src/reference/loggers.md @@ -0,0 +1,7 @@ +# Logging operations +```@docs +logmetric +logbatch +loginputs +logparam +``` diff --git a/docs/src/reference/misc.md b/docs/src/reference/misc.md new file mode 100644 index 0000000..7baadfc --- /dev/null +++ b/docs/src/reference/misc.md @@ -0,0 +1,5 @@ +# Miscellaneous operations +```@docs +getmetrichistory +refresh +``` diff --git a/docs/src/reference/run.md b/docs/src/reference/run.md new file mode 100644 index 0000000..5fbd25c --- /dev/null +++ b/docs/src/reference/run.md @@ -0,0 +1,11 @@ +# Run operations +```@docs +createrun +deleterun +restorerun +getrun 
+setruntag +deleteruntag +searchruns +updaterun +``` diff --git a/docs/src/reference/types.md b/docs/src/reference/types.md new file mode 100644 index 0000000..e7db289 --- /dev/null +++ b/docs/src/reference/types.md @@ -0,0 +1,21 @@ +# Types +```@docs +MLFlow +Tag +ViewType +RunStatus +ModelVersionStatus +Dataset +DatasetInput +FileInfo +ModelVersion +RegisteredModel +RegisteredModelAlias +Experiment +Run +Param +Metric +RunData +RunInfo +RunInputs +``` diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index 3fbf101..d7565c3 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -40,7 +40,7 @@ p This could result in the following plot: -![](withoutmlflow.png) +![](images/withoutmlflow.png) Now, suppose that you are interested in turning this into an experiment which stores its metadata and results in MLFlow using `MLFlowClient`. You could amend the code like this: @@ -114,8 +114,8 @@ updaterun(mlf, exprun, "FINISHED") This will result in the folowing experiment created in your `MLFlow` which is running on `http://localhost/`: -![](mlflowexp.png) +![](images/mlflowexp.png) You can also observe series logged against individual metrics, i.e. `pricepath1` looks like this in `MLFlow`: -![](mlflowexpmetric1.png) +![](images/mlflowexpmetric1.png) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 88e60a7..99638e7 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -1,3 +1,14 @@ +""" + MLFlowClient + +[MLFlowClient](https://github.com/JuliaAI/MLFlowClient.jl) is a [Julia](https://julialang.org/) package +for working with [MLFlow](https://mlflow.org/) using the REST +[API v2.0](https://www.mlflow.org/docs/latest/rest-api.html). + +`MLFlowClient` allows you to create and manage `MLFlow` experiments, runs, and log metrics +and artifacts. If you are not familiar with `MLFlow` and its concepts, please refer to +[MLFlow documentation](https://mlflow.org/docs/latest/index.html). 
+""" module MLFlowClient using Dates @@ -6,7 +17,6 @@ using HTTP using URIs using JSON using ShowCases -using FilePathsBase: AbstractPath include("types/mlflow.jl") export MLFlow @@ -35,11 +45,10 @@ export Experiment include("types/run.jl") export Run, Param, Metric, RunData, RunInfo, RunInputs -include("utils.jl") -export refresh - include("api.jl") +include("utils.jl") + include("services/experiment.jl") export getexperiment, createexperiment, deleteexperiment, setexperimenttag, updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname @@ -55,6 +64,6 @@ include("services/artifact.jl") export listartifacts include("services/misc.jl") -export getmetrichistory +export refresh, getmetrichistory end diff --git a/src/services/artifact.jl b/src/services/artifact.jl index a56908b..5a73613 100644 --- a/src/services/artifact.jl +++ b/src/services/artifact.jl @@ -2,17 +2,17 @@ listartifacts(instance::MLFlow, run_id::String; path::String="", page_token::String="") listartifacts(instance::MLFlow, run::Run; path::String="", page_token::String="") -List artifacts for a run. +List artifacts for a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run whose artifacts to list. +- `run_id`: ID of the [`Run`](@ref) whose artifacts to list. - `path`: Filter artifacts matching this path (a relative path from the root artifact -directory). + directory). - `page_token`: Token indicating the page of artifact results to fetch # Returns -- Root artifact directory for the run. +- Root artifact directory for the [`Run`](@ref). - List of file location and metadata for artifacts. - Token that can be used to retrieve the next page of artifact results. 
""" diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 1233d48..b0e99a3 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -1,21 +1,22 @@ """ - createexperiment(instance::MLFlow, name::String; artifact_location::String="", - tags::Union{Dict{<:Any}, Array{<:Any}}=[]) + createexperiment(instance::MLFlow, name::String; + artifact_location::Union{String, Missing}=missing, + tags::MLFlowUpsertData{Tag}=Tag[]) -Create an experiment with a name. Returns the newly created experiment. Validates that -another experiment with the same name does not already exist and fails if another -experiment with the same name already exists. +Create an [`Experiment`](@ref) with a name. Returns the newly created [`Experiment`](@ref). +Validates that another [`Experiment`](@ref) with the same name does not already exist and +fails if another [`Experiment`](@ref) with the same name already exists. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `name`: Experiment name. This field is required. -- `artifact_location`: Location where all artifacts for the experiment -are stored. If not provided, the remote server will select an appropriate -default. -- `tags`: A collection of tags to set on the experiment. +- `name`: [`Experiment`](@ref) name. This field is required. +- `artifact_location`: Location where all artifacts for the [`Experiment`](@ref) + are stored. If not provided, the remote server will select an appropriate + default. +- `tags`: A collection of [`Tag`](@ref) to set on the [`Experiment`](@ref). # Returns -The ID of the newly created experiment. +The ID of the newly created [`Experiment`](@ref). """ function createexperiment(instance::MLFlow, name::String; artifact_location::Union{String, Missing}=missing, @@ -29,11 +30,11 @@ end getexperiment(instance::MLFlow, experiment_id::String) getexperiment(instance::MLFlow, experiment_id::Integer) -Get metadata for an experiment. This method works on deleted experiments. 
+Get metadata for an [`Experiment`](@ref). This method works on deleted experiments. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns An instance of type [`Experiment`](@ref). @@ -48,15 +49,15 @@ getexperiment(instance::MLFlow, experiment_id::Integer)::Experiment = """ getexperimentbyname(instance::MLFlow, experiment_name::String) -Get metadata for an experiment. +Get metadata for an [`Experiment`](@ref). -This endpoint will return deleted experiments, but prefers the active experiment if an -active and deleted experiment share the same name. If multiple deleted experiments share -the same name, the API will return one of them. +This endpoint will return deleted experiments, but prefers the active [`Experiment`](@ref) +if an active and deleted [`Experiment`](@ref) share the same name. If multiple deleted +experiments share the same name, the API will return one of them. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_name`: Name of the associated experiment. +- `experiment_name`: Name of the associated [`Experiment`](@ref). # Returns An instance of type [`Experiment`](@ref). @@ -71,12 +72,13 @@ end deleteexperiment(instance::MLFlow, experiment_id::Integer) deleteexperiment(instance::MLFlow, experiment::Experiment) -Mark an experiment and associated metadata, runs, metrics, params, and tags for deletion. -If the experiment uses FileStore, artifacts associated with experiment are also deleted. +Mark an [`Experiment`](@ref) and associated metadata, runs, metrics, params, and tags for +deletion. If the [`Experiment`](@ref) uses FileStore, artifacts associated with +[`Experiment`](@ref) are also deleted. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns `true` if successful. 
Otherwise, raises exception. @@ -95,13 +97,13 @@ deleteexperiment(instance::MLFlow, experiment::Experiment)::Bool = restoreexperiment(instance::MLFlow, experiment_id::Integer) restoreexperiment(instance::MLFlow, experiment::Experiment) -Restore an experiment marked for deletion. This also restores associated metadata, runs, -metrics, params, and tags. If experiment uses FileStore, underlying artifacts associated -with experiment are also restored. +Restore an [`Experiment`](@ref) marked for deletion. This also restores associated +metadata, runs, metrics, params, and tags. If [`Experiment`](@ref) uses FileStore, +underlying artifacts associated with [`Experiment`](@ref) are also restored. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -120,13 +122,13 @@ restoreexperiment(instance::MLFlow, experiment::Experiment)::Bool = updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) -Update experiment metadata. +Update [`Experiment`](@ref) metadata. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. -- `new_name`: If provided, the experiment’s name is changed to the new name. The new name -must be unique. +- `experiment_id`: ID of the associated [`Experiment`](@ref). +- `new_name`: If provided, the [`Experiment`](@ref) name is changed to the new name. The new name + must be unique. # Returns `true` if successful. Otherwise, raises exception. 
@@ -142,21 +144,22 @@ updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String)::Bo """ searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", - filter::String="", order_by::Array{String}=[], view_type::ViewType=ACTIVE_ONLY) + filter::String="", order_by::Array{String}=String[], + view_type::ViewType=ACTIVE_ONLY) # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `max_results`: Maximum number of experiments desired. - `page_token`: Token indicating the page of experiments to fetch. -- `filter`: A filter expression over experiment attributes and tags that allows returning a -subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). -- `order_by`: List of columns for ordering search results, which can include experiment -name and id with an optional “DESC” or “ASC” annotation, where “ASC” is the default. +- `filter`: A filter expression over [`Experiment`](@ref) attributes and tags that allows returning a + subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). +- `order_by`: List of columns for ordering search results, which can include [`Experiment`](@ref) + name and id with an optional “DESC” or “ASC” annotation, where “ASC” is the default. - `view_type`: Qualifier for type of experiments to be returned. If unspecified, return -only active experiments. + only active experiments. For more values, see [`ViewType`](@ref). # Returns -- Vector of [`Experiment`](@ref) that were found in the MLFlow instance. +- Vector of [`Experiment`](@ref) that were found in the [`MLFlow`](@ref) instance. - The next page token if there are more results. 
""" function searchexperiments(instance::MLFlow; max_results::Int64=20000, @@ -181,10 +184,11 @@ end setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, value::String) setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, value::String) -Set a tag on an experiment. Experiment tags are metadata that can be updated. +Set a tag on an [`Experiment`](@ref). [`Experiment`](@ref) tags are metadata that can be +updated. # Arguments -- `experiment_id`: ID of the experiment under which to log the tag. +- `experiment_id`: ID of the [`Experiment`](@ref) under which to log the tag. - `key`: Name of the tag. - `value`: String value of the tag being logged. diff --git a/src/services/loggers.jl b/src/services/loggers.jl index ecab95a..ea1f57a 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -6,17 +6,17 @@ timestamp::Int64=round(Int, now() |> datetime2unix), step::Union{Int64, Missing}=missing) -Log a metric for a run. A metric is a key-value pair (string key, float value) with an -associated timestamp. Examples include the various metrics that represent ML model -accuracy. A metric can be logged multiple times. +Log a [`Metric`](@ref) for a [`Run`](@ref). A [`Metric`](@ref) is a key-value pair (string +key, float value) with an associated timestamp. Examples include the various metrics that +represent ML model accuracy. A [`Metric`](@ref) can be logged multiple times. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the metric. -- `key`: Name of the metric. -- `value`: Double value of the metric being logged. -- `timestamp`: Unix timestamp in milliseconds at the time metric was logged. -- `step`: Step at which to log the metric. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Metric`](@ref). +- `key`: Name of the [`Metric`](@ref). +- `value`: Double value of the [`Metric`](@ref) being logged. 
+- `timestamp`: Unix timestamp in milliseconds at the time [`Metric`](@ref) was logged. +- `step`: Step at which to log the [`Metric`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -45,17 +45,17 @@ logmetric(instance::MLFlow, run::Run, metric::Metric)::Bool = logbatch(instance::MLFlow, run::Run; metrics::Array{Metric}, params::MLFlowUpsertData{Param}, tags::MLFlowUpsertData{Tag}) -Log a batch of metrics, params, and tags for a run. In case of error, partial data may be -written. +Log a batch of metrics, params, and tags for a [`Run`](@ref). In case of error, partial +data may be written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to log under. -- `metrics`: Metrics to log. -- `params`: Params to log. -- `tags`: Tags to log. +- `run_id`: ID of the [`Run`](@ref) to log under. +- `metrics`: A collection of [`Metric`](@ref) to log. +- `params`: A collection of [`Param`](@ref) to log. +- `tags`: A collection of [`Tag`](@ref) to log. **Note**: A single request can contain up to 1000 metrics, and up to 1000 metrics, params, and tags in total. @@ -80,8 +80,8 @@ logbatch(instance::MLFlow, run::Run; metrics::MLFlowUpsertData{Metric}=Metric[], # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to log under This field is required. -- `datasets`: Dataset inputs. +- `run_id`: ID of the [`Run`](@ref) to log under. This field is required. +- `datasets`: A collection of [`DatasetInput`](@ref) to log. # Returns `true` if successful. Otherwise, raises exception. @@ -99,15 +99,16 @@ loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput})::Bool = logparam(instance::MLFlow, run_id::String, param::Param) logparam(instance::MLFlow, run::Run, param::Param) -Log a param used for a run. A param is a key-value pair (string key, string value). 
-Examples include hyperparameters used for ML model training and constant dates and values -used in an ETL pipeline. A param can be logged only once for a run. +Log a [`Param`](@ref) used for a [`Run`](@ref). A [`Param`](@ref) is a key-value pair +(string key, string value). Examples include hyperparameters used for ML model training and +constant dates and values used in an ETL pipeline. A [`Param`](@ref) can be logged only +once for a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the param. -- `key`: Name of the param. -- `value`: String value of the param being logged. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Param`](@ref). +- `key`: Name of the [`Param`](@ref). +- `value`: String value of the [`Param`](@ref) being logged. # Returns `true` if successful. Otherwise, raises exception. diff --git a/src/services/misc.jl b/src/services/misc.jl index 7be2137..35f7a11 100644 --- a/src/services/misc.jl +++ b/src/services/misc.jl @@ -1,20 +1,20 @@ """ getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; - page_token::String="", max_results::Int32=1) + page_token::String="", max_results::Union{Int64, Missing}=missing) -Get a list of all values for the specified metric for a given run. +Get a list of all values for the specified [`Metric`](@ref) for a given [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run from which to fetch metric values. -- `metric_key`: Name of the metric. -- `page_token`: Token indicating the page of metric history to fetch. -- `max_results`: Maximum number of logged instances of a metric for a run to -return per call. +- `run_id`: ID of the [`Run`](@ref) from which to fetch [`Metric`](@ref) values. +- `metric_key`: Name of the [`Metric`](@ref) to fetch. +- `page_token`: Token indicating the page of [`Metric`](@ref) history to fetch. 
+- `max_results`: Maximum number of logged instances of a [`Metric`](@ref) for a + [`Run`](@ref) to return per call. # Returns -- A list of all metric historical values for the specified metric in the -specified run. +- A list of all historical values for the specified [`Metric`](@ref) in the specified + [`Run`](@ref). - The next page token if there are more results. """ function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; @@ -39,3 +39,20 @@ getmetrichistory(instance::MLFlow, run::Run, metric::Metric; page_token::String= )::Tuple{Array{Metric}, Union{String, Nothing}} = getmetrichistory(instance, run.info.run_id, metric.key; page_token=page_token, max_results=max_results) + +""" + refresh(instance::MLFlow, run::Run) + refresh(instance::MLFlow, experiment::Experiment) + +Get the latest metadata for a [`Run`](@ref) or [`Experiment`](@ref). + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run` or `experiment`: [`Run`](@ref) or [`Experiment`](@ref) to refresh. + +# Returns +An instance of type [`Run`](@ref) or [`Experiment`](@ref). +""" +refresh(instance::MLFlow, experiment::Experiment)::Experiment = + getexperiment(instance, experiment.experiment_id) +refresh(instance::MLFlow, run::Run)::Run = getrun(instance, run.info.run_id) diff --git a/src/services/run.jl b/src/services/run.jl index ef27c3a..caaef6e 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -4,15 +4,15 @@ start_time::Union{Int64, Missing}=missing, tags::Union{Dict{<:Any}, Array{<:Any}}=[]) -Create a new run within an experiment. A run is usually a single execution of a machine -learning or data ETL pipeline. +Create a new [`Run`](@ref) within an [`Experiment`](@ref). A [`Run`](@ref) is usually a +single execution of a machine learning or data ETL pipeline. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. -- `run_name`: Name of the run. 
-- `start_time`: Unix timestamp in milliseconds of when the run started. -- `tags`: Additional metadata for run. +- `experiment_id`: ID of the associated [`Experiment`](@ref). +- `run_name`: Name of the [`Run`](@ref). +- `start_time`: Unix timestamp in milliseconds of when the [`Run`](@ref) started. +- `tags`: Additional metadata for [`Run`](@ref). # Returns An instance of type [`Run`](@ref). @@ -39,11 +39,11 @@ createrun(instance::MLFlow, experiment::Experiment; deleterun(instance::MLFlow, run_id::String) deleterun(instance::MLFlow, run::Run) -Mark a run for deletion. +Mark a [`Run`](@ref) for deletion. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to delete. +- `run_id`: ID of the [`Run`](@ref) to delete. # Returns `true` if successful. Otherwise, raises exception. @@ -59,11 +59,11 @@ deleterun(instance::MLFlow, run::Run)::Bool = restorerun(instance::MLFlow, run_id::String) restorerun(instance::MLFlow, run::Run) -Restore a deleted run. +Restore a deleted [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to restore. +- `run_id`: ID of the [`Run`](@ref) to restore. # Returns `true` if successful. Otherwise, raises exception. @@ -78,13 +78,14 @@ restorerun(instance::MLFlow, run::Run)::Bool = """ getrun(instance::MLFlow, run_id::String) -Get metadata, metrics, params, and tags for a run. In the case where multiple metrics with -the same key are logged for a run, return only the value with the latest timestamp. If -there are multiple values with the latest timestamp, return the maximum of these values. +Get metadata, metrics, params, and tags for a [`Run`](@ref). In the case where multiple +metrics with the same key are logged for a [`Run`](@ref), return only the value with the +latest timestamp. If there are multiple values with the latest timestamp, return the +maximum of these values. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to fetch. 
+- `run_id`: ID of the [`Run`](@ref) to fetch. # Returns An instance of type [`Run`](@ref). @@ -99,13 +100,13 @@ end setruntag(instance::MLFlow, run::Run, key::String, value::String) setruntag(instance::MLFlow, run::Run, tag::Tag) -Set a tag on a run. +Set a [`Tag`](@ref) on a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the tag. -- `key`: Name of the tag. -- `value`: String value of the tag being logged. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Tag`](@ref). +- `key`: Name of the [`Tag`](@ref). +- `value`: String value of the [`Tag`](@ref) being logged. # Returns `true` if successful. Otherwise, raises exception. @@ -120,16 +121,16 @@ setruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = setruntag(instance, run.info.run_id, tag.key, tag.value) """ - deletetag(instance::MLFlow, run_id::String, key::String) - deletetag(instance::MLFlow, run::Run, key::String) - deletetag(instance::MLFlow, run::Run, tag::Tag) + deleteruntag(instance::MLFlow, run_id::String, key::String) + deleteruntag(instance::MLFlow, run::Run, key::String) + deleteruntag(instance::MLFlow, run::Run, tag::Tag) -Delete a tag on a run. +Delete a [`Tag`](@ref) on a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run that the tag was logged under. -- `key`: Name of the tag. +- `run_id`: ID of the [`Run`](@ref) that the [`Tag`](@ref) was logged under. +- `key`: Name of the [`Tag`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -148,18 +149,19 @@ deleteruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = run_view_type::ViewType=ACTIVE_ONLY, max_results::Int=1000, order_by::Array{String}=String[], page_token::String="") -Search for runs that satisfy expressions. Search expressions can use Metric and Param keys. +Search for runs that satisfy expressions. Search expressions can use [`Metric`](@ref) and +[`Param`](@ref) keys. 
# Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_ids`: List of experiment IDs to search over. +- `experiment_ids`: List of [`Experiment`](@ref) IDs to search over. - `filter`: A filter expression over params, metrics, and tags, that allows returning a -subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). + subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). - `run_view_type`: Whether to display only active, only deleted, or all runs. Defaults to -only active runs. + only active runs. - `max_results`: Maximum number of runs desired. - `order_by`: List of columns to be ordered by, including attributes, params, metrics, and -tags with an optional “DESC” or “ASC” annotation, where “ASC” is the default. + tags with an optional “DESC” or “ASC” annotation, where “ASC” is the default. - `page_token`: Token indicating the page of runs to fetch. # Returns @@ -191,14 +193,14 @@ end updaterun(instance::MLFlow, run::Run; status::Union{RunStatus, Missing}=missing, end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing}=missing) -Update run metadata. +Update [`Run`](@ref) metadata. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to update. -- `status`: Updated status of the run. -- `end_time`: Unix timestamp in milliseconds of when the run ended. -- `run_name`: Updated name of the run. +- `run_id`: ID of the [`Run`](@ref) to update. +- `status`: Updated status of the [`Run`](@ref). +- `end_time`: Unix timestamp in milliseconds of when the [`Run`](@ref) ended. +- `run_name`: Updated name of the [`Run`](@ref). # Returns - An instance of type [`RunInfo`](@ref) with the updated metadata. diff --git a/src/types/dataset.jl b/src/types/dataset.jl index 7843b2a..011fb33 100644 --- a/src/types/dataset.jl +++ b/src/types/dataset.jl @@ -11,10 +11,6 @@ development process. 
- `source::String`: Source information for the dataset. - `schema::String`: The schema of the dataset. This field is optional. - `profile::String`: The profile of the dataset. This field is optional. - -# Constructors -- `Dataset(name, digest, source_type, source, schema, profile)` -- `Dataset(name, digest, source_type, source; schema=nothing, profile=nothing)` """ struct Dataset name::String @@ -36,11 +32,7 @@ Represents a dataset and input tags. # Fields - `tags::Array{Tag}`: A list of tags for the dataset input. -- `dataset::Dataset`: The dataset being used as a Run input. - -# Constructors -- `DatasetInput(tags, dataset)` -- `DatasetInput(dataset; tags=[])` +- `dataset::Dataset`: The dataset being used as a run input. """ struct DatasetInput tags::Array{Tag} diff --git a/src/types/enums.jl b/src/types/enums.jl index 4c2cb69..b9c1104 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -3,7 +3,7 @@ # Members - `PENDING_REGISTRATION`: Request to register a new model version is pending as server -performs background tasks. + performs background tasks. - `FAILED_REGISTRATION`: Request to register a new model version has failed. - `READY`: Model version is ready for use. """ diff --git a/src/types/experiment.jl b/src/types/experiment.jl index 1529dbc..105a981 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -6,7 +6,7 @@ - `name::String`: Human readable name that identifies the experiment. - `artifact_location::String`: Location where artifacts for the experiment are stored. - `lifecycle_stage::String`: Current life cycle stage of the experiment: “active” or -“deleted”. Deleted experiments are not returned by APIs. + “deleted”. Deleted experiments are not returned by APIs. - `last_update_time::Int64`: Last update time. - `creation_time::Int64`: Creation time. - `tags::Array{Tag}`: Additional metadata key-value pairs. 
diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index 9ba4e6a..ff0a282 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -7,13 +7,7 @@ Base type which defines location and version for MLFlow API service. - `apiroot::String`: API root URL, e.g. `http://localhost:5000/api` - `apiversion::Union{Integer, AbstractFloat}`: used API version, e.g. `2.0` - `headers::Dict`: HTTP headers to be provided with the REST API requests (useful for -authetication tokens) Default is `false`, using the REST API endpoint. - -# Constructors - -- `MLFlow(apiroot; apiversion=2.0,headers=Dict())` -- `MLFlow()` - defaults to `MLFlow(ENV["MLFLOW_TRACKING_URI"])` or -`MLFlow("http://localhost:5000/api")` + authentication tokens). Default is `false`, using the REST API endpoint. # Examples diff --git a/src/types/model_version.jl b/src/types/model_version.jl index 3b07655..1c17c05 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -6,20 +6,20 @@ - `version::String`: Model’s version number. - `creation_timestamp::Int64`: Timestamp recorded when this model_version was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this model_version -was last updated. + was last updated. - `user_id::String`: User that created this model_version. - `current_stage::String`: Current stage for this model_version. - `description::String`: Description of this model_version. - `source::String`: URI indicating the location of the source model artifacts, used when -creating model_version. + creating model_version. - `run_id::String`: MLflow run ID used when creating model_version, if source was generated -by an experiment run stored in MLflow tracking server. + by an experiment run stored in MLflow tracking server. - `status::ModelVersionStatus`: Current status of model_version. - `status_message::String`: Details on current status, if it is pending or failed. - `tags::Array{Tag}`: Additional metadata key-value pairs.
- `run_link::String`: Direct link to the run that generated this version. This field is set -at model version creation time only for model versions whose source run is from a tracking -server that is different from the registry server. + at model version creation time only for model versions whose source run is from a + tracking server that is different from the registry server. - `aliases::Array{String}`: Aliases pointing to this model_version. """ struct ModelVersion diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index ebdd865..7d1f217 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -20,14 +20,14 @@ Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true - `name::String`: Unique name for the model. - `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this -RegisteredModel was last updated. + RegisteredModel was last updated. - `user_id::String`: User that created this RegisteredModel. - `description::String`: Description of this RegisteredModel. - `latest_versions::Array{ModelVersion}`: Collection of latest model versions for each -stage. Only contains models with current READY status. + stage. Only contains models with current READY status. - `tags::Array{Tag}`: Additional metadata key-value pairs. - `aliases::Array{RegisteredModelAlias}`: Aliases pointing to model versions associated -with this RegisteredModel. + with this RegisteredModel. """ struct RegisteredModel name::String diff --git a/src/types/run.jl b/src/types/run.jl index 44d51b6..4b0e94c 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -48,11 +48,11 @@ Metadata of a single run. - `start_time::Int64`: Unix timestamp of when the run started in milliseconds. - `end_time::Int64`: Unix timestamp of when the run ended in milliseconds. 
- `artifact_uri::String`: URI of the directory where artifacts should be uploaded. This can -be a local path (starting with “/”), or a distributed file system (DFS) path, -like s3://bucket/directory or dbfs:/my/directory. If not set, the local ./mlruns directory -is chosen. + be a local path (starting with “/”), or a distributed file system (DFS) path, + like s3://bucket/directory or dbfs:/my/directory. If not set, the local ./mlruns + directory is chosen. - `lifecycle_stage::String`: Current life cycle stage of the experiment: "active" or -"deleted". + "deleted". """ struct RunInfo run_id::String @@ -109,6 +109,11 @@ Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) Run A single run. + +# Fields +- `info::RunInfo`: Metadata of the run. +- `data::RunData`: Run data (metrics, params, and tags). +- `inputs::RunInputs`: Run inputs. """ struct Run info::RunInfo diff --git a/src/utils.jl b/src/utils.jl index 7a28a60..c6993a7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -91,7 +91,3 @@ function parse(::Type{T}, entities::MLFlowUpsertData{T}) where T<:LoggingData end return entities end - -refresh(instance::MLFlow, experiment::Experiment)::Experiment = - getexperiment(instance, experiment.experiment_id) -refresh(instance::MLFlow, run::Run)::Run = getrun(instance, run.info.run_id)