Skip to content

Commit

Permalink
Fix: #966: golang part for CleanPolicy position
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Pan <[email protected]>
  • Loading branch information
panpan0000 committed Dec 18, 2023
1 parent 1c6ae0a commit c18318e
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 21 deletions.
5 changes: 2 additions & 3 deletions pkg/operators/pytorch-operator/apis/pytorch/v1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package v1

import (
"strings"

common "github.com/kubeflow/arena/pkg/operators/tf-operator/apis/common/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -87,9 +86,9 @@ func setTypeNameToCamelCase(job *PyTorchJob, typ PyTorchReplicaType) {
// SetDefaults_PyTorchJob sets any unspecified values to defaults.
func SetDefaults_PyTorchJob(job *PyTorchJob) {
// Set default cleanpod policy to None.
if job.Spec.CleanPodPolicy == nil {
if job.Spec.RunPolicy.CleanPodPolicy == nil {
policy := common.CleanPodPolicyNone
job.Spec.CleanPodPolicy = &policy
job.Spec.RunPolicy.CleanPodPolicy = &policy
}

// Update the key of PyTorchReplicaSpecs to camel case.
Expand Down
4 changes: 2 additions & 2 deletions pkg/operators/pytorch-operator/apis/pytorch/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ type PyTorchJobSpec struct {
// +optional
BackoffLimit *int32 `json:"backoffLimit,omitempty"`

// Defines the policy for cleaning up pods after the PyTorchJob completes.
// RunPolicy groups the runtime policies of the PyTorchJob, including the
// policy for cleaning up pods after the job completes
// (runPolicy.cleanPodPolicy, which defaults to None).
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`
RunPolicy *common.RunPolicy `json:"runPolicy,omitempty"`

// Defines the TTL for cleaning up finished PyTorchJobs (temporary
// before Kubernetes adds the cleanup controller).
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions pkg/operators/tf-operator/apis/common/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,34 @@ const (
CleanPodPolicyNone CleanPodPolicy = "None"
)


// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
//
// NOTE(review): job specs reference this type through a pointer field
// (`RunPolicy *common.RunPolicy` with `omitempty`), so a spec that omits
// runPolicy carries a nil pointer. Check that pointer before reading any of
// the fields below.
//
// +k8s:deepcopy-gen=true
type RunPolicy struct {
	// CleanPodPolicy defines the policy to kill pods after the job completes.
	// This type applies no default itself; the defaulting code of each job
	// kind fills the field in when it is nil (SetDefaults_TFJob uses Running,
	// SetDefaults_PyTorchJob uses None).
	CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

	// TTLSecondsAfterFinished is the TTL to clean up jobs.
	// It may take extra ReconcilePeriod seconds for the cleanup, since
	// reconcile gets called periodically.
	// Default to infinite.
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`

	// ActiveDeadlineSeconds specifies the duration in seconds, relative to the
	// startTime, that the job may be active before the system tries to
	// terminate it; the value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`

	// BackoffLimit is the optional number of retries before marking this job
	// failed.
	// +optional
	BackoffLimit *int32 `json:"backoffLimit,omitempty"`
}


// RestartPolicy describes how the replicas should be restarted.
// Can be one of: Always, OnFailure, Never, or ExitCode.
// If none of the following policies is specified, the default one
Expand Down
27 changes: 27 additions & 0 deletions pkg/operators/tf-operator/apis/common/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,33 @@ const (
CleanPodPolicyNone CleanPodPolicy = "None"
)

// RunPolicy encapsulates various runtime policies of the distributed training
// job, for example how to clean up resources and how long the job can stay
// active.
//
// NOTE(review): job specs reference this type through a pointer field
// (`RunPolicy *common.RunPolicy` with `omitempty`), so a spec that omits
// runPolicy carries a nil pointer. Check that pointer before reading any of
// the fields below.
//
// +k8s:deepcopy-gen=true
type RunPolicy struct {
	// CleanPodPolicy defines the policy to kill pods after the job completes.
	// This type applies no default itself; the job's defaulting code is
	// expected to fill it in when nil (SetDefaults_TFJob uses Running).
	CleanPodPolicy *CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

	// TTLSecondsAfterFinished is the TTL to clean up jobs.
	// It may take extra ReconcilePeriod seconds for the cleanup, since
	// reconcile gets called periodically.
	// Default to infinite.
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`

	// ActiveDeadlineSeconds specifies the duration in seconds, relative to the
	// startTime, that the job may be active before the system tries to
	// terminate it; the value must be a positive integer.
	// +optional
	ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"`

	// BackoffLimit is the optional number of retries before marking this job
	// failed.
	// +optional
	BackoffLimit *int32 `json:"backoffLimit,omitempty"`
}


// RestartPolicy describes how the replicas should be restarted.
// Only one of the following restart policies may be specified.
// If none of the following policies is specified, the default one
Expand Down
4 changes: 2 additions & 2 deletions pkg/operators/tf-operator/apis/tensorflow/v1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ func setTypeNameToCamelCase(tfJob *TFJob, typ TFReplicaType) {
// SetDefaults_TFJob sets any unspecified values to defaults.
func SetDefaults_TFJob(tfjob *TFJob) {
// Set default cleanpod policy to Running.
if tfjob.Spec.CleanPodPolicy == nil {
if tfjob.Spec.RunPolicy.CleanPodPolicy == nil {
running := common.CleanPodPolicyRunning
tfjob.Spec.CleanPodPolicy = &running
tfjob.Spec.RunPolicy.CleanPodPolicy = &running
}

// Update the key of TFReplicaSpecs to camel case.
Expand Down
3 changes: 2 additions & 1 deletion pkg/operators/tf-operator/apis/tensorflow/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ type TFJobSpec struct {

// RunPolicy groups the runtime policies of the TFJob, including the policy
// for cleaning up pods after the TFJob completes (runPolicy.cleanPodPolicy,
// which defaults to Running).
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`
RunPolicy *common.RunPolicy `json:"runPolicy,omitempty"`


// Defines the TTL for cleaning up finished TFJobs (temporary
// before kubernetes adds the cleanup controller).
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pkg/operators/tf-operator/apis/tensorflow/v1beta2/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ func setTypeNameToCamelCase(tfJob *TFJob, typ TFReplicaType) {
// SetDefaults_TFJob sets any unspecified values to defaults.
func SetDefaults_TFJob(tfjob *TFJob) {
// Set default cleanpod policy to Running.
if tfjob.Spec.CleanPodPolicy == nil {
if tfjob.Spec.RunPolicy.CleanPodPolicy == nil {
running := common.CleanPodPolicyRunning
tfjob.Spec.CleanPodPolicy = &running
tfjob.Spec.RunPolicy.CleanPodPolicy = &running
}

// Update the key of TFReplicaSpecs to camel case.
Expand Down
2 changes: 1 addition & 1 deletion pkg/operators/tf-operator/apis/tensorflow/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type TFJobSpec struct {
// RunPolicy groups the runtime policies of the TFJob, including
// runPolicy.cleanPodPolicy, the policy to kill pods after the TFJob has
// succeeded (defaults to Running).
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`
RunPolicy *common.RunPolicy `json:"runPolicy,omitempty"`

// TTLSecondsAfterFinished is the TTL to clean up tf-jobs (temporary
// before kubernetes adds the cleanup controller).
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c18318e

Please sign in to comment.