diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go index 6bf4a8bc..eba28772 100644 --- a/api/inference/v1alpha1/playground_types.go +++ b/api/inference/v1alpha1/playground_types.go @@ -44,6 +44,7 @@ type PlaygroundSpec struct { BackendRuntimeConfig *BackendRuntimeConfig `json:"backendRuntimeConfig,omitempty"` // ElasticConfig defines the configuration for elastic usage, // e.g. the max/min replicas. + // +optional ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` } @@ -56,8 +57,9 @@ type ElasticConfig struct { MinReplicas *int32 `json:"minReplicas,omitempty"` // MaxReplicas indicates the maximum number of inference workloads based on the traffic. // Default to nil means there's no limit for the instance number. - // +optional - MaxReplicas *int32 `json:"maxReplicas,omitempty"` + // +kubebuilder:validation:Required + // +kubebuilder:validation:Minimum:=1 + MaxReplicas int32 `json:"maxReplicas,omitempty"` // ScaleTrigger defines the rules to scale the workloads. // Only one trigger cloud work at a time, mostly used in Playground. // ScaleTrigger defined here will "overwrite" the scaleTrigger in the recommendedConfig. diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go index b5bea51c..3100f603 100644 --- a/api/inference/v1alpha1/zz_generated.deepcopy.go +++ b/api/inference/v1alpha1/zz_generated.deepcopy.go @@ -224,11 +224,6 @@ func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) { *out = new(int32) **out = **in } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int32) - **out = **in - } if in.ScaleTrigger != nil { in, out := &in.ScaleTrigger, &out.ScaleTrigger *out = new(ScaleTrigger) diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml index 588e4cf5..00ec2e20 100644 --- a/config/crd/bases/inference.llmaz.io_playgrounds.yaml +++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml @@ -242,6 +242,7 @@ spec: MaxReplicas indicates the maximum number of inference workloads based on the traffic. Default to nil means there's no limit for the instance number. format: int32 + minimum: 1 type: integer minReplicas: default: 1 @@ -864,6 +865,8 @@ spec: type: array type: object type: object + required: + - maxReplicas type: object modelClaim: description: |- diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go index fe45f7b8..15fd6043 100644 --- a/pkg/controller/inference/playground_controller.go +++ b/pkg/controller/inference/playground_controller.go @@ -557,12 +557,7 @@ func newHPA(playground *inferenceapi.Playground) *autoscalingv2.HorizontalPodAut } hpa.Spec.MinReplicas = playground.Spec.ElasticConfig.MinReplicas - if playground.Spec.ElasticConfig.MaxReplicas == nil { - // The value is hardcoded, because maxReplicas is required by HPA. - hpa.Spec.MaxReplicas = 99999 - } else { - hpa.Spec.MaxReplicas = *playground.Spec.ElasticConfig.MaxReplicas - } + hpa.Spec.MaxReplicas = playground.Spec.ElasticConfig.MaxReplicas return hpa } diff --git a/pkg/webhook/playground_webhook.go b/pkg/webhook/playground_webhook.go index 8ce509c9..96272950 100644 --- a/pkg/webhook/playground_webhook.go +++ b/pkg/webhook/playground_webhook.go @@ -145,8 +145,8 @@ func (w *PlaygroundWebhook) generateValidate(obj runtime.Object) field.ErrorList allErrs = append(allErrs, field.Forbidden(specPath.Child("elasticConfig.minReplicas"), "minReplicas couldn't be 0")) } - if playground.Spec.ElasticConfig.MinReplicas != nil && playground.Spec.ElasticConfig.MaxReplicas != nil { - if *playground.Spec.ElasticConfig.MinReplicas >= *playground.Spec.ElasticConfig.MaxReplicas { + if playground.Spec.ElasticConfig.MinReplicas != nil { + if *playground.Spec.ElasticConfig.MinReplicas >= playground.Spec.ElasticConfig.MaxReplicas { allErrs = append(allErrs, field.Invalid(specPath.Child("elasticConfig.scaleTrigger.hpa"), *playground.Spec.ElasticConfig.MinReplicas, "minReplicas must be less than maxReplicas")) } } diff --git a/test/util/wrapper/playground.go b/test/util/wrapper/playground.go index fdcf7b91..f05dbd80 100644 --- a/test/util/wrapper/playground.go +++ b/test/util/wrapper/playground.go @@ -160,7 +160,7 @@ func (w *PlaygroundWrapper) ElasticConfig(minReplicas, maxReplicas int32) *Playg if w.Spec.ElasticConfig == nil { w.Spec.ElasticConfig = &inferenceapi.ElasticConfig{} } - w.Spec.ElasticConfig.MaxReplicas = ptr.To[int32](maxReplicas) + w.Spec.ElasticConfig.MaxReplicas = maxReplicas w.Spec.ElasticConfig.MinReplicas = ptr.To[int32](minReplicas) return w }