@@ -98,14 +98,13 @@ type FlavorName string
98
98
type Flavor struct {
99
99
// Name represents the flavor name, which will be used in model claim.
100
100
Name FlavorName `json:"name"`
101
- // Requests defines the required accelerators to serve the model, like nvidia.com/gpu: 8.
102
- // When GPU number is greater than 8, like 32, then multi-host inference is enabled and
103
- // 32/8=4 hosts will be grouped as a unit, each host will have a resource request as
104
- // nvidia.com/gpu: 8. This may change in the future if the GPU number limit is broken.
105
- // Not recommended to set the cpu and memory usage here.
106
- // If using playground, you can define the cpu/mem usage at backendConfig.
107
- // If using service, you can define the cpu/mem at the container resources.
108
- // Note: if you define the same accelerator requests at playground/service as well,
101
+ // Requests defines the required accelerators to serve the model for each replica,
102
+ // like <nvidia.com/gpu: 8>. For multi-host cases, the requests here indicate
103
+ // the resource requirements for each replica. This may change in the future.
104
+ // Not recommended to set the cpu and memory usage here:
105
+ // - if using playground, you can define the cpu/mem usage at backendConfig.
106
+ // - if using inference service, you can define the cpu/mem at the container resources.
107
+ // However, if you define the same accelerator requests at playground/service as well,
109
108
// the requests here will be overwritten.
110
109
// +optional
111
110
Requests v1.ResourceList `json:"requests,omitempty"`
0 commit comments