@@ -98,14 +98,13 @@ type FlavorName string
98
98
type Flavor struct {
99
99
// Name represents the flavor name, which will be used in model claim.
100
100
Name FlavorName `json:"name"`
101
- // Requests defines the required accelerators to serve the model, like nvidia.com/gpu: 8.
102
- // When GPU number is greater than 8, like 32, then multi-host inference is enabled and
103
- // 32/8=4 hosts will be grouped as a unit, each host will have a resource request as
104
- // nvidia.com/gpu: 8. This may change in the future if the GPU number limit is broken.
105
- // Not recommended to set the cpu and memory usage here.
106
- // If using playground, you can define the cpu/mem usage at backendConfig.
107
- // If using service, you can define the cpu/mem at the container resources.
108
- // Note: if you define the same accelerator requests at playground/service as well,
101
+ // Requests defines the required accelerators to serve the model for each replica,
102
+ // like <nvidia.com/gpu: 8>. For multi-host cases, the requests here indicate
103
+ // the resource requirements for each replica. This may change in the future.
104
+ // Not recommended to set the cpu and memory usage here:
105
+ // - if using playground, you can define the cpu/mem usage at backendConfig.
106
+ // - if using inference service, you can define the cpu/mem at the container resources.
107
+ // However, if you define the same accelerator requests at playground/service as well,
109
108
// the requests here will be overwritten.
110
109
// +optional
111
110
Requests v1.ResourceList `json:"requests,omitempty"`
0 commit comments