Skip to content

Commit 8243ac6

Browse files
committed
Change ModelClaims API
Signed-off-by: kerthcet <[email protected]>
1 parent 24ee839 commit 8243ac6

37 files changed

+536
-397
lines changed

api/core/v1alpha1/model_types.go

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -120,28 +120,35 @@ type ModelClaim struct {
120120
InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"`
121121
}
122122

123-
type InferenceMode string
123+
type ModelRole string
124124

125125
const (
126-
Standard InferenceMode = "Standard"
127-
SpeculativeDecoding InferenceMode = "SpeculativeDecoding"
126+
// MainRole represents the main model. If only one model is required,
127+
// it must be the main model. Only one main model is allowed.
128+
MainRole ModelRole = "main"
129+
// DraftRole represents the draft model in speculative decoding,
130+
// the main model is the target model then.
131+
DraftRole ModelRole = "draft"
128132
)
129133

130-
// MultiModelsClaim represents claiming for multiple models with different claimModes,
131-
// like standard or speculative-decoding to support different inference scenarios.
132-
type MultiModelsClaim struct {
133-
// ModelNames represents a list of models, there maybe multiple models here
134-
// to support state-of-the-art technologies like speculative decoding.
135-
// If the composedMode is SpeculativeDecoding, the first model is the target model,
136-
// and the second model is the draft model.
137-
// +kubebuilder:validation:MinItems=1
138-
ModelNames []ModelName `json:"modelNames,omitempty"`
139-
// Mode represents the paradigm to serve the model, whether via a standard way
140-
// or via an advanced technique like SpeculativeDecoding.
141-
// +kubebuilder:default=Standard
142-
// +kubebuilder:validation:Enum={Standard,SpeculativeDecoding}
134+
type ModelRepresentative struct {
135+
// Name represents the model name.
136+
Name ModelName `json:"name"`
137+
// Role represents the model role once more than one model is required.
138+
// +kubebuilder:validation:Enum={main,draft}
139+
// +kubebuilder:default=main
143140
// +optional
144-
InferenceMode InferenceMode `json:"inferenceMode,omitempty"`
141+
Role *ModelRole `json:"role,omitempty"`
142+
}
143+
144+
// ModelClaims represents multiple claims for different models.
145+
type ModelClaims struct {
146+
// Models represents a list of models with roles specified. There may be
147+
// multiple models here to support state-of-the-art technologies like
148+
// speculative decoding, where one model is the main (target) model and another one
149+
// is the draft model.
150+
// +kubebuilder:validation:MinItems=1
151+
Models []ModelRepresentative `json:"models,omitempty"`
145152
// InferenceFlavors represents a list of flavors with fungibility supported
146153
// to serve the model.
147154
// - If not set, always apply with the 0-index model by default.

api/core/v1alpha1/zz_generated.deepcopy.go

Lines changed: 47 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/inference/v1alpha1/playground_types.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,16 @@ type PlaygroundSpec struct {
2828
// +kubebuilder:default=1
2929
// +optional
3030
Replicas *int32 `json:"replicas,omitempty"`
31-
// ModelClaim represents claiming for one model, it's the standard claimMode
32-
// of multiModelsClaim compared to other modes like SpeculativeDecoding.
33-
// Most of the time, modelClaim is enough.
34-
// ModelClaim and multiModelsClaim are exclusive configured.
31+
// ModelClaim represents claiming for one model, it's a simplified use case
32+
// of modelClaims. Most of the time, modelClaim is enough.
33+
// ModelClaim and modelClaims are mutually exclusive.
3534
// +optional
3635
ModelClaim *coreapi.ModelClaim `json:"modelClaim,omitempty"`
37-
// MultiModelsClaim represents claiming for multiple models with different claimModes,
38-
// like standard or speculative-decoding to support different inference scenarios.
39-
// ModelClaim and multiModelsClaim are exclusive configured.
36+
// ModelClaims represents claiming for multiple models for more complicated
37+
// use cases like speculative-decoding.
38+
// ModelClaims and modelClaim are mutually exclusive.
4039
// +optional
41-
MultiModelsClaim *coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
40+
ModelClaims *coreapi.ModelClaims `json:"modelClaims,omitempty"`
4241
// BackendConfig represents the inference backend configuration
4342
// under the hood, e.g. vLLM, which is the default backend.
4443
// +optional

api/inference/v1alpha1/service_types.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,8 @@ import (
2727
// Service controller will maintain multi-flavor of workloads with
2828
// different accelerators for cost or performance considerations.
2929
type ServiceSpec struct {
30-
// MultiModelsClaim represents claiming for multiple models with different claimModes,
31-
// like standard or speculative-decoding to support different inference scenarios.
32-
MultiModelsClaim coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
30+
// ModelClaims represents multiple claims for different models.
31+
ModelClaims coreapi.ModelClaims `json:"modelClaims,omitempty"`
3332
// WorkloadTemplate defines the underlying workload layout and configuration.
3433
// Note: the LWS spec might be twisted with various LWS instances to support
3534
// accelerator fungibility or other cutting-edge research.

api/inference/v1alpha1/zz_generated.deepcopy.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/core/v1alpha1/modelclaims.go

Lines changed: 58 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/core/v1alpha1/multimodelsclaim.go

Lines changed: 0 additions & 64 deletions
This file was deleted.

0 commit comments

Comments
 (0)