Skip to content

Commit f02361f

Browse files
Merge pull request #139 from kerthcet/feat/backendRuntime
[2/N] Add backendRuntime implementation
2 parents 49009ae + 4b145f4 commit f02361f

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+675
-863
lines changed

api/core/v1alpha1/model_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ type ModelStatus struct {
181181
//+genclient
182182
//+kubebuilder:object:root=true
183183
//+kubebuilder:subresource:status
184-
//+kubebuilder:resource:scope=Cluster
184+
//+kubebuilder:resource:shortName=om,scope=Cluster
185185

186186
// OpenModel is the Schema for the open models API
187187
type OpenModel struct {

api/inference/v1alpha1/backendruntime_types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import (
2424
type InferenceMode string
2525

2626
const (
27-
DefaultInferenceMode InferenceMode = "default"
28-
SpeculativeDecodingInferenceMode InferenceMode = "speculative-decoding"
27+
DefaultInferenceMode InferenceMode = "Default"
28+
SpeculativeDecodingInferenceMode InferenceMode = "SpeculativeDecoding"
2929
)
3030

3131
type BackendRuntimeArg struct {
@@ -47,6 +47,7 @@ type BackendRuntimeSpec struct {
4747
// They can be appended or overwritten by the Playground args.
4848
// The key is the inference option, like default one or advanced
4949
// speculativeDecoding, the values are the corresponding args.
50+
// A flag wrapped with {{ .XXX }} is a template placeholder waiting to be rendered.
5051
Args []BackendRuntimeArg `json:"args,omitempty"`
5152
// Envs represents the environments set to the container.
5253
// +optional
@@ -65,7 +66,7 @@ type BackendRuntimeStatus struct {
6566

6667
//+kubebuilder:object:root=true
6768
//+kubebuilder:subresource:status
68-
//+kubebuilder:resource:scope=Cluster
69+
//+kubebuilder:resource:shortName=br,scope=Cluster
6970

7071
// BackendRuntime is the Schema for the backendRuntime API
7172
type BackendRuntime struct {

api/inference/v1alpha1/config_types.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ const (
3030

3131
type BackendRuntimeConfig struct {
3232
// Name represents the inference backend under the hood, e.g. vLLM.
33-
// +kubebuilder:validation:Enum={vllm,sglang,llamacpp}
3433
// +kubebuilder:default=vllm
3534
// +optional
3635
Name *BackendName `json:"name,omitempty"`

api/inference/v1alpha1/playground_types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ type PlaygroundStatus struct {
6161
//+genclient
6262
//+kubebuilder:object:root=true
6363
//+kubebuilder:subresource:status
64+
//+kubebuilder:resource:shortName={pl}
6465

6566
// Playground is the Schema for the playgrounds API
6667
type Playground struct {

config/crd/bases/inference.llmaz.io_backendruntimes.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ spec:
1111
kind: BackendRuntime
1212
listKind: BackendRuntimeList
1313
plural: backendruntimes
14+
shortNames:
15+
- br
1416
singular: backendruntime
1517
scope: Cluster
1618
versions:
@@ -45,6 +47,7 @@ spec:
4547
They can be appended or overwritten by the Playground args.
4648
The key is the inference option, like default one or advanced
4749
speculativeDecoding, the values are the corresponding args.
50+
A flag wrapped with {{ .XXX }} is a template placeholder waiting to be rendered.
4851
items:
4952
properties:
5053
flags:

config/crd/bases/inference.llmaz.io_playgrounds.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ spec:
1111
kind: Playground
1212
listKind: PlaygroundList
1313
plural: playgrounds
14+
shortNames:
15+
- pl
1416
singular: playground
1517
scope: Namespaced
1618
versions:
@@ -179,10 +181,6 @@ spec:
179181
default: vllm
180182
description: Name represents the inference backend under the hood,
181183
e.g. vLLM.
182-
enum:
183-
- vllm
184-
- sglang
185-
- llamacpp
186184
type: string
187185
resources:
188186
description: |-

config/crd/bases/llmaz.io_openmodels.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ spec:
1111
kind: OpenModel
1212
listKind: OpenModelList
1313
plural: openmodels
14+
shortNames:
15+
- om
1416
singular: openmodel
1517
scope: Cluster
1618
versions:

config/rbac/role.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ rules:
3434
- apiGroups:
3535
- inference.llmaz.io
3636
resources:
37-
- backends
37+
- backendruntimes
3838
verbs:
3939
- create
4040
- delete
@@ -46,13 +46,13 @@ rules:
4646
- apiGroups:
4747
- inference.llmaz.io
4848
resources:
49-
- backends/finalizers
49+
- backendruntimes/finalizers
5050
verbs:
5151
- update
5252
- apiGroups:
5353
- inference.llmaz.io
5454
resources:
55-
- backends/status
55+
- backendruntimes/status
5656
verbs:
5757
- get
5858
- patch

docs/assets/arch.png

6.32 KB
Loading

docs/examples/llamacpp/playground.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ spec:
66
replicas: 1
77
modelClaim:
88
modelName: qwen2-0--5b-gguf
9-
backendConfig:
9+
backendRuntimeConfig:
1010
name: llamacpp
1111
args:
1212
- -fa # use flash attention

0 commit comments

Comments
 (0)