Skip to content

Commit 99d9f57

Browse files
committed
Add global configmap
Signed-off-by: kerthcet <[email protected]>
1 parent 9f1f543 commit 99d9f57

File tree

14 files changed

+225
-7
lines changed

14 files changed

+225
-7
lines changed

config/default/configmap.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: global-config
5+
data:
6+
config.data: |
7+
scheduler-name: default
8+
# init-container-image: inftyai/model-loader:v0.0.10

config/default/kustomization.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ resources:
2222
# crd/kustomization.yaml
2323
- ../webhook
2424

25+
# [customized]
26+
- configmap.yaml
27+
2528
# [INTERNALCERT]
2629
- ../internalcert
2730

config/prometheus/monitor.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ spec:
2121
selector:
2222
matchLabels:
2323
app.kubernetes.io/name: llmaz
24+
control-plane: controller-manager
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
apiVersion: llmaz.io/v1alpha1
2+
kind: OpenModel
3+
metadata:
4+
name: qwen3-0--6b
5+
spec:
6+
familyName: qwen3
7+
source:
8+
modelHub:
9+
modelID: Qwen/Qwen3-0.6B
10+
inferenceConfig:
11+
flavors:
12+
- name: t4 # GPU type
13+
limits:
14+
nvidia.com/gpu: 1
15+
---
16+
apiVersion: inference.llmaz.io/v1alpha1
17+
kind: Playground
18+
metadata:
19+
name: qwen3-0--6b
20+
spec:
21+
replicas: 1
22+
modelClaim:
23+
modelName: qwen3-0--6b
24+
backendRuntimeConfig:
25+
backendName: vllm
26+
version: v0.8.5
27+
---
28+
apiVersion: gateway.networking.k8s.io/v1
29+
kind: GatewayClass
30+
metadata:
31+
name: default-envoy-ai-gateway
32+
spec:
33+
controllerName: gateway.envoyproxy.io/gatewayclass-controller
34+
---
35+
apiVersion: gateway.networking.k8s.io/v1
36+
kind: Gateway
37+
metadata:
38+
name: default-envoy-ai-gateway
39+
spec:
40+
gatewayClassName: default-envoy-ai-gateway
41+
listeners:
42+
- name: http
43+
protocol: HTTP
44+
port: 80
45+
---
46+
apiVersion: aigateway.envoyproxy.io/v1alpha1
47+
kind: AIGatewayRoute
48+
metadata:
49+
name: default-envoy-ai-gateway
50+
spec:
51+
schema:
52+
name: OpenAI
53+
targetRefs:
54+
- name: default-envoy-ai-gateway
55+
kind: Gateway
56+
group: gateway.networking.k8s.io
57+
rules:
58+
- matches:
59+
- headers:
60+
- type: Exact
61+
name: x-ai-eg-model
62+
value: qwen3-0--6b
63+
backendRefs:
64+
- name: qwen3-0--6b
65+
---
66+
apiVersion: aigateway.envoyproxy.io/v1alpha1
67+
kind: AIServiceBackend
68+
metadata:
69+
name: qwen3-0--6b
70+
spec:
71+
timeouts:
72+
request: 3m
73+
schema:
74+
name: OpenAI
75+
backendRef:
76+
name: qwen3-0--6b-lb
77+
kind: Service
78+
port: 8080
79+
---

docs/examples/envoy-ai-gateway/basic.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ spec:
8282
- headers:
8383
- type: Exact
8484
name: x-ai-eg-model
85-
value: qwen2-0.5b
85+
value: qwen2-0--5b
8686
backendRefs:
8787
- name: qwen2-0--5b
8888
- matches:
8989
- headers:
9090
- type: Exact
9191
name: x-ai-eg-model
92-
value: qwen2.5-coder
92+
value: qwen2--5-coder
9393
backendRefs:
9494
- name: qwen2--5-coder
9595
---

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ require (
88
github.com/onsi/gomega v1.37.0
99
github.com/open-policy-agent/cert-controller v0.12.0
1010
github.com/stretchr/testify v1.9.0
11+
gopkg.in/yaml.v2 v2.4.0
1112
k8s.io/api v0.32.5
1213
k8s.io/apiextensions-apiserver v0.32.5
1314
k8s.io/apimachinery v0.32.5

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP
232232
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
233233
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
234234
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
235+
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
236+
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
235237
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
236238
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
237239
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

pkg/controller/inference/service_controller.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
6868
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
6969
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
7070
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
71+
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list
7172

7273
// Reconcile is part of the main kubernetes reconciliation loop which aims to
7374
// move the current state of the cluster closer to the desired state.
@@ -83,6 +84,31 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
8384

8485
logger.V(10).Info("reconcile Service", "Service", klog.KObj(service))
8586

87+
cm := &corev1.ConfigMap{}
88+
if err := r.Get(ctx, types.NamespacedName{Name: "llmaz-global-config", Namespace: "llmaz-system"}, cm); err != nil {
89+
if client.IgnoreNotFound(err) != nil {
90+
return ctrl.Result{}, fmt.Errorf("failed to get llmaz-global-config configmap: %w", err)
91+
}
92+
}
93+
configs, err := helper.ParseGlobalConfigmap(cm)
94+
if err != nil {
95+
return ctrl.Result{}, fmt.Errorf("failed to parse global configurations: %w", err)
96+
}
97+
98+
// Set the global configurations to the service.
99+
if configs.SchedulerName != "" {
100+
if service.Spec.WorkloadTemplate.LeaderTemplate != nil && service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName == "" {
101+
service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName = configs.SchedulerName
102+
}
103+
if service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName == "" {
104+
service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName = configs.SchedulerName
105+
}
106+
107+
if err := r.Client.Update(ctx, service); err != nil {
108+
return ctrl.Result{}, fmt.Errorf("failed to update service: %w", err)
109+
}
110+
}
111+
86112
models, err := helper.FetchModelsByService(ctx, r.Client, service)
87113
if err != nil {
88114
return ctrl.Result{}, err
@@ -93,8 +119,6 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
93119
return ctrl.Result{}, err
94120
}
95121

96-
// TODO: handle fungibility
97-
98122
if err := util.Patch(ctx, r.Client, workloadApplyConfiguration); err != nil {
99123
return ctrl.Result{}, err
100124
}

pkg/controller_helper/configmap.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
Copyright 2025 The InftyAI Team.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package helper
18+
19+
import (
20+
"fmt"
21+
22+
"gopkg.in/yaml.v2"
23+
corev1 "k8s.io/api/core/v1"
24+
)
25+
26+
// GlobalConfigs holds the cluster-wide settings decoded from the YAML document
// stored under the "config.data" key of the global ConfigMap.
type GlobalConfigs struct {
27+
// SchedulerName is read from the "scheduler-name" YAML key.
SchedulerName string `yaml:"scheduler-name"`
28+
// InitContainerImage is read from the "init-container-image" YAML key.
// NOTE(review): presumably the image used for the model-loader init
// container; its consumer is not visible here, so confirm against callers.
InitContainerImage string `yaml:"init-container-image"`
29+
}
30+
31+
func ParseGlobalConfigmap(cm *corev1.ConfigMap) (*GlobalConfigs, error) {
32+
rawConfig, ok := cm.Data["config.data"]
33+
if !ok {
34+
return nil, fmt.Errorf("config.data not found in ConfigMap")
35+
}
36+
37+
var configs GlobalConfigs
38+
err := yaml.Unmarshal([]byte(rawConfig), &configs)
39+
if err != nil {
40+
return nil, fmt.Errorf("failed to unmarshal config.data: %v", err)
41+
}
42+
43+
return &configs, nil
44+
}

site/content/en/docs/integrations/prometheus-operator.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ This document provides deployment steps to install and configure Prometheus Oper
1010
Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install prometheus operator or simply run the following command:
1111

1212
```bash
13-
curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl delete -f -
13+
curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl create -f -
1414
```
1515

1616
Ensure that the Prometheus Operator Pod is running successfully.

0 commit comments

Comments
 (0)