InftyAI · InftyAI-Agent · Jun 2, 2025 · Jun 2, 2025 · Jun 2, 2025 · Jun 2, 2025
diff --git a/Makefile b/Makefile
@@ -2,7 +2,6 @@ include Makefile-deps.mk
 
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
 ENVTEST_K8S_VERSION = 1.32.0
-ENVTEST_LWS_VERSION = v0.5.1
 
 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
 ifeq (,$(shell go env GOBIN))
@@ -84,9 +83,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
 		rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
 		crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
 		webhook output:webhook:artifacts:config=config/webhook \
-		paths="./cmd/..."
-		paths="./api/..."
-		paths="./pkg/..."
+		paths="./api/...;./pkg/...;./cmd/..."
 
 .PHONY: generate
 generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@@ -136,7 +133,9 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
 
 .PHONY: test-e2e
 test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build
-	E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ENVTEST_LWS_VERSION=$(ENVTEST_LWS_VERSION) ./hack/e2e-test.sh
+	E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ./hack/e2e-test.sh
 
+# Run ./hack/test-deploy-with-helm.sh once the kind-image-build prerequisite is done.
+.PHONY: test-deploy-with-helm
 test-deploy-with-helm: kind-image-build
 	E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) TAG=$(GIT_TAG) ./hack/test-deploy-with-helm.sh

diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
@@ -71,11 +71,6 @@ type ModelHub struct {
 // URIProtocol represents the protocol of the URI.
 type URIProtocol string
 
-// Add roles for operating leaderWorkerSet.
-//
-// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete
-// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch
-
 // ModelSource represents the source of the model.
 // Only one model source will be used.
 type ModelSource struct {

diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -897,7 +897,7 @@ spec:
                     description: |-
                       InferenceFlavors represents a list of flavor names with fungibility supported
                       to serve the model.
-                      - If not set, always apply with the 0-index model by default.
+                      - If not set, will employ the model configured flavors by default.
                       - If set, will lookup the flavor names following the model orders.
                     items:
                       type: string

diff --git a/config/crd/bases/inference.llmaz.io_services.yaml b/config/crd/bases/inference.llmaz.io_services.yaml
@@ -52,7 +52,7 @@ spec:
                     description: |-
                       InferenceFlavors represents a list of flavor names with fungibility supported
                       to serve the model.
-                      - If not set, always apply with the 0-index model by default.
+                      - If not set, will employ the model configured flavors by default.
                       - If set, will lookup the flavor names following the model orders.
                     items:
                       type: string

diff --git a/config/default/configmap.yaml b/config/default/configmap.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: global-config
+data:
+  config.data: |
+    scheduler-name: default-scheduler
+    # init-container-image: inftyai/model-loader:v0.0.10
diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml
@@ -22,6 +22,9 @@ resources:
 # crd/kustomization.yaml
 - ../webhook
 
+# [customized]
+- configmap.yaml
+
 # [INTERNALCERT]
 - ../internalcert
 

diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml
@@ -21,3 +21,4 @@ spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: llmaz
+      control-plane: controller-manager
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
@@ -4,6 +4,13 @@ kind: ClusterRole
 metadata:
   name: manager-role
 rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
 - apiGroups:
   - ""
   resources:

diff --git a/docs/examples/envoy-ai-gateway/basic-vllm.yaml b/docs/examples/envoy-ai-gateway/basic-vllm.yaml
@@ -0,0 +1,79 @@
+apiVersion: llmaz.io/v1alpha1
+kind: OpenModel
+metadata:
+  name: qwen3-0--6b
+spec:
+  familyName: qwen3
+  source:
+    modelHub:
+      modelID: Qwen/Qwen3-0.6B
+  inferenceConfig:
+    flavors:
+      - name: t4 # GPU type
+        limits:
+          nvidia.com/gpu: 1
+---
+apiVersion: inference.llmaz.io/v1alpha1
+kind: Playground
+metadata:
+  name: qwen3-0--6b
+spec:
+  replicas: 1
+  modelClaim:
+    modelName: qwen3-0--6b
+  backendRuntimeConfig:
+    backendName: vllm
+    version: v0.8.5
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: GatewayClass
+metadata:
+  name: default-envoy-ai-gateway
+spec:
+  controllerName: gateway.envoyproxy.io/gatewayclass-controller
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: default-envoy-ai-gateway
+spec:
+  gatewayClassName: default-envoy-ai-gateway
+  listeners:
+    - name: http
+      protocol: HTTP
+      port: 80
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIGatewayRoute
+metadata:
+  name: default-envoy-ai-gateway
+spec:
+  schema:
+    name: OpenAI
+  targetRefs:
+    - name: default-envoy-ai-gateway
+      kind: Gateway
+      group: gateway.networking.k8s.io
+  rules:
+    - matches:
+        - headers:
+            - type: Exact
+              name: x-ai-eg-model
+              value: qwen3-0--6b
+      backendRefs:
+        - name: qwen3-0--6b
+---
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: AIServiceBackend
+metadata:
+  name: qwen3-0--6b
+spec:
+  timeouts:
+    request: 3m
+  schema:
+    name: OpenAI
+  backendRef:
+    name: qwen3-0--6b-lb
+    kind: Service
+    port: 8080
+---
diff --git a/docs/examples/envoy-ai-gateway/basic.yaml b/docs/examples/envoy-ai-gateway/basic.yaml
@@ -82,14 +82,14 @@ spec:
         - headers:
             - type: Exact
               name: x-ai-eg-model
-              value: qwen2-0.5b
+              value: qwen2-0--5b
       backendRefs:
         - name: qwen2-0--5b
     - matches:
         - headers:
             - type: Exact
               name: x-ai-eg-model
-              value: qwen2.5-coder
+              value: qwen2--5-coder
       backendRefs:
         - name: qwen2--5-coder
 ---

diff --git a/go.mod b/go.mod
@@ -8,6 +8,7 @@ require (
 	github.com/onsi/gomega v1.37.0
 	github.com/open-policy-agent/cert-controller v0.12.0
 	github.com/stretchr/testify v1.9.0
+	gopkg.in/yaml.v2 v2.4.0
 	k8s.io/api v0.32.5
 	k8s.io/apiextensions-apiserver v0.32.5
 	k8s.io/apimachinery v0.32.5

diff --git a/go.sum b/go.sum
@@ -232,6 +232,8 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP
 gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
 gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh
@@ -40,7 +40,7 @@ function deploy {
     $KUSTOMIZE build $CWD/test/e2e/config | $KUBECTL apply --server-side -f -
 }
 function deploy_lws {
-    $KUBECTL apply --server-side -f https://github.com/kubernetes-sigs/lws/releases/download/$ENVTEST_LWS_VERSION/manifests.yaml
+    $KUBECTL apply --server-side -f $CWD/test/config/lws/
 }
 trap cleanup EXIT
 startup

diff --git a/hack/test-deploy-with-helm.sh b/hack/test-deploy-with-helm.sh
@@ -25,7 +25,7 @@ function kind_load {
 function deploy {
     cd $CWD
     HELM_EXT_OPTS='--set controllerManager.manager.image.tag=${TAG}' make helm-install
-    $KUBECTL wait --timeout=30m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen
+    $KUBECTL wait --timeout=10m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen
     echo "all pods of llmaz-system is ready..."
     $KUBECTL get pod -n llmaz-system
 }

diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go
@@ -68,6 +68,9 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
 //+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
 //+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
 //+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list
+//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
@@ -83,6 +86,31 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 
 	logger.V(10).Info("reconcile Service", "Service", klog.KObj(service))
 
+	cm := &corev1.ConfigMap{}
+	if err := r.Get(ctx, types.NamespacedName{Name: "llmaz-global-config", Namespace: "llmaz-system"}, cm); err != nil {
+		if client.IgnoreNotFound(err) != nil {
+			return ctrl.Result{}, fmt.Errorf("failed to get llmaz-global-config configmap: %w", err)
+		}
+	}
+	configs, err := helper.ParseGlobalConfigmap(cm)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("failed to parse global configurations: %w", err)
+	}
+
+	// Set the global configurations to the service.
+	if configs.SchedulerName != "" {
+		if service.Spec.WorkloadTemplate.LeaderTemplate != nil && service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName == "" {
+			service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName = configs.SchedulerName
+		}
+		if service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName == "" {
+			service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName = configs.SchedulerName
+		}
+
+		if err := r.Client.Update(ctx, service); err != nil {
+			return ctrl.Result{}, fmt.Errorf("failed to update service: %w", err)
+		}
+	}
+
 	models, err := helper.FetchModelsByService(ctx, r.Client, service)
 	if err != nil {
 		return ctrl.Result{}, err
@@ -93,8 +121,6 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 		return ctrl.Result{}, err
 	}
 
-	// TODO: handle fungibility
-
 	if err := util.Patch(ctx, r.Client, workloadApplyConfiguration); err != nil {
 		return ctrl.Result{}, err
 	}

diff --git a/pkg/controller_helper/configmap.go b/pkg/controller_helper/configmap.go
@@ -0,0 +1,60 @@
+/*
+Copyright 2025 The InftyAI Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package helper
+
+import (
+	"fmt"
+
+	"gopkg.in/yaml.v2"
+	corev1 "k8s.io/api/core/v1"
+)
+
+// GlobalConfigs holds the settings decoded from the "config.data" entry of
+// the global ConfigMap.
+type GlobalConfigs struct {
+	// SchedulerName is decoded from the "scheduler-name" key.
+	SchedulerName string `yaml:"scheduler-name"`
+
+	// InitContainerImage is decoded from the "init-container-image" key.
+	InitContainerImage string `yaml:"init-container-image"`
+}
+
+// ParseGlobalConfigmap decodes the YAML document stored under the
+// "config.data" key of cm into a GlobalConfigs.
+//
+// It returns an error when cm is nil, when the "config.data" key is absent,
+// or when the value cannot be unmarshalled. The unmarshal error is wrapped
+// with %w so callers can inspect it with errors.Is / errors.As.
+func ParseGlobalConfigmap(cm *corev1.ConfigMap) (*GlobalConfigs, error) {
+	// Guard against a nil pointer so the function fails with an error
+	// instead of panicking on cm.Data.
+	if cm == nil {
+		return nil, fmt.Errorf("configmap is nil")
+	}
+
+	rawConfig, ok := cm.Data["config.data"]
+	if !ok {
+		return nil, fmt.Errorf("config.data not found in ConfigMap")
+	}
+
+	var configs GlobalConfigs
+	if err := yaml.Unmarshal([]byte(rawConfig), &configs); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal config.data: %w", err)
+	}
+
+	return &configs, nil
+}
diff --git a/site/content/en/docs/integrations/prometheus-operator.md b/site/content/en/docs/integrations/prometheus-operator.md
@@ -10,7 +10,7 @@ This document provides deployment steps to install and configure Prometheus Oper
 Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install prometheus operator or simply run the following command:
 
 ```bash
-curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl delete -f -
+curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl create -f -
 ```
 
 Ensure that the Prometheus Operator Pod is running successfully.

diff --git a/test/config/others/global-configmap.yaml b/test/config/others/global-configmap.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: llmaz-global-config
+  namespace: llmaz-system
+data:
+  config.data: |
+    scheduler-name: inftyai-scheduler
+    init-container-image: inftyai/model-loader:v0.0.10
diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go
@@ -55,7 +55,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 			Image("ollama/ollama").Version("latest").
 			Command([]string{"sh", "-c"}).
 			Arg("default", []string{"ollama serve & while true;do output=$(ollama list 2>&1);if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;ollama run {{.ModelName}};while true;do sleep 60;done"}).
-			Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
+			Request("default", "cpu", "1").Request("default", "memory", "2Gi").Limit("default", "cpu", "2").Limit("default", "memory", "4Gi").Obj()
 		gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())
 
 		model := wrapper.MakeModel("qwen2-0--5b").FamilyName("qwen2").ModelSourceWithURI("ollama://qwen2:0.5b").Obj()

diff --git a/test/integration/controller/inference/suit_test.go b/test/integration/controller/inference/suit_test.go
@@ -31,6 +31,7 @@ import (
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/rest"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -120,7 +121,14 @@ var _ = BeforeSuite(func() {
 	serviceController := inferencecontroller.NewServiceReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("service"))
 	Expect(serviceController.SetupWithManager(mgr)).NotTo(HaveOccurred())
 
+	Expect(k8sClient.Create(ctx, &corev1.Namespace{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "llmaz-system",
+		},
+	})).ToNot(HaveOccurred())
+
 	Expect(util.Setup(ctx, k8sClient, "../../../config/backends")).To(Succeed())
+	Expect(util.Setup(ctx, k8sClient, "../../../config/others")).To(Succeed())
 
 	go func() {
 		defer GinkgoRecover()