Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ include Makefile-deps.mk

# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.32.0
ENVTEST_LWS_VERSION = v0.5.1

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
Expand Down Expand Up @@ -84,9 +83,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
webhook output:webhook:artifacts:config=config/webhook \
paths="./cmd/..."
paths="./api/..."
paths="./pkg/..."
paths="./api/...;./pkg/...;./cmd/..."

.PHONY: generate
generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down Expand Up @@ -136,7 +133,7 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.

.PHONY: test-e2e
test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ENVTEST_LWS_VERSION=$(ENVTEST_LWS_VERSION) ./hack/e2e-test.sh
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ./hack/e2e-test.sh

test-deploy-with-helm: kind-image-build
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) TAG=$(GIT_TAG) ./hack/test-deploy-with-helm.sh
Expand Down
5 changes: 0 additions & 5 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,6 @@ type ModelHub struct {
// URIProtocol represents the protocol of the URI.
type URIProtocol string

// Add roles for operating leaderWorkerSet.
//
// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch

// ModelSource represents the source of the model.
// Only one model source will be used.
type ModelSource struct {
Expand Down
2 changes: 1 addition & 1 deletion config/crd/bases/inference.llmaz.io_playgrounds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ spec:
description: |-
InferenceFlavors represents a list of flavor names with fungibility supported
to serve the model.
- If not set, always apply with the 0-index model by default.
- If not set, will employ the model configured flavors by default.
- If set, will lookup the flavor names following the model orders.
items:
type: string
Expand Down
2 changes: 1 addition & 1 deletion config/crd/bases/inference.llmaz.io_services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ spec:
description: |-
InferenceFlavors represents a list of flavor names with fungibility supported
to serve the model.
- If not set, always apply with the 0-index model by default.
- If not set, will employ the model configured flavors by default.
- If set, will lookup the flavor names following the model orders.
items:
type: string
Expand Down
8 changes: 8 additions & 0 deletions config/default/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: global-config
data:
config.data: |
scheduler-name: default-scheduler
# init-container-image: inftyai/model-loader:v0.0.10
3 changes: 3 additions & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ resources:
# crd/kustomization.yaml
- ../webhook

# [customized]
- configmap.yaml

# [INTERNALCERT]
- ../internalcert

Expand Down
1 change: 1 addition & 0 deletions config/prometheus/monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: llmaz
control-plane: controller-manager
7 changes: 7 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ kind: ClusterRole
metadata:
name: manager-role
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
79 changes: 79 additions & 0 deletions docs/examples/envoy-ai-gateway/basic-vllm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
apiVersion: llmaz.io/v1alpha1
kind: OpenModel
metadata:
name: qwen3-0--6b
spec:
familyName: qwen3
source:
modelHub:
modelID: Qwen/Qwen3-0.6B
inferenceConfig:
flavors:
- name: t4 # GPU type
limits:
nvidia.com/gpu: 1
---
apiVersion: inference.llmaz.io/v1alpha1
kind: Playground
metadata:
name: qwen3-0--6b
spec:
replicas: 1
modelClaim:
modelName: qwen3-0--6b
backendRuntimeConfig:
backendName: vllm
version: v0.8.5
---
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
name: default-envoy-ai-gateway
spec:
controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
name: default-envoy-ai-gateway
spec:
gatewayClassName: default-envoy-ai-gateway
listeners:
- name: http
protocol: HTTP
port: 80
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
name: default-envoy-ai-gateway
spec:
schema:
name: OpenAI
targetRefs:
- name: default-envoy-ai-gateway
kind: Gateway
group: gateway.networking.k8s.io
rules:
- matches:
- headers:
- type: Exact
name: x-ai-eg-model
value: qwen3-0--6b
backendRefs:
- name: qwen3-0--6b
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
name: qwen3-0--6b
spec:
timeouts:
request: 3m
schema:
name: OpenAI
backendRef:
name: qwen3-0--6b-lb
kind: Service
port: 8080
---
4 changes: 2 additions & 2 deletions docs/examples/envoy-ai-gateway/basic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,14 @@ spec:
- headers:
- type: Exact
name: x-ai-eg-model
value: qwen2-0.5b
value: qwen2-0--5b
backendRefs:
- name: qwen2-0--5b
- matches:
- headers:
- type: Exact
name: x-ai-eg-model
value: qwen2.5-coder
value: qwen2--5-coder
backendRefs:
- name: qwen2--5-coder
---
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/onsi/gomega v1.37.0
github.com/open-policy-agent/cert-controller v0.12.0
github.com/stretchr/testify v1.9.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.32.5
k8s.io/apiextensions-apiserver v0.32.5
k8s.io/apimachinery v0.32.5
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down
2 changes: 1 addition & 1 deletion hack/e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function deploy {
$KUSTOMIZE build $CWD/test/e2e/config | $KUBECTL apply --server-side -f -
}
function deploy_lws {
$KUBECTL apply --server-side -f https://github.com/kubernetes-sigs/lws/releases/download/$ENVTEST_LWS_VERSION/manifests.yaml
$KUBECTL apply --server-side -f $CWD/test/config/lws/
}
trap cleanup EXIT
startup
Expand Down
2 changes: 1 addition & 1 deletion hack/test-deploy-with-helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function kind_load {
function deploy {
cd $CWD
HELM_EXT_OPTS='--set controllerManager.manager.image.tag=${TAG}' make helm-install
$KUBECTL wait --timeout=30m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen
$KUBECTL wait --timeout=10m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen
echo "all pods of llmaz-system is ready..."
$KUBECTL get pod -n llmaz-system
}
Expand Down
30 changes: 28 additions & 2 deletions pkg/controller/inference/service_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list
//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand All @@ -83,6 +86,31 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

logger.V(10).Info("reconcile Service", "Service", klog.KObj(service))

cm := &corev1.ConfigMap{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a question here 🤔 : Do we need to get cm obj every time we reconcile? Or can we use event triggering? Because I understand that cm will not be modified in most cases, it seems meaningless to get it every time in Reconcile. But this is not a blocking item but more of an optimization item. If it is due to the code width problem, I can submit a PR to optimize this part in the near future.

Copy link
Member Author

@kerthcet kerthcet Jun 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anyway, it's not a big problem. We can optimize this later.

if err := r.Get(ctx, types.NamespacedName{Name: "llmaz-global-config", Namespace: "llmaz-system"}, cm); err != nil {
if client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, fmt.Errorf("failed to get llmaz-global-config configmap: %w", err)
}
}
configs, err := helper.ParseGlobalConfigmap(cm)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to parse global configurations: %w", err)
}

// Set the global configurations to the service.
if configs.SchedulerName != "" {
if service.Spec.WorkloadTemplate.LeaderTemplate != nil && service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName == "" {
service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName = configs.SchedulerName
}
if service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName == "" {
service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName = configs.SchedulerName
}

if err := r.Client.Update(ctx, service); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update service: %w", err)
}
}

models, err := helper.FetchModelsByService(ctx, r.Client, service)
if err != nil {
return ctrl.Result{}, err
Expand All @@ -93,8 +121,6 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}

// TODO: handle fungibility

if err := util.Patch(ctx, r.Client, workloadApplyConfiguration); err != nil {
return ctrl.Result{}, err
}
Expand Down
44 changes: 44 additions & 0 deletions pkg/controller_helper/configmap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
Copyright 2025 The InftyAI Team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package helper

import (
"fmt"

"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
)

// GlobalConfigs holds the settings decoded from the YAML document stored
// under the "config.data" key of a ConfigMap (see ParseGlobalConfigmap).
type GlobalConfigs struct {
// SchedulerName is populated from the "scheduler-name" YAML key.
SchedulerName string `yaml:"scheduler-name"`
// InitContainerImage is populated from the "init-container-image" YAML key.
InitContainerImage string `yaml:"init-container-image"`
}

// ParseGlobalConfigmap decodes the YAML document stored under the
// "config.data" key of cm into a GlobalConfigs value.
//
// It returns an error when cm is nil, when the "config.data" key is absent,
// or when the payload cannot be unmarshalled into GlobalConfigs. The
// unmarshal error is wrapped with %w so callers can inspect it with
// errors.Is / errors.As. On any error the returned pointer is nil.
func ParseGlobalConfigmap(cm *corev1.ConfigMap) (*GlobalConfigs, error) {
	// Guard against a nil pointer so callers get an error instead of a panic.
	if cm == nil {
		return nil, fmt.Errorf("ConfigMap is nil")
	}

	// Reading from a nil Data map is safe in Go and simply reports !ok.
	rawConfig, ok := cm.Data["config.data"]
	if !ok {
		return nil, fmt.Errorf("config.data not found in ConfigMap")
	}

	var configs GlobalConfigs
	if err := yaml.Unmarshal([]byte(rawConfig), &configs); err != nil {
		return nil, fmt.Errorf("failed to unmarshal config.data: %w", err)
	}

	return &configs, nil
}
2 changes: 1 addition & 1 deletion site/content/en/docs/integrations/prometheus-operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This document provides deployment steps to install and configure Prometheus Oper
Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install the Prometheus Operator, or simply run the following command:

```bash
curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl delete -f -
curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl create -f -
```

Ensure that the Prometheus Operator Pod is running successfully.
Expand Down
9 changes: 9 additions & 0 deletions test/config/others/global-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: llmaz-global-config
namespace: llmaz-system
data:
config.data: |
scheduler-name: inftyai-scheduler
init-container-image: inftyai/model-loader:v0.0.10
2 changes: 1 addition & 1 deletion test/e2e/playground_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
Image("ollama/ollama").Version("latest").
Command([]string{"sh", "-c"}).
Arg("default", []string{"ollama serve & while true;do output=$(ollama list 2>&1);if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;ollama run {{.ModelName}};while true;do sleep 60;done"}).
Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
Request("default", "cpu", "1").Request("default", "memory", "2Gi").Limit("default", "cpu", "2").Limit("default", "memory", "4Gi").Obj()
gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())

model := wrapper.MakeModel("qwen2-0--5b").FamilyName("qwen2").ModelSourceWithURI("ollama://qwen2:0.5b").Obj()
Expand Down
8 changes: 8 additions & 0 deletions test/integration/controller/inference/suit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -120,7 +121,14 @@ var _ = BeforeSuite(func() {
serviceController := inferencecontroller.NewServiceReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("service"))
Expect(serviceController.SetupWithManager(mgr)).NotTo(HaveOccurred())

Expect(k8sClient.Create(ctx, &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: "llmaz-system",
},
})).ToNot(HaveOccurred())

Expect(util.Setup(ctx, k8sClient, "../../../config/backends")).To(Succeed())
Expect(util.Setup(ctx, k8sClient, "../../../config/others")).To(Succeed())

go func() {
defer GinkgoRecover()
Expand Down
Loading
Loading