Skip to content

Commit 1987254

Browse files
committed
Create lb service for inference Service
Signed-off-by: kerthcet <[email protected]>
1 parent a60ff0b commit 1987254

File tree

4 files changed

+72
-2
lines changed

4 files changed

+72
-2
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ spec:
9090
9191
#### Expose the service
9292
93+
By default, llmaz creates a ClusterIP service named `<service>-lb` for load balancing.
94+
9395
```cmd
94-
kubectl port-forward pod/opt-125m-0 8080:8080
96+
kubectl port-forward svc/opt-125m-lb 8080:8080
9597
```
9698

9799
#### Get registered models

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,18 @@ rules:
1313
- list
1414
- update
1515
- watch
16+
- apiGroups:
17+
- ""
18+
resources:
19+
- services
20+
verbs:
21+
- create
22+
- delete
23+
- get
24+
- list
25+
- patch
26+
- update
27+
- watch
1628
- apiGroups:
1729
- admissionregistration.k8s.io
1830
resources:

pkg/controller/inference/service_controller.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
"k8s.io/apimachinery/pkg/runtime"
2929
"k8s.io/apimachinery/pkg/types"
30+
"k8s.io/apimachinery/pkg/util/intstr"
3031
metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
3132
"k8s.io/client-go/tools/record"
3233
"k8s.io/klog/v2"
@@ -66,6 +67,7 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r
6667
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete
6768
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
6869
//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
70+
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
6971

7072
// Reconcile is part of the main kubernetes reconciliation loop which aims to
7173
// move the current state of the cluster closer to the desired state.
@@ -97,6 +99,11 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
9799
return ctrl.Result{}, err
98100
}
99101

102+
// Create a service for the leader pods of the lws for loadbalancing.
103+
if err := CreateServiceIfNotExists(ctx, r.Client, r.Scheme, service); err != nil {
104+
return ctrl.Result{}, err
105+
}
106+
100107
// Handle status.
101108

102109
workload := &lws.LeaderWorkerSet{}
@@ -299,3 +306,48 @@ func setControllerReferenceForWorkload(owner metav1.Object, lws *applyconfigurat
299306
WithController(true))
300307
return nil
301308
}
309+
310+
func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, service *inferenceapi.Service) error {
311+
log := ctrl.LoggerFrom(ctx)
312+
// The load balancing service name.
313+
svcName := service.Name + "-lb"
314+
315+
var svc corev1.Service
316+
if err := k8sClient.Get(ctx, types.NamespacedName{Name: svcName, Namespace: service.Namespace}, &svc); err != nil {
317+
if client.IgnoreNotFound(err) != nil {
318+
return err
319+
}
320+
svc = corev1.Service{
321+
ObjectMeta: metav1.ObjectMeta{
322+
Name: svcName,
323+
Namespace: service.Namespace,
324+
},
325+
Spec: corev1.ServiceSpec{
326+
Ports: []corev1.ServicePort{
327+
{
328+
Name: "http",
329+
Protocol: corev1.ProtocolTCP,
330+
Port: modelSource.DEFAULT_BACKEND_PORT,
331+
TargetPort: intstr.FromInt(modelSource.DEFAULT_BACKEND_PORT),
332+
},
333+
},
334+
Selector: map[string]string{
335+
lws.SetNameLabelKey: service.Name,
336+
// the leader pod.
337+
lws.WorkerIndexLabelKey: "0",
338+
},
339+
},
340+
}
341+
342+
// Set the controller owner reference for garbage collection and reconciliation.
343+
if err := ctrl.SetControllerReference(service, &svc, Scheme); err != nil {
344+
return err
345+
}
346+
// create the service in the cluster
347+
log.V(2).Info("Creating service.")
348+
if err := k8sClient.Create(ctx, &svc); err != nil {
349+
return err
350+
}
351+
}
352+
return nil
353+
}

test/util/validation/validate_service.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,12 @@ func ValidateService(ctx context.Context, k8sClient client.Client, service *infe
8484
return err
8585
}
8686
}
87-
return nil
8887

88+
if err := k8sClient.Get(ctx, types.NamespacedName{Name: service.Name + "-lb", Namespace: service.Namespace}, &corev1.Service{}); err != nil {
89+
return err
90+
}
91+
92+
return nil
8993
}, util.IntegrationTimeout, util.Interval).Should(gomega.Succeed())
9094
}
9195

0 commit comments

Comments
 (0)