
Commit b5af734

Support shared memory size
Signed-off-by: kerthcet <[email protected]>
1 parent 1987254 commit b5af734

8 files changed: 74 additions, 4 deletions


.github/ISSUE_TEMPLATE/BUG_REPORT.md

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ assignees: ''
 **Environment**:
 
 - Kubernetes version (use `kubectl version`):
-- LWS version:
 - llmaz version (use `git describe --tags --dirty --always`):
 - Cloud provider or hardware configuration:
 - OS (e.g: `cat /etc/os-release`):

.github/ISSUE_TEMPLATE/ENHANCEMENT.md

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 name: Enhancement Request
 about: Suggest an enhancement to the llmaz project
 title: ''
-labels: kind/feature
+labels: feature
 assignees: ''
 
 ---

api/core/v1alpha1/model_types.go

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@ package v1alpha1
 
 import (
     v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -131,6 +132,9 @@ type InferenceConfig struct {
     // +kubebuilder:validation:MaxItems=8
     // +optional
     Flavors []Flavor `json:"flavors,omitempty"`
+    // SharedMemorySize represents the size of /dev/shm required in the runtime of
+    // the inference workload.
+    SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"`
 }
 
 type ModelName string
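
To make the new field concrete, a model manifest could request a larger /dev/shm as in the sketch below. This assumes the InferenceConfig struct is exposed as spec.inferenceConfig; the model name, family, and source fields are illustrative and not taken from this commit.

  # Hypothetical OpenModel using the new sharedMemorySize field.
  apiVersion: llmaz.io/v1alpha1
  kind: OpenModel
  metadata:
    name: example-model            # illustrative name
  spec:
    familyName: example            # assumed field, not part of this commit
    source:
      modelHub:
        modelID: example/model     # assumed field, not part of this commit
    inferenceConfig:
      sharedMemorySize: 2Gi        # the field added by this commit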

config/crd/bases/llmaz.io_openmodels.yaml

Lines changed: 9 additions & 0 deletions
@@ -105,6 +105,15 @@ spec:
                     type: object
                   maxItems: 8
                   type: array
+                sharedMemorySize:
+                  anyOf:
+                  - type: integer
+                  - type: string
+                  description: |-
+                    SharedMemorySize represents the size of /dev/shm required in the runtime of
+                    the inference workload.
+                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                  x-kubernetes-int-or-string: true
               type: object
             source:
               description: |-
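
The pattern above is the standard Kubernetes resource.Quantity grammar, and x-kubernetes-int-or-string lets the field be written as either an integer or a string. For reference, values like the following all pass validation (an illustrative list, not taken from the commit):

  # Quantities matching the resource.Quantity pattern:
  - 1Gi          # binary (power-of-two) suffix
  - 512Mi
  - 1500M        # decimal suffix
  - 1073741824   # bare integer, interpreted as bytes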

pkg/controller/inference/playground_controller.go

Lines changed: 38 additions & 2 deletions
@@ -265,7 +265,7 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi
 
     if multiHost {
         workload.LeaderWorkerTemplate.LeaderTemplate = &template
-        workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(playground, backendRuntime)
+        workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(models, playground, backendRuntime)
     } else {
         workload.LeaderWorkerTemplate.WorkerTemplate = template
     }
@@ -366,12 +366,30 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
         },
     }
 
+    // Construct the /dev/shm volume when a shared memory size is configured.
+    if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+        template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
+            Name: "dshm",
+            VolumeSource: corev1.VolumeSource{
+                EmptyDir: &corev1.EmptyDirVolumeSource{
+                    Medium:    corev1.StorageMediumMemory,
+                    SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize,
+                },
+            },
+        })
+
+        template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
+            Name:      "dshm",
+            MountPath: "/dev/shm",
+        })
+    }
+
     return template, nil
 }
 
 // This is a copy of buildTemplate with some refactors, only used in multi-node cases.
 // The worker template has no args and no container port.
-func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
+func buildWorkerTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
     parser := helper.NewBackendRuntimeParser(backendRuntime)
 
     envs := parser.Envs()
@@ -423,6 +441,24 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in
         },
     }
 
+    // Construct the /dev/shm volume when a shared memory size is configured.
+    if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+        template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
+            Name: "dshm",
+            VolumeSource: corev1.VolumeSource{
+                EmptyDir: &corev1.EmptyDirVolumeSource{
+                    Medium:    corev1.StorageMediumMemory,
+                    SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize,
+                },
+            },
+        })
+
+        template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
+            Name:      "dshm",
+            MountPath: "/dev/shm",
+        })
+    }
+
     return template
 }
 
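Both buildTemplate and buildWorkerTemplate add the same pod-level plumbing. Rendered as YAML, the fragment appended to the pod template would look roughly like this sketch (the container name and the 1Gi size are illustrative):

  # Generated pod template fragment when sharedMemorySize is set:
  spec:
    volumes:
    - name: dshm
      emptyDir:
        medium: Memory       # corev1.StorageMediumMemory, i.e. tmpfs
        sizeLimit: 1Gi       # models[0].Spec.InferenceConfig.SharedMemorySize
    containers:
    - name: runner           # illustrative container name
      volumeMounts:
      - name: dshm
        mountPath: /dev/shm

Backing /dev/shm with a Memory-medium emptyDir is the standard Kubernetes workaround for the container runtime's small default shared-memory segment, which multi-GPU inference backends routinely outgrow.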

test/util/mock.go

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@ func MockASampleModel() *coreapi.OpenModel {
         InferenceFlavors(
             *wrapper.MakeFlavor("a100").SetRequest("nvidia.com/gpu", "1").Obj(),
             *wrapper.MakeFlavor("a10").SetRequest("nvidia.com/gpu", "2").Obj()).
+        SharedMemorySize("1Gi").
         Obj()
 }
 

test/util/validation/validate_playground.go

Lines changed: 12 additions & 0 deletions
@@ -260,6 +260,18 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
                 return errors.New("command not right")
             }
         }
+
+        if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+            if multiHost {
+                if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+                    return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+                }
+            }
+            if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+                return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+            }
+        }
+
         return nil
 
     }, util.IntegrationTimeout, util.Interval).Should(gomega.Succeed())

test/util/wrapper/model.go

Lines changed: 9 additions & 0 deletions
@@ -107,6 +107,15 @@ func (w *ModelWrapper) Label(k, v string) *ModelWrapper {
     return w
 }
 
+func (w *ModelWrapper) SharedMemorySize(v string) *ModelWrapper {
+    if w.Spec.InferenceConfig == nil {
+        w.Spec.InferenceConfig = &coreapi.InferenceConfig{}
+    }
+    value := resource.MustParse(v)
+    w.Spec.InferenceConfig.SharedMemorySize = &value
+    return w
+}
+
 func MakeFlavor(name string) *FlavorWrapper {
     return &FlavorWrapper{
         coreapi.Flavor{
