@@ -265,7 +265,7 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi
265
265
266
266
if multiHost {
267
267
workload .LeaderWorkerTemplate .LeaderTemplate = & template
268
- workload .LeaderWorkerTemplate .WorkerTemplate = buildWorkerTemplate (playground , backendRuntime )
268
+ workload .LeaderWorkerTemplate .WorkerTemplate = buildWorkerTemplate (models , playground , backendRuntime )
269
269
} else {
270
270
workload .LeaderWorkerTemplate .WorkerTemplate = template
271
271
}
@@ -366,12 +366,30 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
366
366
},
367
367
}
368
368
369
+ // construct /dev/shm size
370
+ if models [0 ].Spec .InferenceConfig != nil && models [0 ].Spec .InferenceConfig .SharedMemorySize != nil {
371
+ template .Spec .Volumes = append (template .Spec .Volumes , corev1.Volume {
372
+ Name : "dshm" ,
373
+ VolumeSource : corev1.VolumeSource {
374
+ EmptyDir : & corev1.EmptyDirVolumeSource {
375
+ Medium : corev1 .StorageMediumMemory ,
376
+ SizeLimit : models [0 ].Spec .InferenceConfig .SharedMemorySize ,
377
+ },
378
+ },
379
+ })
380
+
381
+ template .Spec .Containers [0 ].VolumeMounts = append (template .Spec .Containers [0 ].VolumeMounts , corev1.VolumeMount {
382
+ Name : "dshm" ,
383
+ MountPath : "/dev/shm" ,
384
+ })
385
+ }
386
+
369
387
return template , nil
370
388
}
371
389
372
390
// This is a copy of buildTemplate with some refactors, only used in multi-node cases.
373
391
// The worker template has no args and no container ports.
374
- func buildWorkerTemplate (playground * inferenceapi.Playground , backendRuntime * inferenceapi.BackendRuntime ) corev1.PodTemplateSpec {
392
+ func buildWorkerTemplate (models [] * coreapi. OpenModel , playground * inferenceapi.Playground , backendRuntime * inferenceapi.BackendRuntime ) corev1.PodTemplateSpec {
375
393
parser := helper .NewBackendRuntimeParser (backendRuntime )
376
394
377
395
envs := parser .Envs ()
@@ -423,6 +441,24 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in
423
441
},
424
442
}
425
443
444
+ // construct /dev/shm size
445
+ if models [0 ].Spec .InferenceConfig != nil && models [0 ].Spec .InferenceConfig .SharedMemorySize != nil {
446
+ template .Spec .Volumes = append (template .Spec .Volumes , corev1.Volume {
447
+ Name : "dshm" ,
448
+ VolumeSource : corev1.VolumeSource {
449
+ EmptyDir : & corev1.EmptyDirVolumeSource {
450
+ Medium : corev1 .StorageMediumMemory ,
451
+ SizeLimit : models [0 ].Spec .InferenceConfig .SharedMemorySize ,
452
+ },
453
+ },
454
+ })
455
+
456
+ template .Spec .Containers [0 ].VolumeMounts = append (template .Spec .Containers [0 ].VolumeMounts , corev1.VolumeMount {
457
+ Name : "dshm" ,
458
+ MountPath : "/dev/shm" ,
459
+ })
460
+ }
461
+
426
462
return template
427
463
}
428
464
0 commit comments