Skip to content

Commit eae24fa

Browse files
Merge pull request #142 from kerthcet/feat/support-tgi
Add helm chart support
2 parents f02361f + 8507472 commit eae24fa

27 files changed

+34440
-4
lines changed

Makefile

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ image-build:
185185
--build-arg CGO_ENABLED=$(CGO_ENABLED) \
186186
$(IMAGE_BUILD_EXTRA_OPTS) ./
187187
image-load: IMAGE_BUILD_EXTRA_OPTS=--load
188-
image-load: image-load
188+
image-load: image-build
189189
image-push: IMAGE_BUILD_EXTRA_OPTS=--push
190190
image-push: image-build
191191

@@ -284,4 +284,20 @@ artifacts: kustomize
284284
if [ -d artifacts ]; then rm -rf artifacts; fi
285285
mkdir -p artifacts
286286
$(KUSTOMIZE) build config/default -o artifacts/manifests.yaml
287-
@$(call clean-manifests)
287+
@$(call clean-manifests)
288+
289+
HELMIFY ?= $(LOCALBIN)/helmify
290+
291+
.PHONY: helmify
292+
helmify: $(HELMIFY) ## Download helmify locally if necessary.
293+
$(HELMIFY): $(LOCALBIN)
294+
test -s $(LOCALBIN)/helmify || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@latest
295+
296+
.PHONY: helm
297+
helm: manifests kustomize helmify
298+
$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
299+
$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir
300+
301+
.PHONY: helm-install
302+
helm-install: helm
303+
helm upgrade --install llmaz ./chart --namespace llmaz-system -f ./chart/values.global.yaml

chart/.helmignore

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/

chart/Chart.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
apiVersion: v2
2+
name: llmaz
3+
description: A Helm chart for llmaz
4+
# A chart can be either an 'application' or a 'library' chart.
5+
#
6+
# Application charts are a collection of templates that can be packaged into versioned archives
7+
# to be deployed.
8+
#
9+
# Library charts provide useful utilities or functions for the chart developer. They're included as
10+
# a dependency of application charts to inject those utilities and functions into the rendering
11+
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
12+
type: application
13+
# This is the chart version. This version number should be incremented each time you make changes
14+
# to the chart and its templates, including the app version.
15+
# Versions are expected to follow Semantic Versioning (https://semver.org/)
16+
version: 0.0.1
17+
# This is the version number of the application being deployed. This version number should be
18+
# incremented each time you make changes to the application. Versions are not expected to
19+
# follow Semantic Versioning. They should reflect the version the application is using.
20+
# It is recommended to use it with quotes.
21+
appVersion: "0.0.6"

chart/crds/backendruntime-crd.yaml

Lines changed: 314 additions & 0 deletions
Large diffs are not rendered by default.

chart/crds/leaderworkerset.x-k8s.io_leaderworkersets.yaml

Lines changed: 16015 additions & 0 deletions
Large diffs are not rendered by default.

chart/crds/openmodel-crd.yaml

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
apiVersion: apiextensions.k8s.io/v1
2+
kind: CustomResourceDefinition
3+
metadata:
4+
annotations:
5+
controller-gen.kubebuilder.io/version: v0.14.0
6+
name: openmodels.llmaz.io
7+
spec:
8+
conversion:
9+
strategy: Webhook
10+
webhook:
11+
clientConfig:
12+
service:
13+
name: llmaz-webhook-service
14+
namespace: llmaz-system
15+
path: /convert
16+
conversionReviewVersions:
17+
- v1
18+
group: llmaz.io
19+
names:
20+
kind: OpenModel
21+
listKind: OpenModelList
22+
plural: openmodels
23+
shortNames:
24+
- om
25+
singular: openmodel
26+
scope: Cluster
27+
versions:
28+
- name: v1alpha1
29+
schema:
30+
openAPIV3Schema:
31+
description: OpenModel is the Schema for the open models API
32+
properties:
33+
apiVersion:
34+
description: |-
35+
APIVersion defines the versioned schema of this representation of an object.
36+
Servers should convert recognized schemas to the latest internal value, and
37+
may reject unrecognized values.
38+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
39+
type: string
40+
kind:
41+
description: |-
42+
Kind is a string value representing the REST resource this object represents.
43+
Servers may infer this from the endpoint the client submits requests to.
44+
Cannot be updated.
45+
In CamelCase.
46+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
47+
type: string
48+
metadata:
49+
type: object
50+
spec:
51+
description: ModelSpec defines the desired state of Model
52+
properties:
53+
familyName:
54+
description: |-
55+
FamilyName represents the model type, like llama2, which will be auto injected
56+
to the labels with the key of `llmaz.io/model-family-name`.
57+
type: string
58+
inferenceFlavors:
59+
description: |-
60+
InferenceFlavors represents the accelerator requirements to serve the model.
61+
Flavors are fungible following the priority represented by the slice order.
62+
items:
63+
description: |-
64+
Flavor defines the accelerator requirements for a model and the necessary parameters
65+
in autoscaling. Right now, it will be used in two places:
66+
- Pod scheduling with node selectors specified.
67+
- Cluster autoscaling with essential parameters provided.
68+
properties:
69+
name:
70+
description: Name represents the flavor name, which will be
71+
used in model claim.
72+
type: string
73+
nodeSelector:
74+
additionalProperties:
75+
type: string
76+
description: |-
77+
NodeSelector represents the node candidates for Pod placements, if a node doesn't
78+
meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin.
79+
If nodeSelector is empty, it means every node is a candidate.
80+
type: object
81+
params:
82+
additionalProperties:
83+
type: string
84+
description: |-
85+
Params stores other useful parameters and will be consumed by the autoscaling components
86+
like cluster-autoscaler, Karpenter.
87+
E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
88+
instance-type: p4d.24xlarge for AWS.
89+
type: object
90+
requests:
91+
additionalProperties:
92+
anyOf:
93+
- type: integer
94+
- type: string
95+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
96+
x-kubernetes-int-or-string: true
97+
description: |-
98+
Requests defines the required accelerators to serve the model, like nvidia.com/gpu: 8.
99+
When GPU number is greater than 8, like 32, then multi-host inference is enabled and
100+
32/8=4 hosts will be grouped as a unit, each host will have a resource request as
101+
nvidia.com/gpu: 8. This may change in the future if the GPU number limit is broken.
102+
Not recommended to set the cpu and memory usage here.
103+
If using playground, you can define the cpu/mem usage at backendConfig.
104+
If using service, you can define the cpu/mem at the container resources.
105+
Note: if you define the same accelerator requests at playground/service as well,
106+
the requests here will be overridden.
107+
type: object
108+
required:
109+
- name
110+
type: object
111+
maxItems: 8
112+
type: array
113+
source:
114+
description: |-
115+
Source represents the source of the model, there're several ways to load
116+
the model such as loading from huggingface, OCI registry, s3, host path and so on.
117+
properties:
118+
modelHub:
119+
description: ModelHub represents the model registry for model
120+
downloads.
121+
properties:
122+
filename:
123+
description: |-
124+
Filename refers to a specified model file rather than the whole repo.
125+
This is helpful to download a specified GGUF model rather than downloading
126+
the whole repo which includes all kinds of quantized models.
127+
TODO: this is only supported with Huggingface, add support for ModelScope
128+
in the near future.
129+
type: string
130+
modelID:
131+
description: |-
132+
ModelID refers to the model identifier on model hub,
133+
such as meta-llama/Meta-Llama-3-8B.
134+
type: string
135+
name:
136+
default: Huggingface
137+
description: Name refers to the model registry, such as huggingface.
138+
enum:
139+
- Huggingface
140+
- ModelScope
141+
type: string
142+
revision:
143+
description: |-
144+
Revision refers to a Git revision id which can be a branch name, a tag, or a commit hash.
145+
Most of the time, you don't need to specify it.
146+
type: string
147+
type: object
148+
uri:
149+
description: |-
150+
URI represents various kinds of model sources following the URI protocol, e.g.:
151+
- OSS: oss://<bucket>.<endpoint>/<path-to-your-model>
152+
type: string
153+
type: object
154+
required:
155+
- familyName
156+
- source
157+
type: object
158+
status:
159+
description: ModelStatus defines the observed state of Model
160+
properties:
161+
conditions:
162+
description: Conditions represents the Inference condition.
163+
items:
164+
description: "Condition contains details for one aspect of the current
165+
state of this API Resource.\n---\nThis struct is intended for
166+
direct use as an array at the field path .status.conditions. For
167+
example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
168+
observations of a foo's current state.\n\t // Known .status.conditions.type
169+
are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
170+
+patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
171+
\ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
172+
patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
173+
\ // other fields\n\t}"
174+
properties:
175+
lastTransitionTime:
176+
description: |-
177+
lastTransitionTime is the last time the condition transitioned from one status to another.
178+
This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
179+
format: date-time
180+
type: string
181+
message:
182+
description: |-
183+
message is a human readable message indicating details about the transition.
184+
This may be an empty string.
185+
maxLength: 32768
186+
type: string
187+
observedGeneration:
188+
description: |-
189+
observedGeneration represents the .metadata.generation that the condition was set based upon.
190+
For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
191+
with respect to the current state of the instance.
192+
format: int64
193+
minimum: 0
194+
type: integer
195+
reason:
196+
description: |-
197+
reason contains a programmatic identifier indicating the reason for the condition's last transition.
198+
Producers of specific condition types may define expected values and meanings for this field,
199+
and whether the values are considered a guaranteed API.
200+
The value should be a CamelCase string.
201+
This field may not be empty.
202+
maxLength: 1024
203+
minLength: 1
204+
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
205+
type: string
206+
status:
207+
description: status of the condition, one of True, False, Unknown.
208+
enum:
209+
- "True"
210+
- "False"
211+
- Unknown
212+
type: string
213+
type:
214+
description: |-
215+
type of condition in CamelCase or in foo.example.com/CamelCase.
216+
---
217+
Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
218+
useful (see .node.status.conditions), the ability to deconflict is important.
219+
The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
220+
maxLength: 316
221+
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
222+
type: string
223+
required:
224+
- lastTransitionTime
225+
- message
226+
- reason
227+
- status
228+
- type
229+
type: object
230+
type: array
231+
type: object
232+
type: object
233+
served: true
234+
storage: true
235+
subresources:
236+
status: {}

0 commit comments

Comments
 (0)