Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions development/simulator/components.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Debug only: exposes the simulator pod through a NodePort (30082) so that it
# can be reached from a controller that is deployed on a Mac.
apiVersion: v1
kind: Service
metadata:
  name: vllm-llama3-8b-instruct
  namespace: default
  labels:
    prometheus-discovery: "true"
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    # Scrape the port the simulator container is started on (--port 8010);
    # no container in the simulator manifests listens on 8000.
    prometheus.io/port: "8010"
spec:
  # Selects the pods created by the simulator Deployment via its model label.
  selector:
    model.aibrix.ai/name: "vllm-llama3-8b-instruct"
  ports:
    - protocol: TCP
      name: inference
      port: 8010
      targetPort: 8010
      nodePort: 30082
  type: NodePort
---
# ServiceAccount used by the simulator pods; the Deployment references it
# through spec.template.spec.serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
name: vllm-llama3-8b-instruct-sa
namespace: default
---
# Namespaced Role granting read-only access (get/list/watch) to pods in the
# "default" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: vllm-llama3-8b-instruct-reader-role
namespace: default
rules:
# The empty apiGroup denotes the core API group, where pods live.
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
---
# Binds the simulator ServiceAccount to the pod-reader Role so the simulator
# pods may get/list/watch pods in the "default" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: vllm-llama3-8b-instruct-pod-reader-role-binding
  namespace: default
subjects:
  - kind: ServiceAccount
    name: vllm-llama3-8b-instruct-sa
    namespace: default
roleRef:
  kind: Role
  # Must equal metadata.name of the pod-reader Role declared in this file;
  # a roleRef to a non-existent Role grants no permissions.
  name: vllm-llama3-8b-instruct-reader-role
  apiGroup: rbac.authorization.k8s.io
---
# Namespaced Role granting read-only access (get/list/watch) to Deployments
# in the "default" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: default
name: vllm-llama3-8b-instruct-deployment-reader-role
rules:
# Deployments belong to the "apps" API group.
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get", "list", "watch"]
---
# Binds the simulator ServiceAccount to the deployment-reader Role declared
# in this file.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: vllm-llama3-8b-instruct-deployment-reader-role-binding
namespace: default
subjects:
- kind: ServiceAccount
name: vllm-llama3-8b-instruct-sa
namespace: default
roleRef:
kind: Role
# Matches metadata.name of the deployment-reader Role.
name: vllm-llama3-8b-instruct-deployment-reader-role
apiGroup: rbac.authorization.k8s.io
54 changes: 54 additions & 0 deletions development/simulator/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Single-replica Deployment that runs the llm-d inference simulator image under
# the model name "vllm-llama3-8b-instruct", listening on port 8010.
apiVersion: apps/v1
kind: Deployment
metadata:
name: vllm-llama3-8b-instruct
namespace: default
labels:
# NOTE(review): the model.aibrix.ai/* labels are presumably what AIBrix
# components use to discover the model and its port — confirm against the controllers.
model.aibrix.ai/name: "vllm-llama3-8b-instruct"
model.aibrix.ai/port: "8010"
adapter.model.aibrix.ai/enabled: "true"
spec:
replicas: 1
selector:
# Every label listed here is also set on the pod template below.
matchLabels:
model.aibrix.ai/name: "vllm-llama3-8b-instruct"
adapter.model.aibrix.ai/enabled: "true"
app: vllm-llama3-8b-instruct
template:
metadata:
labels:
model.aibrix.ai/name: "vllm-llama3-8b-instruct"
model.aibrix.ai/port: "8010"
adapter.model.aibrix.ai/enabled: "true"
app: vllm-llama3-8b-instruct
spec:
# Pods run as the ServiceAccount declared in components.yaml.
serviceAccountName: vllm-llama3-8b-instruct-sa
containers:
- name: llm-engine
image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
imagePullPolicy: IfNotPresent
# Simulator command-line flags: served model name, listen port, and LoRA settings.
# NOTE(review): --lora-modules presumably pre-registers an adapter named
# "food-review-1" — confirm against the simulator's documentation.
args:
- --model
- vllm-llama3-8b-instruct
- --port
- "8010"
- --max-loras
- "2"
- --lora-modules
- '{"name": "food-review-1"}'
# Pod name and namespace are injected from the pod's own metadata via fieldRef.
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
ports:
# Same port number as the --port argument above.
- containerPort: 8010
name: http
protocol: TCP
# Only a small CPU request is declared; no limits are set in this manifest.
resources:
requests:
cpu: 10m
5 changes: 5 additions & 0 deletions development/simulator/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Kustomize entry point for the inference-simulator manifests
# (applied with `kubectl apply -k development/simulator`).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - deployment.yaml
  - components.yaml
14 changes: 9 additions & 5 deletions test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import (
"github.com/stretchr/testify/assert"
)

// simModeName is the model name the e2e tests in this file send in requests and
// compare against the model reported in responses. Its value equals the --model
// argument of the simulator Deployment in development/simulator/deployment.yaml.
// NOTE(review): the identifier reads like a typo of "simModelName" — confirm
// before renaming, since every use site must change together.
const (
simModeName = "vllm-llama3-8b-instruct"
)

func TestBaseModelInference(t *testing.T) {
initializeClient(context.Background(), t)

Expand All @@ -35,12 +39,12 @@ func TestBaseModelInference(t *testing.T) {
Prompt: openai.CompletionNewParamsPromptUnion{
OfString: openai.String("Say this is a test"),
},
Model: modelName,
Model: simModeName,
})
if err != nil {
t.Fatalf("completions failed: %v", err)
}
assert.Equal(t, modelName, completion.Model)
assert.Equal(t, simModeName, completion.Model)
assert.NotEmpty(t, completion.Choices, "completion has no choices returned")
assert.NotEmpty(t, completion.Choices[0].Text, "chat completion has no message returned")
assert.Greater(t, completion.Usage.CompletionTokens, int64(0), "completion tokens are more than zero")
Expand All @@ -49,12 +53,12 @@ func TestBaseModelInference(t *testing.T) {
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage("Say this is a test"),
},
Model: modelName,
Model: simModeName,
})
if err != nil {
t.Fatalf("chat completions failed: %v", err)
}
assert.Equal(t, modelName, chatCompletion.Model)
assert.Equal(t, simModeName, chatCompletion.Model)
assert.NotEmpty(t, chatCompletion.Choices, "chat completion has no choices returned")
assert.NotNil(t, chatCompletion.Choices[0].Message.Content, "chat completion has no message returned")
}
Expand Down Expand Up @@ -82,7 +86,7 @@ func TestBaseModelInferenceFailures(t *testing.T) {
{
name: "Invalid Routing Strategy",
apiKey: apiKey,
modelName: modelName,
modelName: simModeName,
routingStrategy: "invalid-routing-strategy",
expectErrCode: 400,
},
Expand Down
5 changes: 4 additions & 1 deletion test/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ if [ -n "$INSTALL_AIBRIX" ]; then
make docker-build-all
kind load docker-image aibrix/controller-manager:nightly aibrix/gateway-plugins:nightly aibrix/metadata-service:nightly aibrix/runtime:nightly

kubectl apply -k development/simulator

kubectl apply -k config/dependency --server-side
kubectl apply -k config/test

Expand Down Expand Up @@ -108,8 +110,9 @@ function cleanup {
# Clean up k8s resources if INSTALL_AIBRIX is set
kubectl delete --ignore-not-found=true -k config/test
kubectl delete --ignore-not-found=true -k config/dependency
kubectl delete --ignore-not-found=true -k development/simulator
cd development/app
kubectl delete -k config/mock
kubectl delete --ignore-not-found=true -k config/mock
cd ../..
else
echo "Skipping k8s cleanup as INSTALL_AIBRIX is not set"
Expand Down
Loading