Skip to content

Commit b2a75a2

Browse files
author
Mike Kuykendall
committed
test: Add workflow testing triggers
1 parent 66b4054 commit b2a75a2

File tree

11 files changed

+408
-6
lines changed

11 files changed

+408
-6
lines changed

.cargo/config.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ jobs = 4
66

77
# Environment variables for llama.cpp compilation
88
[env]
9-
# Disable CUDA compilation by default to speed up builds
10-
LLAMA_CUDA = "OFF"
9+
# Enable CUDA compilation for GPU support
10+
LLAMA_CUDA = "ON"
1111
# Use faster compilation flags
1212
CMAKE_BUILD_TYPE = "Release"
1313
# Limit parallel jobs for llama.cpp to prevent hanging

.claude/settings.local.json

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,45 @@
1616
"Bash(git add:*)",
1717
"Bash(gh run:*)",
1818
"Bash(rm:*)",
19-
"Bash(cargo build:*)"
19+
"Bash(cargo build:*)",
20+
"Bash(cargo publish:*)",
21+
"Bash(git commit:*)",
22+
"Bash(gh release create:*)",
23+
"Bash(vsce publish:*)",
24+
"Bash(cargo search:*)",
25+
"WebFetch(domain:marketplace.visualstudio.com)",
26+
"Bash(vsce list:*)",
27+
"Bash(vsce show:*)",
28+
"Bash(git log:*)",
29+
"Bash(grep:*)",
30+
"Bash(gh release view:*)",
31+
"Bash(gh release upload:*)",
32+
"Bash(cp:*)",
33+
"Bash(cargo test:*)",
34+
"Bash(git pull:*)",
35+
"Bash(gh api:*)",
36+
"Bash(gh repo edit:*)",
37+
"Bash(mkdir:*)",
38+
"Bash(gh issue list:*)",
39+
"Bash(gh pr list:*)",
40+
"Bash(gh issue view:*)",
41+
"Bash(gh pr view:*)",
42+
"Bash(gh pr diff:*)",
43+
"Bash(gh pr close:*)",
44+
"Bash(cargo check:*)",
45+
"Bash(nvidia-smi:*)",
46+
"Bash(./target/release/shimmy.exe:*)",
47+
"Bash(cargo clean:*)",
48+
"Bash(cat:*)",
49+
"Bash(rg:*)",
50+
"Bash(gh issue close:*)",
51+
"Bash(sed:*)",
52+
"Bash(git checkout:*)"
2053
],
2154
"deny": [],
22-
"ask": []
55+
"ask": [],
56+
"additionalDirectories": [
57+
"C:\\Users\\micha\\.ollama\\models"
58+
]
2359
}
2460
}

.github/workflows/release.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ on:
44
push:
55
tags:
66
- 'v*'
7+
workflow_dispatch: # Allow manual testing
8+
push:
9+
branches:
10+
- test-release # Test on specific branch
711

812
jobs:
913
release:

Dockerfile

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Multi-stage build for shimmy with GPU support
2+
FROM nvidia/cuda:12.0-devel-ubuntu22.04 as builder
3+
4+
# Install Rust and build dependencies
5+
RUN apt-get update && apt-get install -y \
6+
curl \
7+
build-essential \
8+
cmake \
9+
pkg-config \
10+
&& rm -rf /var/lib/apt/lists/*
11+
12+
# Install Rust
13+
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
14+
ENV PATH="/root/.cargo/bin:${PATH}"
15+
16+
# Copy source code
17+
WORKDIR /app
18+
COPY . .
19+
20+
# Build shimmy with GPU support
21+
RUN cargo build --release --features llama
22+
23+
# Runtime image with CUDA runtime
24+
FROM nvidia/cuda:12.0-runtime-ubuntu22.04
25+
26+
# Install runtime dependencies
27+
RUN apt-get update && apt-get install -y \
28+
ca-certificates \
29+
&& rm -rf /var/lib/apt/lists/*
30+
31+
# Copy binary from builder
32+
COPY --from=builder /app/target/release/shimmy /usr/local/bin/shimmy
33+
34+
# Create non-root user
35+
RUN useradd -m -u 1000 shimmy
36+
USER shimmy
37+
38+
# Set working directory
39+
WORKDIR /home/shimmy
40+
41+
# Expose default port
42+
EXPOSE 3000
43+
44+
# Default command
45+
CMD ["shimmy", "serve", "--bind", "0.0.0.0:3000"]

Dockerfile.cpu

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# CPU-only build for shimmy
2+
FROM rust:1.70-slim-bullseye as builder
3+
4+
# Install build dependencies
5+
RUN apt-get update && apt-get install -y \
6+
build-essential \
7+
cmake \
8+
pkg-config \
9+
&& rm -rf /var/lib/apt/lists/*
10+
11+
# Copy source code
12+
WORKDIR /app
13+
COPY . .
14+
15+
# Build shimmy with CPU-only features
16+
RUN cargo build --release --features llama
17+
18+
# Runtime image
19+
FROM debian:bullseye-slim
20+
21+
# Install runtime dependencies
22+
RUN apt-get update && apt-get install -y \
23+
ca-certificates \
24+
curl \
25+
&& rm -rf /var/lib/apt/lists/*
26+
27+
# Copy binary from builder
28+
COPY --from=builder /app/target/release/shimmy /usr/local/bin/shimmy
29+
30+
# Create non-root user
31+
RUN useradd -m -u 1000 shimmy
32+
USER shimmy
33+
34+
# Set working directory
35+
WORKDIR /home/shimmy
36+
37+
# Expose default port
38+
EXPOSE 3000
39+
40+
# Default command
41+
CMD ["shimmy", "serve", "--bind", "0.0.0.0:3000"]

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,26 @@ cargo install shimmy
9898
- Xcode 17+ compatibility
9999
- All LoRA adapter features
100100

101+
## 🚀 GPU Acceleration
102+
103+
### NVIDIA CUDA ✅
104+
```bash
105+
# Install with GPU support
106+
cargo install shimmy --features llama
107+
108+
# Docker with GPU
109+
docker run --runtime=nvidia --gpus all shimmy:latest
110+
```
111+
112+
### Apple Metal ✅
113+
- Automatic acceleration on macOS
114+
- M1/M2 and discrete GPU support
115+
- No configuration needed
116+
117+
### CPU Fallback ✅
118+
- Multi-threaded CPU inference
119+
- Works on all systems without GPU
120+
101121
## Integration Examples
102122

103123
### VSCode Copilot

docker-compose.yml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
version: '3.8'
2+
3+
services:
4+
shimmy:
5+
build: .
6+
runtime: nvidia
7+
environment:
8+
- NVIDIA_VISIBLE_DEVICES=all
9+
- RUST_LOG=info
10+
ports:
11+
- "3000:3000"
12+
volumes:
13+
- "./models:/home/shimmy/models:ro"
14+
- "~/.cache/huggingface:/home/shimmy/.cache/huggingface:ro"
15+
restart: unless-stopped
16+
healthcheck:
17+
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
18+
interval: 30s
19+
timeout: 10s
20+
retries: 3
21+
start_period: 10s
22+
23+
# CPU-only version for systems without GPU
24+
shimmy-cpu:
25+
build:
26+
context: .
27+
dockerfile: Dockerfile.cpu
28+
environment:
29+
- RUST_LOG=info
30+
ports:
31+
- "3001:3000"
32+
volumes:
33+
- "./models:/home/shimmy/models:ro"
34+
- "~/.cache/huggingface:/home/shimmy/.cache/huggingface:ro"
35+
restart: unless-stopped
36+
profiles:
37+
- cpu

docs/CONFIGURATION.md

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,48 @@ export SHIMMY_MMAP=true
119119

120120
### GPU Support
121121

122-
Currently, shimmy uses CPU-only inference. GPU support is planned for future releases.
122+
Shimmy supports GPU acceleration through multiple backends:
123+
124+
#### NVIDIA CUDA Support ✅
125+
- **Status**: Available with `--features llama` build flag
126+
- **Requirements**: NVIDIA GPU with CUDA support, CUDA toolkit installed
127+
- **Automatic Detection**: Models are automatically offloaded to GPU when available
128+
- **Docker Support**: Use NVIDIA runtime (`--runtime=nvidia` or `--gpus all`)
129+
130+
#### Apple Metal Support ✅
131+
- **Status**: Automatic on macOS with Apple Silicon or discrete GPUs
132+
- **Performance**: Significant acceleration confirmed on M1/M2 and AMD Radeon Pro GPUs
133+
- **Detection**: Automatic, no configuration required
134+
135+
#### CPU Fallback
136+
- **Status**: Always available as fallback
137+
- **Performance**: Multi-threaded CPU inference for systems without GPU support
138+
139+
#### Build Configuration
140+
141+
To enable GPU support, build with:
142+
```bash
143+
cargo build --release --features llama
144+
```
145+
146+
Or install via cargo with GPU features:
147+
```bash
148+
cargo install shimmy --features llama
149+
```
150+
151+
#### Docker GPU Usage
152+
153+
```dockerfile
154+
# Use NVIDIA runtime
155+
docker run --runtime=nvidia --gpus all shimmy:latest
156+
157+
# Or with docker-compose
158+
services:
159+
shimmy:
160+
runtime: nvidia
161+
environment:
162+
- NVIDIA_VISIBLE_DEVICES=all
163+
```
123164

124165
## Security Considerations
125166

shimmy-windows-amd64.exe

2.27 MB
Binary file not shown.

0 commit comments

Comments
 (0)