Commit d29cb45

Merge remote-tracking branch 'pwilkin/master' into apertus-implementation
2 parents: 6972404 + 835b2b9

72 files changed: +7022 -2642 lines

.devops/s390x.Dockerfile

Lines changed: 13 additions & 12 deletions

@@ -2,10 +2,10 @@ ARG GCC_VERSION=15.2.0
 ARG UBUNTU_VERSION=24.04
 
 ### Build Llama.cpp stage
-FROM --platform=linux/s390x gcc:${GCC_VERSION} AS build
+FROM gcc:${GCC_VERSION} AS build
 
-RUN --mount=type=cache,target=/var/cache/apt \
-    --mount=type=cache,target=/var/lib/apt/lists \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
     apt update -y && \
     apt upgrade -y && \
     apt install -y --no-install-recommends \
@@ -40,7 +40,7 @@ COPY requirements /opt/llama.cpp/gguf-py/requirements
 
 
 ### Collect all llama.cpp binaries, libraries and distro libraries
-FROM --platform=linux/s390x scratch AS collector
+FROM scratch AS collector
 
 # Copy llama.cpp binaries and libraries
 COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
@@ -49,13 +49,14 @@ COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
 
 
 ### Base image
-FROM --platform=linux/s390x ubuntu:${UBUNTU_VERSION} AS base
+FROM ubuntu:${UBUNTU_VERSION} AS base
 
-RUN --mount=type=cache,target=/var/cache/apt \
-    --mount=type=cache,target=/var/lib/apt/lists \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
     apt update -y && \
     apt install -y --no-install-recommends \
         # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
+        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
         curl libgomp1 libopenblas-dev && \
     apt autoremove -y && \
     apt clean -y && \
@@ -68,13 +69,13 @@ COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
 
 
 ### Full
-FROM --platform=linux/s390x base AS full
+FROM base AS full
 
 ENV PATH="/root/.cargo/bin:${PATH}"
 WORKDIR /app
 
-RUN --mount=type=cache,target=/var/cache/apt \
-    --mount=type=cache,target=/var/lib/apt/lists \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
     apt update -y && \
     apt install -y \
         git cmake libjpeg-dev \
@@ -97,7 +98,7 @@ ENTRYPOINT [ "/app/tools.sh" ]
 
 
 ### CLI Only
-FROM --platform=linux/s390x base AS light
+FROM base AS light
 
 WORKDIR /llama.cpp/bin
 
@@ -108,7 +109,7 @@ ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
 
 
 ### Server
-FROM --platform=linux/s390x base AS server
+FROM base AS server
 
 ENV LLAMA_ARG_HOST=0.0.0.0
 
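Two changes recur through this Dockerfile: the hard-coded `--platform=linux/s390x` is dropped from every `FROM` (when buildx supplies the target platform, pinning it in the Dockerfile is redundant and blocks reuse on other platforms), and every apt cache mount gains `sharing=locked`. With BuildKit's default `sharing=shared`, concurrent builds may write to the same cache mount at once; `locked` makes a second build wait for the mount instead. A minimal sketch of the pattern, not the full image (base image and package list abbreviated from the diff above):

# Sketch only: abbreviated from the s390x base stage above.
# sharing=locked serializes concurrent access to the apt cache mounts;
# the default (sharing=shared) would let parallel builds race on them.
FROM ubuntu:24.04 AS base

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt update -y && \
    apt install -y --no-install-recommends curl libgomp1 libopenblas-dev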

.github/workflows/build.yml

Lines changed: 112 additions & 65 deletions

@@ -1251,87 +1251,132 @@ jobs:
   # TODO: simplify the following workflows using a matrix
   # TODO: run lighter CI on PRs and the full CI only on master (if needed)
   ggml-ci-x64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, low-perf]
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/[email protected]
+        with:
+          key: ggml-ci-x64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-low-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/[email protected]
+        with:
+          key: ggml-ci-arm64-cpu-low-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-x64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, X64, CPU, high-perf]
+    runs-on: ubuntu-22.04
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/[email protected]
+        with:
+          key: ggml-ci-x64-cpu-high-perf
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
   ggml-ci-arm64-cpu-high-perf:
-    runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
-      - name: Test
-        id: ggml-ci
-        run: |
-          GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-nvidia-v100-cuda:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+      - name: ccache
+        uses: ggml-org/[email protected]
+        with:
+          key: ggml-ci-arm64-cpu-high-perf
+          evict-old-files: 1d
 
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
 
       - name: Test
         id: ggml-ci
         run: |
-          nvidia-smi
-          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
-  ggml-ci-x64-nvidia-v100-vulkan:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+  ggml-ci-arm64-cpu-high-perf-sve:
+    runs-on: ubuntu-22.04-arm
 
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: ggml-org/[email protected]
+        with:
+          key: ggml-ci-arm64-cpu-high-perf-sve
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential libcurl4-openssl-dev
+
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
-  ggml-ci-x64-nvidia-t4-cuda:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+  ggml-ci-x64-nvidia-cuda:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
 
     steps:
       - name: Clone
@@ -1344,8 +1389,8 @@ jobs:
           nvidia-smi
           GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-t4-vulkan:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+  ggml-ci-x64-nvidia-vulkan-cm:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
 
     steps:
       - name: Clone
@@ -1355,11 +1400,11 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-t4-vulkan-coopmat1:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+  ggml-ci-x64-nvidia-vulkan-cm2:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]
 
     steps:
       - name: Clone
@@ -1369,8 +1414,8 @@ jobs:
       - name: Test
         id: ggml-ci
        run: |
-          vulkaninfo
-          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
   ggml-ci-x64-cpu-amx:
     runs-on: [self-hosted, Linux, X64, CPU, AMX]
@@ -1385,21 +1430,36 @@ jobs:
         run: |
           bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-v710-vulkan:
-    runs-on: [self-hosted, Linux, X64, AMD, V710]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  # ggml-ci-x64-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+  #
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+  #
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  #
+  # ggml-ci-x64-amd-rocm:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+  #
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+  #
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-v710-rocm:
-    runs-on: [self-hosted, Linux, X64, AMD, V710]
+  ggml-ci-mac-metal:
+    runs-on: [self-hosted, macOS, ARM64]
 
     steps:
       - name: Clone
@@ -1409,9 +1469,9 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
 
-  ggml-ci-mac-metal:
+  ggml-ci-mac-vulkan:
     runs-on: [self-hosted, macOS, ARM64]
 
     steps:
@@ -1422,18 +1482,5 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
-  # TODO: install vulkan drivers
-  # ggml-ci-mac-vulkan:
-  #   runs-on: [self-hosted, macOS, ARM64]
-  #
-  #   steps:
-  #     - name: Clone
-  #       id: checkout
-  #       uses: actions/checkout@v4
-  #
-  #     - name: Test
-  #       id: ggml-ci
-  #       run: |
-  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
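The first hunk keeps the `# TODO: simplify the following workflows using a matrix` comment, and the four hosted-runner CPU jobs it introduces are identical apart from the runner image, the ccache key, and the flags passed to `ci/run.sh`. A hedged sketch of what that consolidation could look like — the job name, the matrix field names, and the `extra` strings are illustrative, not part of this commit:

# Hypothetical matrix version of the four hosted-runner CPU jobs above.
# Job name, matrix fields, and `extra` strings are illustrative only;
# the ccache action reference is copied as it appears in the hunks above.
ggml-ci-cpu:
  strategy:
    matrix:
      include:
        - { os: ubuntu-22.04,     key: ggml-ci-x64-cpu-low-perf,    extra: "GG_BUILD_LOW_PERF=1" }
        - { os: ubuntu-22.04-arm, key: ggml-ci-arm64-cpu-low-perf,  extra: "GG_BUILD_LOW_PERF=1" }
        - { os: ubuntu-22.04,     key: ggml-ci-x64-cpu-high-perf,   extra: "" }
        - { os: ubuntu-22.04-arm, key: ggml-ci-arm64-cpu-high-perf, extra: "GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1" }
  runs-on: ${{ matrix.os }}
  steps:
    - name: Clone
      uses: actions/checkout@v4

    - name: ccache
      uses: ggml-org/[email protected]
      with:
        key: ${{ matrix.key }}
        evict-old-files: 1d

    - name: Dependencies
      run: |
        sudo apt-get update
        sudo apt-get install build-essential libcurl4-openssl-dev

    - name: Test
      run: |
        # ${{ matrix.extra }} is substituted textually before the shell runs,
        # so it expands to zero or more VAR=VALUE prefixes on this command.
        LLAMA_ARG_THREADS=$(nproc) ${{ matrix.extra }} bash ./ci/run.sh ./tmp/results ./tmp/mnt

Each `include` row would replace one copy of the checkout/ccache/dependencies/test boilerplate; the `-sve` variant could be a fifth row.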

CODEOWNERS

Lines changed: 9 additions & 9 deletions

@@ -63,7 +63,7 @@
 /ggml/src/ggml-quants.* @ggerganov
 /ggml/src/ggml-threading.* @ggerganov @slaren
 /ggml/src/ggml-vulkan/ @0cc4m
-/ggml/src/ggml-zdnn/ @taronaeo
+/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
 /ggml/src/ggml.c @ggerganov @slaren
 /ggml/src/ggml.cpp @ggerganov @slaren
 /ggml/src/gguf.cpp @JohannesGaessler @Green-Sky
@@ -95,12 +95,12 @@
 /tools/tokenize/ @ggerganov
 /tools/tts/ @ggerganov
 /vendor/ @ggerganov
-.clang-format @slaren
-.clang-tidy @slaren
-AUTHORS @ggerganov
-CMakeLists.txt @ggerganov
-CONTRIBUTING.md @ggerganov
-LICENSE @ggerganov
-README.md @ggerganov
-SECURITY.md @ggerganov
+/.clang-format @slaren
+/.clang-tidy @slaren
+/AUTHORS @ggerganov
+/CMakeLists.txt @ggerganov
+/CONTRIBUTING.md @ggerganov
+/LICENSE @ggerganov
+/README.md @ggerganov
+/SECURITY.md @ggerganov
 requirements*.txt @CISC
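The leading slash added to these entries is load-bearing: CODEOWNERS patterns follow gitignore-style matching, so an unanchored `README.md` claims every file with that name anywhere in the tree, while `/README.md` claims only the one at the repository root. A small illustration (the `docs/` path is hypothetical):

# Unanchored: also matches e.g. docs/README.md anywhere in the tree
README.md  @ggerganov

# Anchored: matches only the repository-root README.md
/README.md @ggerganov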

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ The project differentiates between 3 levels of contributors:
 - Squash-merge PRs
 - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
 - Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
-- Let other maintainers, merge their own PRs
+- Let other maintainers merge their own PRs
 - When merging a PR, make sure you have a good understanding of the changes
 - Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contribute long-term, someone else needs to take responsibility (you)
