Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .tekton/bats/bats-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ spec:
value: quay.io/redhat-user-workloads/ramalama-tenant/bats:on-pr-{{revision}}
- name: image-expires-after
value: 5d
- name: build-platforms
value:
- linux-d160-c8xlarge/amd64
- linux-d160-c8xlarge/arm64
- linux/ppc64le
- linux/s390x
- name: dockerfile
value: container-images/bats/Containerfile
pipelineRef:
Expand Down
6 changes: 6 additions & 0 deletions .tekton/bats/bats-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ spec:
value: '{{revision}}'
- name: output-image
value: quay.io/redhat-user-workloads/ramalama-tenant/bats:{{revision}}
- name: build-platforms
value:
- linux-d160-c8xlarge/amd64
- linux-d160-c8xlarge/arm64
- linux/ppc64le
- linux/s390x
- name: dockerfile
value: container-images/bats/Containerfile
pipelineRef:
Expand Down
2 changes: 2 additions & 0 deletions .tekton/integration/pipelines/bats-integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ spec:
default:
- linux-c8xlarge/amd64
- linux-c8xlarge/arm64
- linux/ppc64le
- linux/s390x
- name: commands
description: Test commands to run
type: array
Expand Down
2 changes: 1 addition & 1 deletion .tekton/integration/tasks/test-vm-cmd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ spec:
--security-opt label=disable \
--security-opt unmask=/proc/* \
--device /dev/net/tun \
--device /dev/fuse \
-v /tmp \
${PODMAN_ENV[*]} \
$TEST_IMAGE $TEST_CMD
SCRIPTEOF
Expand Down
6 changes: 6 additions & 0 deletions .tekton/ramalama/ramalama-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ spec:
value: quay.io/redhat-user-workloads/ramalama-tenant/ramalama:on-pr-{{revision}}
- name: image-expires-after
value: 5d
- name: build-platforms
value:
- linux-d160-c8xlarge/amd64
- linux-d160-c8xlarge/arm64
- linux/ppc64le
- linux/s390x
- name: dockerfile
value: container-images/ramalama/Containerfile
- name: test-image
Expand Down
6 changes: 6 additions & 0 deletions .tekton/ramalama/ramalama-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ spec:
value: '{{revision}}'
- name: output-image
value: quay.io/redhat-user-workloads/ramalama-tenant/ramalama:{{revision}}
- name: build-platforms
value:
- linux-d160-c8xlarge/amd64
- linux-d160-c8xlarge/arm64
- linux/ppc64le
- linux/s390x
- name: dockerfile
value: container-images/ramalama/Containerfile
- name: test-image
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,15 @@ bats-image:
podman inspect $(BATS_IMAGE) &> /dev/null || \
podman build -t $(BATS_IMAGE) -f container-images/bats/Containerfile .

bats-in-container: extra-opts = --security-opt unmask=/proc/* --device /dev/net/tun --device /dev/fuse
bats-in-container: extra-opts = --security-opt unmask=/proc/* --device /dev/net/tun

# Pattern rule: `make <goal>-in-container` runs `make <goal>` inside a
# container started from $(BATS_IMAGE); $* is the stem matched by `%`.
# bats-image is a prerequisite, so the image target is brought up to date
# before the container is started. $(extra-opts) carries any target-specific
# podman options (bats-in-container sets some). The working tree ($(CURDIR))
# is mounted at /src and /tmp is passed as a volume.
%-in-container: bats-image
podman run -it --rm \
--userns=keep-id:size=200000 \
--security-opt label=disable \
--security-opt=mask=/sys/bus/pci/drivers/i915 \
$(extra-opts) \
-v /tmp \
-v $(CURDIR):/src \
$(BATS_IMAGE) make $*

Expand Down
5 changes: 3 additions & 2 deletions container-images/bats/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ ENV HOME=/tmp \
WORKDIR /src
ENTRYPOINT ["/usr/bin/entrypoint.sh"]

RUN dnf -y install make bats jq iproute podman openssl httpd-tools diffutils \
RUN dnf -y --setopt=install_weak_deps=false install \
make bats jq iproute podman openssl httpd-tools diffutils \
python3-huggingface-hub \
$([ $(uname -m) == "x86_64" ] && echo ollama) \
# for validate and unit-tests
black codespell shellcheck \
python3-flake8 python3-isort python3-pip python3-pytest \
python3-flake8 python3-isort python3-pip python3-pytest python3-pyyaml \
perl-Clone perl-FindBin \
# for building llama-bench
git-core cmake gcc-c++ curl-devel && \
Expand Down
1 change: 0 additions & 1 deletion container-images/bats/containers.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ log_driver="k8s-file"

[engine]
events_logger="file"
infra_image="quay.io/libpod/k8s-pause:3.5"
10 changes: 4 additions & 6 deletions container-images/scripts/build_llama_and_whisper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ dnf_install_rocm() {
rm_non_ubi_repos
}

dnf_install_s390() {
# I think this was for s390, maybe ppc also
dnf_install_s390_ppc64le() {
dnf install -y "openblas-devel"
}

Expand Down Expand Up @@ -126,7 +125,7 @@ dnf_install() {
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
dnf_install_mesa # on x86_64 and aarch64 we use vulkan via mesa
else
dnf_install_s390
dnf_install_s390_ppc64le
fi
elif [[ "$containerfile" = rocm* ]]; then
dnf_install_rocm
Expand Down Expand Up @@ -298,9 +297,8 @@ add_common_flags() {
ramalama)
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
common_flags+=("-DGGML_VULKAN=ON")
elif [ "$uname_m" = "s390x" ]; then
common_flags+=("-DGGML_VXE=ON" "-DGGML_BLAS=ON")
common_flags+=("-DGGML_BLAS_VENDOR=OpenBLAS")
elif [ "$uname_m" = "s390x" ] || [ "$uname_m" = "ppc64le" ]; then
common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
fi
;;
esac
Expand Down
3 changes: 3 additions & 0 deletions docs/ramalama-pull.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Print usage message
#### **--tls-verify**=*true*
require HTTPS and verify certificates when contacting OCI registries

#### **--verify**=*true*
verify the model after it is pulled; disable this to allow pulling models whose endianness differs from the host's

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**

Expand Down
9 changes: 9 additions & 0 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ramalama.chat import default_prefix
from ramalama.common import accel_image, get_accel, perror
from ramalama.config import CONFIG, coerce_to_bool, load_file_config
from ramalama.endian import EndianMismatchError
from ramalama.logger import configure_logger, logger
from ramalama.model import (
MODEL_TYPES,
Expand Down Expand Up @@ -626,6 +627,12 @@ def pull_parser(subparsers):
default=True,
help="require HTTPS and verify certificates when contacting registries",
)
parser.add_argument(
"--verify",
default=CONFIG.verify,
action=CoerceToBool,
help="verify the model after pull, disable to allow pulling of models with different endianness",
)
parser.add_argument("MODEL", completer=suppressCompleter) # positional argument
parser.set_defaults(func=pull_cli)

Expand Down Expand Up @@ -1427,6 +1434,8 @@ def eprint(e, exit_code):
eprint(e, errno.ENOSYS)
except subprocess.CalledProcessError as e:
eprint(e, e.returncode)
except EndianMismatchError:
sys.exit(1)
except KeyboardInterrupt:
sys.exit(0)
except IOError as e:
Expand Down
3 changes: 2 additions & 1 deletion ramalama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class BaseConfig:
threads: int = -1
transport: str = "ollama"
user: UserConfig = field(default_factory=UserConfig)
verify: bool = True

def __post_init__(self):
self.container = coerce_to_bool(self.container) if self.container is not None else self.engine is not None
Expand Down Expand Up @@ -189,7 +190,7 @@ def load_env_config(env: Mapping[str, str] | None = None) -> dict[str, Any]:
if 'images' in config:
config['images'] = json.loads(config['images'])

for key in ['ocr', 'keep_groups', 'container']:
for key in ['ocr', 'keep_groups', 'container', 'verify']:
if key in config:
config[key] = coerce_to_bool(config[key])

Expand Down
8 changes: 6 additions & 2 deletions ramalama/hf_style_repo_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
perror,
run_cmd,
)
from ramalama.endian import EndianMismatchError
from ramalama.logger import logger
from ramalama.model import Model
from ramalama.model_store.snapshot_file import SnapshotFile, SnapshotFileType
Expand Down Expand Up @@ -264,8 +265,11 @@ def pull(self, args):
repo = self.create_repository(name, organization, tag)
snapshot_hash = repo.model_hash
files = repo.get_file_list(cached_files)
self.model_store.new_snapshot(tag, snapshot_hash, files)
self.model_store.new_snapshot(tag, snapshot_hash, files, verify=getattr(args, "verify", True))

except EndianMismatchError:
# No use pulling again
raise
except Exception as e:
if not available(self.get_cli_command()):
perror(f"URL pull failed and {self.get_cli_command()} not available")
Expand All @@ -278,7 +282,7 @@ def pull(self, args):
run_cmd(conman_args)

snapshot_hash, files = self._collect_cli_files(tempdir)
self.model_store.new_snapshot(tag, snapshot_hash, files)
self.model_store.new_snapshot(tag, snapshot_hash, files, verify=getattr(args, "verify", True))

def exec(self, cmd_args, args):
try:
Expand Down
5 changes: 3 additions & 2 deletions ramalama/model_store/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def verify_snapshot(self, model_tag: str):
self._verify_endianness(model_tag)
self._store.verify_snapshot()

def new_snapshot(self, model_tag: str, snapshot_hash: str, snapshot_files: list[SnapshotFile]):
def new_snapshot(self, model_tag: str, snapshot_hash: str, snapshot_files: list[SnapshotFile], verify: bool = True):
snapshot_hash = sanitize_filename(snapshot_hash)

try:
Expand All @@ -327,7 +327,8 @@ def new_snapshot(self, model_tag: str, snapshot_hash: str, snapshot_files: list[
raise ex

try:
self.verify_snapshot(model_tag)
if verify:
self.verify_snapshot(model_tag)
except EndianMismatchError as ex:
perror(f"Verification of snapshot failed: {ex}")
perror("Removing snapshot...")
Expand Down
2 changes: 1 addition & 1 deletion ramalama/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def pull(self, args) -> None:
self.print_pull_message(f"ollama://{organization}/{name}:{tag}")

model_hash = ollama_repo.get_model_hash(manifest)
self.model_store.new_snapshot(tag, model_hash, files)
self.model_store.new_snapshot(tag, model_hash, files, verify=getattr(args, "verify", True))

# If a model has been downloaded via ollama cli, only create symlink in the snapshots directory
if is_model_in_ollama_cache:
Expand Down
8 changes: 4 additions & 4 deletions ramalama/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _assemble_split_file_list(self, snapshot_hash: str) -> list[SnapshotFile]:

return files

def pull(self, _) -> None:
def pull(self, args) -> None:
name, tag, _ = self.extract_model_identifiers()
_, _, all_files = self.model_store.get_cached_files(tag)
if all_files:
Expand All @@ -122,12 +122,12 @@ def pull(self, _) -> None:
required=True,
)
)
self.model_store.new_snapshot(tag, snapshot_hash, files)
self.model_store.new_snapshot(tag, snapshot_hash, files, verify=getattr(args, "verify", True))
return

if is_split_file_model(self.model):
files = self._assemble_split_file_list(snapshot_hash)
self.model_store.new_snapshot(tag, snapshot_hash, files)
self.model_store.new_snapshot(tag, snapshot_hash, files, verify=getattr(args, "verify", True))
return

files.append(
Expand All @@ -141,5 +141,5 @@ def pull(self, _) -> None:
required=True,
)
)
self.model_store.new_snapshot(tag, snapshot_hash, files)
self.model_store.new_snapshot(tag, snapshot_hash, files, verify=getattr(args, "verify", True))
return
2 changes: 2 additions & 0 deletions shortnames/shortnames.conf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
"granite:2b" = "ollama://granite3.1-dense:2b"
"granite:7b" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
"granite:8b" = "ollama://granite3.1-dense:8b"
"granite-be-3.0:1b" = "hf://taronaeo/Granite-3.0-1B-A400M-Instruct-BE-GGUF/granite-3.0-1b-a400m-instruct-be.Q2_K.gguf"
"granite-be-3.3:2b" = "hf://taronaeo/Granite-3.3-2B-Instruct-BE-GGUF/granite-3.3-2b-instruct-be.Q4_K_M.gguf"
"hermes" = "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf"
"ibm/granite" = "ollama://granite3.1-dense:8b"
"ibm/granite:2b" = "ollama://granite3.1-dense:2b"
Expand Down
5 changes: 4 additions & 1 deletion test/system/002-bench.bats
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ function setup() {
# bats test_tags=distro-integration
@test "ramalama bench" {
skip_if_no_llama_bench
run_ramalama bench -t 2 smollm:135m
if is_s390x; then
local RAMALAMA_TIMEOUT=1200
fi
run_ramalama bench -t 2 $(test_model smollm:135m)
is "$output" ".*model.*size.*" "model and size in output"
}

Expand Down
7 changes: 4 additions & 3 deletions test/system/030-run.bats
Original file line number Diff line number Diff line change
Expand Up @@ -125,19 +125,20 @@ EOF
is "$output" ".*-e HSA_OVERRIDE_GFX_VERSION=0.0.0" "ensure HSA_OVERRIDE_GFX_VERSION is set from environment"
}

@test "ramalama run smollm with prompt" {
run_ramalama run --temp 0 ${MODEL} "What is the first line of the declaration of independence?"
@test "ramalama run with prompt" {
run_ramalama run --temp 0 $(test_model ${MODEL} granite-be-3.3:2b) "What is the first line of the declaration of independence?"
}

@test "ramalama run --keepalive" {
# timeout within 1 second and generate a 124 error code.
run_ramalama 0 --debug run --keepalive 1s tiny
run_ramalama 0 --debug run --keepalive 1s $(test_model tiny)
}

@test "ramalama run --image bogus" {
skip_if_nocontainer
skip_if_darwin
skip_if_docker
run_ramalama pull tiny
run_ramalama 22 run --image bogus --pull=never tiny
is "$output" ".*Error: bogus: image not known"
run_ramalama 125 run --image bogus1 --rag quay.io/ramalama/rag --pull=never tiny
Expand Down
10 changes: 7 additions & 3 deletions test/system/040-serve.bats
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ verify_begin=".*run --rm"
run_ramalama -q --dryrun serve smollm
is "$output" ".*ai.ramalama.model=ollama://library/smollm:latest" "smollm should be expanded to fullname"

model=ollama://smollm:135m
model=$(test_model smollm:135m)
run_ramalama info
full_model=$(jq -r --arg model $model -r '.Shortnames.Names[$model]' <<<"$output")
container1=c_$(safename)
container2=c_$(safename)

Expand All @@ -139,7 +141,7 @@ verify_begin=".*run --rm"
port=${output: -8:4}

run_ramalama chat --ls --url http://127.0.0.1:${port}/v1
is "$output" "smollm:135m" "list of models available correct"
is "$output" ${full_model#*://} "list of models available correct"

run_ramalama containers --noheading
is "$output" ".*${container1}" "list correct for container1"
Expand All @@ -159,7 +161,7 @@ verify_begin=".*run --rm"
@test "ramalama --detach serve multiple" {
skip_if_nocontainer

model=ollama://smollm:135m
model=$(test_model ollama://smollm:135m)
container=c_$(safename)
port1=8100
port2=8200
Expand Down Expand Up @@ -445,6 +447,8 @@ verify_begin=".*run --rm"
@test "ramalama serve --api llama-stack" {
skip_if_docker
skip_if_nocontainer
skip_if_ppc64le
skip_if_s390x
model=tiny
name=c_$(safename)
run_ramalama pull ${model}
Expand Down
18 changes: 18 additions & 0 deletions test/system/050-pull.bats
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,24 @@ load setup_suite
run_ramalama rm oci://quay.io/mmortari/gguf-py-example:v1
}

@test "ramalama pull little-endian" {
    # Only meaningful on a big-endian host, where a little-endian model is the
    # "opposite" case; everywhere else the test is skipped.
    is_bigendian || skip "Testing pulls of opposite-endian models"

    local le_model=tiny

    # Remove the model first (--ignore is passed so a missing model is
    # presumably not an error -- confirm against `ramalama rm`).
    run_ramalama rm --ignore "$le_model"

    # With verification on, the pull is expected to fail; the leading 1 is
    # taken to be the expected exit status -- confirm against run_ramalama.
    run_ramalama 1 pull --verify=on "$le_model"
    is "$output" ".*Endian mismatch of host (BIG) and model (LITTLE).*" "detected little-endian model"
}

@test "ramalama pull big-endian" {
    # Mirror of the little-endian test: runs only on hosts that are NOT
    # big-endian, where a big-endian model is the "opposite" case.
    if is_bigendian; then
        skip "Testing pulls of opposite-endian models"
    fi

    local be_model=granite-be-3.0:1b

    # Remove the model first (--ignore is passed so a missing model is
    # presumably not an error -- confirm against `ramalama rm`).
    run_ramalama rm --ignore "$be_model"

    # With verification on, the pull is expected to fail; the leading 1 is
    # taken to be the expected exit status -- confirm against run_ramalama.
    run_ramalama 1 pull --verify=on "$be_model"
    is "$output" ".*Endian mismatch of host (LITTLE) and model (BIG).*" "detected big-endian model"
}

@test "ramalama URL" {
model=$RAMALAMA_TMPDIR/mymodel.gguf
touch $model
Expand Down
Loading
Loading