Skip to content

Commit 6bd5fb2

Browse files
committed
ggml-cpu: Simplify bounds checking in make_block_q4_0x4
Refactor the memory copying logic in the `make_block_q4_0x4` function to remove explicit bounds checking. Use compile-time constants and loop bounds to prevent potential buffer overflows. Signed-off-by: Ilham Syahid S <[email protected]>
1 parent 9d5f18b commit 6bd5fb2

File tree

1 file changed

+12
-24
lines changed

1 file changed

+12
-24
lines changed

src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3601,44 +3601,32 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in
36013601
}
36023602

36033603
const int end = QK4_0 * 2 / blck_size_interleave;
3604-
const size_t qs_size = sizeof(out.qs);
3604+
constexpr size_t qs_size = QK4_0 * 2; // Size of output qs array
36053605

36063606
if (blck_size_interleave == 8) {
36073607
const uint64_t xor_mask = 0x8888888888888888ULL;
3608-
for (int i = 0; i < end; ++i) {
3608+
for (int i = 0; i < end && (i + 1) * blck_size_interleave <= qs_size; ++i) {
36093609
int src_id = i % 4;
36103610
int src_offset = (i / 4) * blck_size_interleave;
36113611
int dst_offset = i * blck_size_interleave;
36123612

3613-
// Bounds checking
3614-
if (dst_offset + sizeof(uint64_t) <= qs_size &&
3615-
src_offset + sizeof(uint64_t) <= sizeof(in[src_id].qs)) {
3616-
uint64_t elems;
3617-
// Using memcpy to avoid unaligned memory accesses
3618-
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
3619-
elems ^= xor_mask;
3620-
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
3621-
} else {
3622-
GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4");
3623-
}
3613+
uint64_t elems;
3614+
// Using memcpy to avoid unaligned memory accesses
3615+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
3616+
elems ^= xor_mask;
3617+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
36243618
}
36253619
} else if (blck_size_interleave == 4) {
36263620
const uint32_t xor_mask = 0x88888888;
3627-
for (int i = 0; i < end; ++i) {
3621+
for (int i = 0; i < end && (i + 1) * blck_size_interleave <= qs_size; ++i) {
36283622
int src_id = i % 4;
36293623
int src_offset = (i / 4) * blck_size_interleave;
36303624
int dst_offset = i * blck_size_interleave;
36313625

3632-
// Bounds checking
3633-
if (dst_offset + sizeof(uint32_t) <= qs_size &&
3634-
src_offset + sizeof(uint32_t) <= sizeof(in[src_id].qs)) {
3635-
uint32_t elems;
3636-
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
3637-
elems ^= xor_mask;
3638-
memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
3639-
} else {
3640-
GGML_ASSERT(false && "buffer overflow prevented in make_block_q4_0x4");
3641-
}
3626+
uint32_t elems;
3627+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
3628+
elems ^= xor_mask;
3629+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
36423630
}
36433631
} else {
36443632
GGML_ASSERT(false && "invalid block size interleave value");

0 commit comments

Comments
 (0)