@@ -3601,44 +3601,32 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in
3601
3601
}
3602
3602
3603
3603
const int end = QK4_0 * 2 / blck_size_interleave;
3604
- const size_t qs_size = sizeof (out. qs );
3604
+ constexpr size_t qs_size = QK4_0 * 2 ; // Size of output qs array
3605
3605
3606
3606
if (blck_size_interleave == 8 ) {
3607
3607
const uint64_t xor_mask = 0x8888888888888888ULL ;
3608
- for (int i = 0 ; i < end; ++i) {
3608
+ for (int i = 0 ; i < end && (i + 1 ) * blck_size_interleave <= qs_size ; ++i) {
3609
3609
int src_id = i % 4 ;
3610
3610
int src_offset = (i / 4 ) * blck_size_interleave;
3611
3611
int dst_offset = i * blck_size_interleave;
3612
3612
3613
- // Bounds checking
3614
- if (dst_offset + sizeof (uint64_t ) <= qs_size &&
3615
- src_offset + sizeof (uint64_t ) <= sizeof (in[src_id].qs )) {
3616
- uint64_t elems;
3617
- // Using memcpy to avoid unaligned memory accesses
3618
- memcpy (&elems, &in[src_id].qs [src_offset], sizeof (uint64_t ));
3619
- elems ^= xor_mask;
3620
- memcpy (&out.qs [dst_offset], &elems, sizeof (uint64_t ));
3621
- } else {
3622
- GGML_ASSERT (false && " buffer overflow prevented in make_block_q4_0x4" );
3623
- }
3613
+ uint64_t elems;
3614
+ // Using memcpy to avoid unaligned memory accesses
3615
+ memcpy (&elems, &in[src_id].qs [src_offset], sizeof (uint64_t ));
3616
+ elems ^= xor_mask;
3617
+ memcpy (&out.qs [dst_offset], &elems, sizeof (uint64_t ));
3624
3618
}
3625
3619
} else if (blck_size_interleave == 4 ) {
3626
3620
const uint32_t xor_mask = 0x88888888 ;
3627
- for (int i = 0 ; i < end; ++i) {
3621
+ for (int i = 0 ; i < end && (i + 1 ) * blck_size_interleave <= qs_size ; ++i) {
3628
3622
int src_id = i % 4 ;
3629
3623
int src_offset = (i / 4 ) * blck_size_interleave;
3630
3624
int dst_offset = i * blck_size_interleave;
3631
3625
3632
- // Bounds checking
3633
- if (dst_offset + sizeof (uint32_t ) <= qs_size &&
3634
- src_offset + sizeof (uint32_t ) <= sizeof (in[src_id].qs )) {
3635
- uint32_t elems;
3636
- memcpy (&elems, &in[src_id].qs [src_offset], sizeof (uint32_t ));
3637
- elems ^= xor_mask;
3638
- memcpy (&out.qs [dst_offset], &elems, sizeof (uint32_t ));
3639
- } else {
3640
- GGML_ASSERT (false && " buffer overflow prevented in make_block_q4_0x4" );
3641
- }
3626
+ uint32_t elems;
3627
+ memcpy (&elems, &in[src_id].qs [src_offset], sizeof (uint32_t ));
3628
+ elems ^= xor_mask;
3629
+ memcpy (&out.qs [dst_offset], &elems, sizeof (uint32_t ));
3642
3630
}
3643
3631
} else {
3644
3632
GGML_ASSERT (false && " invalid block size interleave value" );
0 commit comments