Skip to content
Open
Changes from 1 commit
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
09bc7c2
Use activations to calculate the stats
EAddario Jul 26, 2025
2097f03
Refactor variable names
EAddario Jul 31, 2025
78ddb47
Fix blow-up when GGUF does not have in_sum
EAddario Aug 2, 2025
9744a4a
Determine calculation mode
EAddario Aug 2, 2025
cce514a
Compute entropy for activations
EAddario Aug 2, 2025
b7fb362
Compute cosine similarity based on activations
EAddario Aug 2, 2025
9b841eb
Compute l2 norm
EAddario Aug 2, 2025
ee2509f
Adjust threshold
EAddario Aug 2, 2025
fc8f925
Update table display
EAddario Aug 2, 2025
4c01f51
Remove inactive
EAddario Aug 2, 2025
a32a2ec
Reformat report layout
EAddario Aug 2, 2025
4d1325e
Refactor variables
EAddario Aug 3, 2025
5324558
Update table layout
EAddario Aug 3, 2025
fce05aa
Refactor lambda into compute_tensor_averages() function
EAddario Aug 3, 2025
be60469
Refactor function names
EAddario Aug 3, 2025
a6155a8
Add compute_layer_statistics() function
EAddario Aug 3, 2025
2117c4e
Update aggregated statistic report layout
EAddario Aug 3, 2025
90cb1be
Minor cosmetic changes
EAddario Aug 3, 2025
f1c2a4c
Fix printing l2 norm when calc_mode = 1
EAddario Aug 3, 2025
c39c4e2
Refactor variable name
EAddario Aug 4, 2025
adbff66
Merge branch 'master' into imatrix
EAddario Aug 4, 2025
5e40cf4
Do not resize if in_sum is null
EAddario Aug 4, 2025
b373934
Compute aggregated (per layer) l2 norm
EAddario Aug 5, 2025
906548a
Update aggregated sum of squared activations per layer
EAddario Aug 5, 2025
aea9b31
Make ZD Score two-tailed
EAddario Aug 5, 2025
49996a1
Refactor variable names
EAddario Aug 5, 2025
4c3fea8
Update report layout
EAddario Aug 5, 2025
88854c9
Refactor legacy mode
EAddario Aug 5, 2025
030ed3c
Merge branch 'master' into imatrix
EAddario Aug 5, 2025
c7959ed
Merge branch 'master' into imatrix
EAddario Aug 7, 2025
3e9d53c
Refactor variable names
EAddario Aug 7, 2025
e0d6471
Reverse conditional logic to match convention
EAddario Aug 7, 2025
dadd90e
Rename report heading
EAddario Aug 7, 2025
5bb2def
Add --activation-statistics parameter
EAddario Aug 7, 2025
c5ecdaa
Add Euclidean–Cosine Score (ECS)
EAddario Aug 7, 2025
59af503
Update README.md
EAddario Aug 9, 2025
9467963
Merge branch 'master' into imatrix
EAddario Aug 9, 2025
6fe51e1
Fix typo in ECS formula
EAddario Aug 9, 2025
dcac206
Add --activation-statistics logic to avoid doubling the imatrix size …
EAddario Aug 9, 2025
89051cd
Update README.md
EAddario Aug 9, 2025
2756617
Merge branch 'master' into imatrix
EAddario Aug 15, 2025
42bfe3b
Update stats output sort based on imatrix type
EAddario Aug 15, 2025
240a965
Update README.md
EAddario Aug 15, 2025
8589ef4
Update README.md
EAddario Aug 15, 2025
030ec53
Remove unnecessary include
EAddario Aug 16, 2025
d4b0d89
Fix return type bug
EAddario Aug 16, 2025
e3149a2
Use the corresponding size
EAddario Aug 17, 2025
4a487ea
Use { and } around the conditionally-executed statement
EAddario Aug 17, 2025
97d839c
Using one line per variable definition
EAddario Aug 17, 2025
d19e6c9
Use { and } around the conditionally-executed statement
EAddario Aug 17, 2025
12607d3
Use { and } around single line for statement
EAddario Aug 17, 2025
a96013f
Define one variable per line and refactor names
EAddario Aug 17, 2025
2e80323
Use { and } around conditionally-executed single line statements
EAddario Aug 17, 2025
44ea7dd
Change statement order
EAddario Aug 17, 2025
f6934b9
Merge branch 'imatrix' of https://github.com/EAddario/llama.cpp into …
EAddario Aug 17, 2025
1f72bc1
Avoid using if statements with initialiser
EAddario Aug 17, 2025
630750f
Validate number of elements if in_sum is present
EAddario Aug 17, 2025
5aca256
Merge branch 'master' into imatrix
EAddario Aug 21, 2025
3e26364
Clarify the nature of the calculated cosine similarity
EAddario Aug 24, 2025
69b351b
Add --output-format to usage
EAddario Aug 26, 2025
6371902
Add --output-format to usage
EAddario Aug 26, 2025
70dd25b
Merge branch 'master' into imatrix
EAddario Aug 30, 2025
8f1aa78
Remove activation_statistics() option
EAddario Aug 31, 2025
8d0e276
Update README.md
EAddario Aug 31, 2025
7448bdb
Merge branch 'master' into imatrix
EAddario Sep 6, 2025
0c3a019
Merge branch 'master' into imatrix
EAddario Sep 10, 2025
63f3449
Merge branch 'master' into imatrix
EAddario Sep 15, 2025
193d5bb
Merge branch 'master' into imatrix
EAddario Sep 20, 2025
5932eef
Merge branch 'master' into imatrix
EAddario Sep 25, 2025
a28ee30
Merge branch 'master' into imatrix
EAddario Oct 1, 2025
bc38936
Merge branch 'master' into imatrix
EAddario Oct 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 54 additions & 10 deletions tools/imatrix/imatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";

// Accumulated importance-matrix statistics for one tensor.
// Sizes: activations/values hold one entry per input element per matrix
// (src1->ne[0] * n_mat); counts holds one entry per matrix/expert.
struct Stats {
std::vector<float> activations; // running sums of activations x[j]; may be empty when the loaded GGUF has no *.in_sum tensor
std::vector<float> values; // running sums of squared activations x[j]*x[j] (the *.in_sum2 data used for quantization importance)
std::vector<int64_t> counts; // number of rows accumulated into each matrix slice (divisor when averaging)
};

//ToDo: rename sqract variables to be more generic like 'values'
struct tensor_statistics {
std::string tensor;
Stats stats;
Expand Down Expand Up @@ -139,14 +141,28 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
const int row_size = e.values.size() / n_mat;

std::vector<float> activations;
activations.reserve(e.values.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.values[i*row_size + j] / e.counts[i]);
if (e.activations.empty()) {
activations.reserve(e.values.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.values[i*row_size + j] / e.counts[i]);
}
}
} else {
activations.reserve(e.activations.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
}
}
}
Comment on lines +178 to 194
Copy link
Collaborator

@compilade compilade Aug 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the sums of activations are available, this is completely ignoring the sums of squared activations????

All of the new statistics are done over the per-channel means.

This doesn't seem right.

The sums of squared activations are used for quantization importance, and if they're completely ignored, then the statistics are possibly meaningless for importance purposes.

The mean and mean of squared activations together should allow calculating per-channel variance and stuff like that. Not sure how to turn that into per-tensor stats, though it's likely possible somehow.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct, and by design. When available, using mean activations instead yields better statistics since the direction of the change (minus sign) is now available. The ECS stat (dot product of cossim and l2 norm), for example, correctly identifies attn_output and ffn_down as the most sensitive to quantisation. This is not possible with mean of squared activations.

The idea of deriving the per-channel variance through mean and mean of squared activations is quite interesting. I'll look into for a future release.




//ToDo: rename act_ variables to be more generic like 'values'
const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
const float act_max = *std::max_element(activations.begin(), activations.end());
const float act_min = *std::min_element(activations.begin(), activations.end());
Expand Down Expand Up @@ -282,6 +298,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
e.counts.resize(n_as, e.counts[0]);
}
if (e.values.empty()) {
e.activations.resize(src1->ne[0]*n_as, 0);
e.values.resize(src1->ne[0]*n_as, 0);
e.counts.resize(n_as, 0);
}
Expand Down Expand Up @@ -313,6 +330,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
e.counts[ex]++;

for (int64_t j = 0; j < src1->ne[0]; ++j) {
e.activations[e_start + j] += x[j];
e.values[e_start + j] += x[j] * x[j];
if (!std::isfinite((float)e.values[e_start + j])) {
LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
Expand All @@ -338,6 +356,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
const int64_t n_mat = src1->ne[2] * src1->ne[3];

if (e.values.empty()) {
e.activations.resize(src1->ne[0] * n_mat, 0);
e.values.resize(src1->ne[0] * n_mat, 0);
e.counts.resize(n_mat, 0);
}
Expand All @@ -359,6 +378,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
e.counts[mat_id]++;
for (int64_t j = 0; j < src1->ne[0]; ++j) {
e.activations[mat_start + j] += x[j];
e.values[mat_start + j] += x[j] * x[j];
if (!std::isfinite((float)e.values[j])) {
LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
Expand Down Expand Up @@ -532,6 +552,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
}

to_store.push_back(kv.first);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
}
Expand Down Expand Up @@ -584,6 +605,16 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {

gguf_add_tensor(ctx_gguf, in_sum2);
gguf_add_tensor(ctx_gguf, counts);

if (!stat.activations.empty()) {
const int32_t nact = (int32_t) stat.activations.size();
struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // ToDo: consider a better name. 'in_act' maybe?
for (int32_t j = 0; j < nval; ++j) {
((float *) in_sum->data)[j] = (float) stat.activations[j];
}
gguf_add_tensor(ctx_gguf, in_sum);
}
}
}

Expand Down Expand Up @@ -722,14 +753,15 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
}
}

const std::string in_sum_suffix{ ".in_sum" };
const std::string in_sum2_suffix{ ".in_sum2" };
const std::string counts_suffix{ ".counts" };

// Could re-use m_stats instead, but this allows
// checking for completeness of *each* loaded imatrix file
// and also makes it easier to re-use a similar implementation in quantize.cpp
// Using an ordered map to get a deterministic iteration order.
std::map<std::string, std::pair<struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of a tuple, a small struct with struct ggml_tensor * fields might be more convenient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think keeping it as a tuple simplifies the code and aids maintainability, but if this approach would be a blocker for merging, happy to change.


for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
std::string name = cur->name;
Expand All @@ -738,19 +770,24 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {

if (string_remove_suffix(name, in_sum2_suffix)) {
// in_sum2
sums_counts_for[std::move(name)].first = cur;
std::get<0>(sums_counts_for[std::move(name)]) = cur;
} else if (string_remove_suffix(name, counts_suffix)) {
// counts
sums_counts_for[std::move(name)].second = cur;
} else {
std::get<1>(sums_counts_for[std::move(name)]) = cur;
} else if (string_remove_suffix(name, in_sum_suffix)) {
// in_sum
std::get<2>(sums_counts_for[std::move(name)]) = cur;
}
else {
// ignore other tensors
}
}

for (const auto & sc : sums_counts_for) {
const std::string & name = sc.first;
const struct ggml_tensor * in_sum2 = sc.second.first;
const struct ggml_tensor * counts = sc.second.second;
const struct ggml_tensor * in_sum2 = std::get<0>(sc.second);
const struct ggml_tensor * counts = std::get<1>(sc.second);
const struct ggml_tensor * in_sum = std::get<2>(sc.second);

if (!in_sum2 || !counts) {
LOG_ERR("%s: mismatched sums and counts for %s\n", __func__, name.c_str());
Expand All @@ -764,6 +801,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
int64_t nval = ggml_nelements(in_sum2);
if (e.values.empty()) {
e.values.resize(nval, 0.0f);
e.activations.resize(nval, 0.0f);
} else if ((size_t) nval != e.values.size()) {
LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
gguf_free(ctx_gguf);
Expand Down Expand Up @@ -791,6 +829,12 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
for (int64_t j = 0; j < ncounts; j++) {
e.counts[j] += std::lround(((const float *) counts->data)[j]);
}
// ToDo: fix blow up when GGUF does not have in_sum
if (in_sum->data != nullptr) {
for (int64_t j = 0; j < nval; j++) {
e.activations[j] += ((const float *) in_sum->data)[j];
}
}
}

// TODO: extract into its own method; this is also used by the legacy format
Expand Down
Loading