@@ -308,6 +308,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
308
308
}
309
309
310
310
#define GGML_DEBUG 0
311
+ #define GGML_MAX_CONTEXTS 64 // pre-allocated contexts in static memory
312
+
311
313
#define GGML_GELU_FP16
312
314
#define GGML_GELU_QUICK_FP16
313
315
@@ -1985,7 +1987,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
1985
1987
1986
1988
struct ggml_context {
1987
1989
size_t mem_size;
1988
- void* mem_buffer;
1990
+ void * mem_buffer;
1989
1991
bool mem_buffer_owned;
1990
1992
bool no_alloc;
1991
1993
bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3839,7 +3841,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3839
3841
is_first_call = false;
3840
3842
}
3841
3843
3842
- // find non-used context in g_state
3844
+ // find non-used static context in g_state
3843
3845
struct ggml_context * ctx = NULL;
3844
3846
3845
3847
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
@@ -3852,12 +3854,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3852
3854
}
3853
3855
}
3854
3856
3855
- if (ctx == NULL) {
3856
- GGML_LOG_ERROR("%s: ran out of contexts (max = %d)\n", __func__, GGML_MAX_CONTEXTS);
3857
+ ggml_critical_section_end();
3857
3858
3858
- ggml_critical_section_end();
3859
+ if (ctx == NULL) {
3860
+ GGML_PRINT_DEBUG("%s: no static contexts available, allocating on the heap\n", __func__);
3859
3861
3860
- return NULL ;
3862
+ ctx = GGML_ALIGNED_MALLOC(sizeof(struct ggml_context)) ;
3861
3863
}
3862
3864
3863
3865
// allow to call ggml_init with 0 size
@@ -3886,8 +3888,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3886
3888
3887
3889
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
3888
3890
3889
- ggml_critical_section_end();
3890
-
3891
3891
return ctx;
3892
3892
}
3893
3893
@@ -3896,6 +3896,10 @@ void ggml_free(struct ggml_context * ctx) {
3896
3896
return;
3897
3897
}
3898
3898
3899
+ if (ctx->mem_buffer_owned) {
3900
+ GGML_ALIGNED_FREE(ctx->mem_buffer);
3901
+ }
3902
+
3899
3903
// make this function thread safe
3900
3904
ggml_critical_section_start();
3901
3905
@@ -3905,23 +3909,19 @@ void ggml_free(struct ggml_context * ctx) {
3905
3909
if (&g_state.contexts[i].context == ctx) {
3906
3910
g_state.contexts[i].used = false;
3907
3911
3908
- GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
3909
- __func__, i, ggml_used_mem(ctx));
3910
-
3911
- if (ctx->mem_buffer_owned) {
3912
- GGML_ALIGNED_FREE(ctx->mem_buffer);
3913
- }
3912
+ GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n", __func__, i, ggml_used_mem(ctx));
3914
3913
3915
3914
found = true;
3916
3915
break;
3917
3916
}
3918
3917
}
3919
3918
3919
+ ggml_critical_section_end();
3920
+
3920
3921
if (!found) {
3921
- GGML_PRINT_DEBUG("%s: context not found\n", __func__);
3922
+ // this is a heap-allocated context
3923
+ GGML_ALIGNED_FREE(ctx);
3922
3924
}
3923
-
3924
- ggml_critical_section_end();
3925
3925
}
3926
3926
3927
3927
size_t ggml_used_mem(const struct ggml_context * ctx) {
0 commit comments