@@ -308,6 +308,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
308
308
}
309
309
310
310
#define GGML_DEBUG 0
311
+
311
312
#define GGML_GELU_FP16
312
313
#define GGML_GELU_QUICK_FP16
313
314
@@ -1985,7 +1986,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
1985
1986
1986
1987
struct ggml_context {
1987
1988
size_t mem_size;
1988
- void* mem_buffer;
1989
+ void * mem_buffer;
1989
1990
bool mem_buffer_owned;
1990
1991
bool no_alloc;
1991
1992
bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3234,7 +3235,6 @@ struct ggml_numa_nodes {
3234
3235
//
3235
3236
3236
3237
struct ggml_state {
3237
- struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
3238
3238
struct ggml_numa_nodes numa;
3239
3239
};
3240
3240
@@ -3816,17 +3816,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3816
3816
const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
3817
3817
3818
3818
g_state = (struct ggml_state) {
3819
- /*.contexts =*/ { { 0 } },
3820
3819
/*.numa =*/ {
3821
3820
.n_nodes = 0,
3822
3821
.total_cpus = 0,
3823
3822
},
3824
3823
};
3825
3824
3826
- for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) {
3827
- g_state.contexts[i].used = false;
3828
- }
3829
-
3830
3825
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
3831
3826
3832
3827
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
@@ -3839,26 +3834,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3839
3834
is_first_call = false;
3840
3835
}
3841
3836
3842
- // find non-used context in g_state
3843
- struct ggml_context * ctx = NULL;
3844
-
3845
- for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3846
- if (!g_state.contexts[i].used) {
3847
- g_state.contexts[i].used = true;
3848
- ctx = &g_state.contexts[i].context;
3849
-
3850
- GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
3851
- break;
3852
- }
3853
- }
3854
-
3855
- if (ctx == NULL) {
3856
- GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
3857
-
3858
- ggml_critical_section_end();
3837
+ ggml_critical_section_end();
3859
3838
3860
- return NULL;
3861
- }
3839
+ struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
3862
3840
3863
3841
// allow to call ggml_init with 0 size
3864
3842
if (params.mem_size == 0) {
@@ -3886,42 +3864,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
3886
3864
3887
3865
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
3888
3866
3889
- ggml_critical_section_end();
3890
-
3891
3867
return ctx;
3892
3868
}
3893
3869
3894
- void ggml_free(struct ggml_context * ctx) {
3870
+ void ggml_reset(struct ggml_context * ctx) {
3895
3871
if (ctx == NULL) {
3896
3872
return;
3897
3873
}
3898
3874
3899
- // make this function thread safe
3900
- ggml_critical_section_start();
3901
-
3902
- bool found = false;
3903
-
3904
- for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3905
- if (&g_state.contexts[i].context == ctx) {
3906
- g_state.contexts[i].used = false;
3907
-
3908
- GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
3909
- __func__, i, ggml_used_mem(ctx));
3910
-
3911
- if (ctx->mem_buffer_owned) {
3912
- GGML_ALIGNED_FREE(ctx->mem_buffer);
3913
- }
3875
+ ctx->n_objects = 0;
3876
+ ctx->objects_begin = NULL;
3877
+ ctx->objects_end = NULL;
3878
+ ctx->scratch = (struct ggml_scratch) { 0, 0, NULL, };
3879
+ ctx->scratch_save = (struct ggml_scratch) { 0, 0, NULL, };
3880
+ }
3914
3881
3915
- found = true;
3916
- break;
3917
- }
3882
+ void ggml_free(struct ggml_context * ctx) {
3883
+ if (ctx == NULL) {
3884
+ return;
3918
3885
}
3919
3886
3920
- if (!found) {
3921
- GGML_PRINT_DEBUG("%s: context not found\n", __func__);
3887
+ if (ctx->mem_buffer_owned) {
3888
+ GGML_ALIGNED_FREE(ctx->mem_buffer);
3922
3889
}
3923
3890
3924
- ggml_critical_section_end();
3891
+ GGML_FREE(ctx);
3925
3892
}
3926
3893
3927
3894
size_t ggml_used_mem(const struct ggml_context * ctx) {
0 commit comments