@@ -2922,6 +2922,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
2922
2922
/* .thold_pt =*/ 0 .01f ,
2923
2923
/* .thold_ptsum =*/ 0 .01f ,
2924
2924
/* .max_len =*/ 0 ,
2925
+ /* .split_on_word =*/ false ,
2925
2926
/* .max_tokens =*/ 0 ,
2926
2927
2927
2928
/* .speed_up =*/ false ,
@@ -2988,9 +2989,36 @@ static void whisper_exp_compute_token_level_timestamps(
2988
2989
float thold_pt,
2989
2990
float thold_ptsum);
2990
2991
2992
// remove leading whitespace from `s` (in place)
// a character counts as whitespace when std::isspace() reports it as such
static inline void ltrim(std::string &s) {
    std::size_t n_leading = 0;

    // count how many characters at the front are whitespace
    // (cast to unsigned char: std::isspace must not be handed a negative value)
    while (n_leading < s.size() && std::isspace(static_cast<unsigned char>(s[n_leading]))) {
        ++n_leading;
    }

    s.erase(0, n_leading);
}
2998
+
2999
// remove trailing whitespace from `s` (in place)
// a character counts as whitespace when std::isspace() reports it as such
static inline void rtrim(std::string &s) {
    std::size_t n_keep = s.size();

    // walk backwards until the last non-whitespace character is found
    // (cast to unsigned char: std::isspace must not be handed a negative value)
    while (n_keep > 0 && std::isspace(static_cast<unsigned char>(s[n_keep - 1]))) {
        --n_keep;
    }

    // drop everything after the last kept character
    s.erase(n_keep);
}
3005
+
3006
+ // trim from both ends (in place)
3007
+ static inline void trim (std::string &s) {
3008
+ rtrim (s);
3009
+ ltrim (s);
3010
+ }
3011
+
3012
// decide whether a split is permitted right before the token text `txt`
//   txt           - NUL-terminated token text; a null pointer is treated as "no text"
//   split_on_word - when false, splitting is unrestricted and the function returns true
// returns true if splitting is unrestricted, or if `txt` starts with a single
// space character ' '; returns false otherwise (including for empty text)
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
    if (!split_on_word) return true;

    // only the first character matters, so look at it directly instead of
    // copying the whole token into a std::string (which would also be
    // undefined behaviour for a null pointer)
    return txt != nullptr && txt[0] == ' ';
}
3018
+
2991
3019
// wrap the last segment to max_len characters
2992
3020
// returns the number of new segments
2993
- static int whisper_wrap_segment (struct whisper_context & ctx, int max_len) {
3021
+ static int whisper_wrap_segment (struct whisper_context & ctx, int max_len, bool split_on_word ) {
2994
3022
auto segment = ctx.result_all .back ();
2995
3023
2996
3024
int res = 1 ;
@@ -3005,11 +3033,11 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len) {
3005
3033
}
3006
3034
3007
3035
const auto txt = whisper_token_to_str (&ctx, token.id );
3008
-
3009
3036
const int cur = strlen (txt);
3010
3037
3011
- if (acc + cur > max_len && i > 0 ) {
3038
+ if (acc + cur > max_len && i > 0 && should_split_on_word (txt, split_on_word) ) {
3012
3039
// split here
3040
+ trim (text);
3013
3041
ctx.result_all .back ().text = std::move (text);
3014
3042
ctx.result_all .back ().t1 = token.t0 ;
3015
3043
ctx.result_all .back ().tokens .resize (i);
@@ -3037,6 +3065,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len) {
3037
3065
}
3038
3066
}
3039
3067
3068
+ trim (text);
3040
3069
ctx.result_all .back ().text = std::move (text);
3041
3070
3042
3071
return res;
@@ -4069,7 +4098,7 @@ int whisper_full(
4069
4098
*ctx, result_all.size () - 1 , params.thold_pt , params.thold_ptsum );
4070
4099
4071
4100
if (params.max_len > 0 ) {
4072
- n_new = whisper_wrap_segment (*ctx, params.max_len );
4101
+ n_new = whisper_wrap_segment (*ctx, params.max_len , params. split_on_word );
4073
4102
}
4074
4103
}
4075
4104
if (params.new_segment_callback ) {
@@ -4113,7 +4142,7 @@ int whisper_full(
4113
4142
*ctx, result_all.size () - 1 , params.thold_pt , params.thold_ptsum );
4114
4143
4115
4144
if (params.max_len > 0 ) {
4116
- n_new = whisper_wrap_segment (*ctx, params.max_len );
4145
+ n_new = whisper_wrap_segment (*ctx, params.max_len , params. split_on_word );
4117
4146
}
4118
4147
}
4119
4148
if (params.new_segment_callback ) {
0 commit comments