From 5a01a5da822cee451ec977bfa6e1ccfbbc9fe4f6 Mon Sep 17 00:00:00 2001
From: kozistr
Date: Sat, 20 Sep 2025 19:00:51 +0900
Subject: [PATCH 1/3] fix: raise an error when max_input_length is bigger than max_batch_tokens

---
 router/src/lib.rs | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index 16707bcc..fc938ec4 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -184,13 +184,34 @@ pub async fn run(
             break;
         }
     }
-    let max_input_length = match st_config {
+
+    let base_input_length = match st_config {
         Some(config) => config.max_seq_length,
         None => {
             tracing::warn!("Could not find a Sentence Transformers config");
             config.max_position_embeddings - position_offset
         }
     };
+
+    // Raise an error when max_input_length is bigger than max_batch tokens to prevent an infinite loop in the queue
+    let max_input_length = if base_input_length > max_batch_tokens {
+        if !auto_truncate {
+            anyhow::bail!(
+                "`max_input_length` must be smaller than `max_batch_tokens` when `auto_truncate` is disabled ({} > {})",
+                base_input_length,
+                max_batch_tokens
+            );
+        }
+        tracing::warn!(
+            "Reduce `max_input_length` to `max_batch_tokens` (from {} to {})",
+            base_input_length,
+            max_batch_tokens
+        );
+        max_batch_tokens
+    } else {
+        base_input_length
+    };
+
     tracing::info!("Maximum number of tokens per request: {max_input_length}");
 
     let tokenization_workers = tokenization_workers.unwrap_or_else(num_cpus::get);

From fea48117c85d156f36d803dd43544955a59dfb0b Mon Sep 17 00:00:00 2001
From: Hyeongchan Kim
Date: Fri, 26 Sep 2025 01:40:41 +0900
Subject: [PATCH 2/3] Update router/src/lib.rs

Co-authored-by: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
---
 router/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index fc938ec4..c69111c6 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -197,7 +197,7 @@ pub async fn run(
     let max_input_length = if base_input_length > max_batch_tokens {
         if !auto_truncate {
             anyhow::bail!(
-                "`max_input_length` must be smaller than `max_batch_tokens` when `auto_truncate` is disabled ({} > {})",
+                "`--max-batch-tokens` cannot be lower than the model `max_input_length` ({} < {}) when `--auto-truncate` is disabled, add the `--auto-truncate` flag to truncate the input sequences to match the `--max-batch-tokens`.",
                 base_input_length,
                 max_batch_tokens
             );

From 7629ff1f7168d5e151d874b1be5b4a6bd3ed0c7f Mon Sep 17 00:00:00 2001
From: Hyeongchan Kim
Date: Fri, 26 Sep 2025 01:43:25 +0900
Subject: [PATCH 3/3] Update router/src/lib.rs

Co-authored-by: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
---
 router/src/lib.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index c69111c6..39c63bcc 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -203,7 +203,8 @@ pub async fn run(
             );
         }
         tracing::warn!(
-            "Reduce `max_input_length` to `max_batch_tokens` (from {} to {})",
+            "The input sequences will be truncated to {} tokens even if the model `max_input_length` is greater than the provided `--max-batch-tokens` ({} > {}), as `--auto-truncate` is enabled.",
+            max_batch_tokens,
            base_input_length,
             max_batch_tokens
         );