@@ -2440,6 +2440,37 @@ def abort_request(self, recv_req: AbortReq):
2440
2440
req .grammar .cancel ()
2441
2441
req .set_finish_with_abort ("Aborted by AbortReq." )
2442
2442
2443
+ # Delete requests not in the waiting queue when PD disaggregation is enabled
2444
+ if self .disaggregation_mode == DisaggregationMode .PREFILL :
2445
+ # Abort requests that have not yet been bootstrapped
2446
+ for i , req in enumerate (self .disagg_prefill_bootstrap_queue .queue ):
2447
+ logger .debug (f"Abort bootstrap queue request. { req .rid = } " )
2448
+ if recv_req .abort_all or req .rid .startswith (recv_req .rid ):
2449
+ if hasattr (req .disagg_kv_sender , "abort" ):
2450
+ req .disagg_kv_sender .abort ()
2451
+
2452
+ # Abort in-flight requests
2453
+ for i , req in enumerate (self .disagg_prefill_inflight_queue ):
2454
+ logger .debug (f"Abort inflight queue request. { req .rid = } " )
2455
+ if recv_req .abort_all or req .rid .startswith (recv_req .rid ):
2456
+ if hasattr (req .disagg_kv_sender , "abort" ):
2457
+ req .disagg_kv_sender .abort ()
2458
+
2459
+ elif self .disaggregation_mode == DisaggregationMode .DECODE :
2460
+ # Abort requests that have not yet finished preallocation
2461
+ for i , decode_req in enumerate (self .disagg_decode_prealloc_queue .queue ):
2462
+ logger .debug (f"Abort prealloc queue request. { decode_req .req .rid = } " )
2463
+ if recv_req .abort_all or decode_req .req .rid .startswith (recv_req .rid ):
2464
+ if hasattr (decode_req .kv_receiver , "abort" ):
2465
+ decode_req .kv_receiver .abort ()
2466
+
2467
+ # Abort requests waiting for kvcache to release tree cache
2468
+ for i , decode_req in enumerate (self .disagg_decode_transfer_queue .queue ):
2469
+ logger .debug (f"Abort transfer queue request. { decode_req .req .rid = } " )
2470
+ if recv_req .abort_all or decode_req .req .rid .startswith (recv_req .rid ):
2471
+ if hasattr (decode_req .kv_receiver , "abort" ):
2472
+ decode_req .kv_receiver .abort ()
2473
+
2443
2474
# Delete requests in the running batch
2444
2475
if self .cur_batch is self .running_batch or self .cur_batch is None :
2445
2476
reqs = self .running_batch .reqs
0 commit comments