77
77
ParseFunctionCallReq ,
78
78
ProfileReqInput ,
79
79
ReleaseMemoryOccupationReqInput ,
80
+ ReportHealthInput ,
80
81
ResumeMemoryOccupationReqInput ,
81
82
SeparateReasoningReqInput ,
82
83
SetInternalStateReq ,
93
94
from sglang .srt .reasoning_parser import ReasoningParser
94
95
from sglang .srt .server_args import ServerArgs
95
96
from sglang .srt .utils import (
97
+ ServerStatus ,
96
98
add_api_key_middleware ,
97
99
add_prometheus_middleware ,
98
100
delete_directory ,
@@ -220,8 +222,31 @@ async def validate_json_request(raw_request: Request):
220
222
221
223
@app.get("/health")
async def health() -> Response:
    """Report the current status of the http server.

    Responds with HTTP 200 when the tokenizer manager's ``server_status`` is
    ``ServerStatus.Up`` and HTTP 503 otherwise. The body is a JSON object
    with a single ``status`` key holding the status value.
    """
    current_status = _global_state.tokenizer_manager.server_status
    is_up = current_status == ServerStatus.Up
    status_code = (
        HTTPStatus.OK.value if is_up else HTTPStatus.SERVICE_UNAVAILABLE.value
    )
    body = json.dumps({"status": current_status.value})
    return Response(status_code=status_code, content=body)
235
+
236
+
237
@app.post("/health")
async def health_update(obj: ReportHealthInput, request: Request) -> Response:
    """Update the status of the http server from an external health report.

    ``obj.status`` is converted to a ``ServerStatus`` and stored on the
    tokenizer manager.

    Returns:
        * HTTP 200 when the reported status is ``ServerStatus.Up``.
        * HTTP 503 with ``obj.msg`` as the body when any other valid status
          is reported (the new status is still recorded).
        * HTTP 503 with an empty body when the report cannot be applied,
          e.g. ``obj.status`` is not a valid ``ServerStatus`` value; the
          error is logged.
    """
    try:
        server_status = ServerStatus(obj.status)
        _global_state.tokenizer_manager.server_status = server_status
        if server_status != ServerStatus.Up:
            return Response(
                status_code=HTTPStatus.SERVICE_UNAVAILABLE.value, content=obj.msg
            )
    except Exception as e:
        logger.error(e)
        return Response(status_code=HTTPStatus.SERVICE_UNAVAILABLE.value)
    # Success path: answer explicitly instead of falling through and
    # returning None from a handler annotated to return a Response.
    return Response(status_code=HTTPStatus.OK.value)
225
250
226
251
227
252
@app .get ("/health_generate" )
@@ -256,7 +281,7 @@ async def gen():
256
281
if _global_state .tokenizer_manager .last_receive_tstamp > tic :
257
282
task .cancel ()
258
283
_global_state .tokenizer_manager .rid_to_state .pop (rid , None )
259
- _global_state .tokenizer_manager .health_check_failed = False
284
+ _global_state .tokenizer_manager .server_status = ServerStatus . Up
260
285
return Response (status_code = 200 )
261
286
262
287
task .cancel ()
@@ -270,7 +295,7 @@ async def gen():
270
295
f"last_heartbeat time: { last_receive_time } "
271
296
)
272
297
_global_state .tokenizer_manager .rid_to_state .pop (rid , None )
273
- _global_state .tokenizer_manager .health_check_failed = True
298
+ _global_state .tokenizer_manager .server_status = ServerStatus . UnHealthy
274
299
return Response (status_code = 503 )
275
300
276
301
@@ -1022,9 +1047,13 @@ def _execute_server_warmup(
1022
1047
headers = headers ,
1023
1048
timeout = 600 ,
1024
1049
)
1025
- assert res .status_code == 200 , f"{ res } "
1050
+ if res .status_code == 200 :
1051
+ _global_state .tokenizer_manager .server_status = ServerStatus .Up
1052
+ else :
1053
+ _global_state .tokenizer_manager .server_status = ServerStatus .UnHealthy
1054
+ logger .info (f"{ res } " )
1026
1055
else :
1027
- logger .info (f"Start of prefill warmup ..." )
1056
+ logger .info (f"Start of prefill/decode warmup ..." )
1028
1057
json_data = {
1029
1058
"sampling_params" : {
1030
1059
"temperature" : 0.0 ,
@@ -1046,15 +1075,25 @@ def _execute_server_warmup(
1046
1075
headers = headers ,
1047
1076
timeout = 1800 , # because of deep gemm precache is very long if not precache.
1048
1077
)
1049
- logger .info (
1050
- f"End of prefill warmup with status { res .status_code } , resp: { res .json ()} "
1051
- )
1078
+ if res .status_code == 200 :
1079
+ logger .info (
1080
+ f"End of prefill disaggregation mode warmup with status { res .status_code } , resp: { res .json ()} "
1081
+ )
1082
+ _global_state .tokenizer_manager .server_status = ServerStatus .Up
1083
+ else :
1084
+ logger .info (
1085
+ "Prefill disaggregation mode warm Up Failed, status code: {}" .format (
1086
+ res .status_code
1087
+ )
1088
+ )
1089
+ _global_state .tokenizer_manager .server_status = ServerStatus .UnHealthy
1052
1090
1053
1091
except Exception :
1054
1092
last_traceback = get_exception_traceback ()
1055
1093
if pipe_finish_writer is not None :
1056
1094
pipe_finish_writer .send (last_traceback )
1057
1095
logger .error (f"Initialization failed. warmup error: { last_traceback } " )
1096
+ _global_state .tokenizer_manager .server_status = ServerStatus .Crashed
1058
1097
kill_process_tree (os .getpid ())
1059
1098
return False
1060
1099
0 commit comments