@@ -71,111 +71,61 @@ def _validate_request(self, request: EmbeddingRequest) -> Optional[str]:
 
     def _convert_to_internal_request(
         self,
-        all_requests: List[EmbeddingRequest],
-        request_ids: List[str],
+        request: EmbeddingRequest,
+        request_id: str,
     ) -> tuple[EmbeddingReqInput, Union[EmbeddingRequest, List[EmbeddingRequest]]]:
         """Convert OpenAI embedding request to internal format"""
-        prompts = [request.input for request in all_requests]
-
-        # Handle single vs multiple requests
-        if len(all_requests) == 1:
-            prompt = prompts[0]
-            if isinstance(prompt, str):
-                # Single string input
+        prompt = request.input
+        if isinstance(prompt, str):
+            # Single string input
+            prompt_kwargs = {"text": prompt}
+        elif isinstance(prompt, list):
+            if len(prompt) > 0 and isinstance(prompt[0], str):
+                # List of strings
                 prompt_kwargs = {"text": prompt}
-            elif isinstance(prompt, list):
-                if len(prompt) > 0 and isinstance(prompt[0], str):
-                    # List of strings
-                    prompt_kwargs = {"text": prompt}
-                elif len(prompt) > 0 and isinstance(
-                    prompt[0], MultimodalEmbeddingInput
-                ):
-                    # Handle multimodal embedding inputs
-                    texts = []
-                    images = []
-                    for item in prompt:
-                        # Use padding for text if None - this could be improved
-                        texts.append(item.text if item.text is not None else "padding")
-                        images.append(item.image if item.image is not None else None)
-
-                    generate_prompts = []
-                    # Check if we have a chat template for multimodal embeddings
-                    # This would need to be passed in from the server configuration
-                    chat_template_name = getattr(
-                        self.tokenizer_manager, "chat_template_name", None
-                    )
-                    if chat_template_name is not None:
-                        convs = generate_embedding_convs(
-                            texts, images, chat_template_name
-                        )
-                        for conv in convs:
-                            generate_prompts.append(conv.get_prompt())
-                    else:
-                        generate_prompts = texts
-
-                    if len(generate_prompts) == 1:
-                        prompt_kwargs = {
-                            "text": generate_prompts[0],
-                            "image_data": images[0],
-                        }
-                    else:
-                        prompt_kwargs = {
-                            "text": generate_prompts,
-                            "image_data": images,
-                        }
+            elif len(prompt) > 0 and isinstance(prompt[0], MultimodalEmbeddingInput):
+                # Handle multimodal embedding inputs
+                texts = []
+                images = []
+                for item in prompt:
+                    # Use padding for text if None - this could be improved
+                    texts.append(item.text if item.text is not None else "padding")
+                    images.append(item.image if item.image is not None else None)
+
+                generate_prompts = []
+                # Check if we have a chat template for multimodal embeddings
+                # This would need to be passed in from the server configuration
+                chat_template_name = getattr(
+                    self.tokenizer_manager, "chat_template_name", None
+                )
+                if chat_template_name is not None:
+                    convs = generate_embedding_convs(texts, images, chat_template_name)
+                    for conv in convs:
+                        generate_prompts.append(conv.get_prompt())
+                else:
+                    generate_prompts = texts
+
+                if len(generate_prompts) == 1:
+                    prompt_kwargs = {
+                        "text": generate_prompts[0],
+                        "image_data": images[0],
+                    }
                 else:
-                # List of integers (token IDs) or empty list
-                prompt_kwargs = {"input_ids": prompt}
+                    prompt_kwargs = {
+                        "text": generate_prompts,
+                        "image_data": images,
+                    }
             else:
-                # Other types (should not happen but handle gracefully)
+                # List of integers (token IDs) or empty list
                 prompt_kwargs = {"input_ids": prompt}
-            # Use the passed request_ids for single request
-            final_request_id = request_ids[0] if len(all_requests) == 1 else request_ids
         else:
-            # Handle batch requests
-            if len(prompts) > 0:
-                # Validate that all prompts have the same type
-                first_prompt = prompts[0]
-                first_type = type(first_prompt)
-                for i, prompt in enumerate(prompts[1:], 1):
-                    if type(prompt) != first_type:
-                        raise AssertionError(
-                            f"All prompts in batch must have the same type, but prompt at index {i} has different type"
-                        )
-
-                if isinstance(first_prompt, str):
-                    # Batch of strings
-                    prompt_kwargs = {"text": prompts}
-                elif isinstance(first_prompt, list):
-                    if len(first_prompt) > 0 and isinstance(first_prompt[0], str):
-                        # Batch of lists of strings
-                        prompt_kwargs = {"text": prompts}
-                    elif len(first_prompt) > 0 and isinstance(
-                        first_prompt[0], MultimodalEmbeddingInput
-                    ):
-                        # Handle multimodal batch requests
-                        raise NotImplementedError(
-                            "Multiple requests with multimodal inputs are not supported yet"
-                        )
-                    else:
-                        # Batch of token ID lists
-                        prompt_kwargs = {"input_ids": prompts}
-                else:
-                    # Other types
-                    prompt_kwargs = {"input_ids": prompts}
-            else:
-                prompt_kwargs = {"input_ids": prompts}
-            # Use the passed request_ids for batch requests
-            final_request_id = request_ids
-
+            # Other types (should not happen but handle gracefully)
+            prompt_kwargs = {"input_ids": prompt}
         adapted_request = EmbeddingReqInput(
-            rid=final_request_id,
             **prompt_kwargs,
         )
 
-        return adapted_request, (
-            all_requests[0] if len(all_requests) == 1 else all_requests
-        )
+        return adapted_request, request
 
     async def _handle_non_streaming_request(
         self,
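
The hunk above replaces the old single-vs-batch branching with one straight-line type dispatch on a single request's `input` field. For reference, here is a minimal, self-contained sketch of that dispatch. It illustrates the diff, it is not repository code: the function name `convert_embedding_input` and the simplified `MultimodalEmbeddingInput` stand-in are hypothetical, and the chat-template path is omitted.

    from dataclasses import dataclass
    from typing import Any, Dict, Optional


    @dataclass
    class MultimodalEmbeddingInput:  # hypothetical stand-in for the real model
        text: Optional[str] = None
        image: Optional[str] = None


    def convert_embedding_input(prompt: Any) -> Dict[str, Any]:
        """Sketch of the type dispatch in _convert_to_internal_request."""
        if isinstance(prompt, str):
            return {"text": prompt}  # single string
        if isinstance(prompt, list):
            if prompt and isinstance(prompt[0], str):
                return {"text": prompt}  # list of strings
            if prompt and isinstance(prompt[0], MultimodalEmbeddingInput):
                # "padding" substitutes for missing text, as in the diff
                texts = [p.text if p.text is not None else "padding" for p in prompt]
                images = [p.image for p in prompt]
                if len(texts) == 1:
                    return {"text": texts[0], "image_data": images[0]}
                return {"text": texts, "image_data": images}
            return {"input_ids": prompt}  # token IDs or empty list
        return {"input_ids": prompt}  # fallback for unexpected types


    assert convert_embedding_input("hello") == {"text": "hello"}
    assert convert_embedding_input([[1, 2, 3]]) == {"input_ids": [[1, 2, 3]]}
    assert convert_embedding_input(
        [MultimodalEmbeddingInput(text="a cat", image="img0")]
    ) == {"text": "a cat", "image_data": "img0"}
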
@@ -194,14 +144,10 @@ async def _handle_non_streaming_request(
         if not isinstance(ret, list):
             ret = [ret]
 
-        response = self._build_embedding_response(
-            ret, self.tokenizer_manager.model_path
-        )
+        response = self._build_embedding_response(ret)
         return response
 
-    def _build_embedding_response(
-        self, ret: List[Dict[str, Any]], model_path: str
-    ) -> EmbeddingResponse:
+    def _build_embedding_response(self, ret: List[Dict[str, Any]]) -> EmbeddingResponse:
         """Build the embedding response"""
         embedding_objects = []
         prompt_tokens = 0
@@ -219,7 +165,7 @@ def _build_embedding_response(
 
         return EmbeddingResponse(
             data=embedding_objects,
-            model=model_path,
+            model=self.tokenizer_manager.model_path,
             usage=UsageInfo(
                 prompt_tokens=prompt_tokens,
                 total_tokens=prompt_tokens,
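
The last two hunks apply the same simplification to response building: `model_path` already lives on the serving object through its tokenizer manager, so threading it into `_build_embedding_response` as a parameter was redundant. Below is a self-contained sketch of the resulting pattern; the stub types and the `ret` item keys (`embedding`, `prompt_tokens`) are assumptions for illustration, not the repository's real shapes.

    from dataclasses import dataclass
    from typing import Any, Dict, List


    @dataclass
    class TokenizerManagerStub:
        # Hypothetical stand-in for the real tokenizer manager.
        model_path: str


    class EmbeddingServerSketch:
        def __init__(self, tokenizer_manager: TokenizerManagerStub) -> None:
            self.tokenizer_manager = tokenizer_manager

        def build_embedding_response(self, ret: List[Dict[str, Any]]) -> Dict[str, Any]:
            # The model name is read from instance state instead of being passed in.
            prompt_tokens = sum(item.get("prompt_tokens", 0) for item in ret)
            return {
                "data": [item["embedding"] for item in ret],
                "model": self.tokenizer_manager.model_path,
                "usage": {"prompt_tokens": prompt_tokens, "total_tokens": prompt_tokens},
            }


    server = EmbeddingServerSketch(TokenizerManagerStub("my-org/embed-model"))
    print(server.build_embedding_response([{"embedding": [0.1, 0.2], "prompt_tokens": 3}]))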