@@ -62,8 +62,6 @@ def from_model_path(
         model_path: str,
         device: str = "CPU",
         tokenizer: Any = None,
-        draft_model_path: Optional[str] = None,
-        draft_model_device: Optional[str] = "CPU",
         **kwargs: Any,
     ) -> OpenVINOLLM:
         """Construct the oepnvino object from model_path"""
@@ -206,11 +204,7 @@ def put(self, token_id: int) -> bool:
                     return False
                 return super().put(token_id)

-        if draft_model_path is not None:
-            draft_model = openvino_genai.draft_model(draft_model_path, draft_model_device)
-            pipe = openvino_genai.LLMPipeline(model_path, device, draft_model=draft_model)
-        else:
-            pipe = openvino_genai.LLMPipeline(model_path, device)
+        pipe = openvino_genai.LLMPipeline(model_path, device, **kwargs)

         config = pipe.get_generation_config()
         if tokenizer is None:
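
The removed branch is not a loss of functionality: from_model_path now forwards **kwargs verbatim to openvino_genai.LLMPipeline, so speculative decoding can still be enabled by passing the draft model as a keyword argument. A minimal usage sketch with hypothetical paths, reproducing what the removed draft_model_path branch did:

    import openvino_genai

    llm = OpenVINOLLM.from_model_path(
        "path/to/main_model",  # hypothetical model directory
        device="CPU",
        # Forwarded through **kwargs to openvino_genai.LLMPipeline,
        # matching the removed if-branch above.
        draft_model=openvino_genai.draft_model("path/to/draft_model", "CPU"),
    )
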
@@ -245,7 +239,7 @@ def _call(
         input_ids = tokens["input_ids"]
         attention_mask = tokens["attention_mask"]
         prompt = openvino_genai.TokenizedInputs(ov.Tensor(input_ids), ov.Tensor(attention_mask))
-        output = self.pipe.generate(prompt, self.config)
+        output = self.pipe.generate(prompt, self.config, **kwargs)
         if not isinstance(self.tokenizer, openvino_genai.Tokenizer):
             output = self.tokenizer.batch_decode(output.tokens, skip_special_tokens=True)[0]
         return output
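
Because **kwargs now reach pipe.generate, decoding options can be overridden per call instead of by mutating self.config. A sketch, assuming llama-index routes completion kwargs down to _call and that the extra keywords are valid openvino_genai generation-config fields (max_new_tokens here):

    # Hypothetical per-call override; max_new_tokens is forwarded to
    # pipe.generate(prompt, self.config, **kwargs).
    response = llm.complete("What is OpenVINO?", max_new_tokens=64)
    print(response.text)
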
@@ -280,7 +274,7 @@ def generate_and_signal_complete() -> None:
             genration function for single thread
             """
             self.streamer.reset()
-            self.pipe.generate(prompt, self.config, self.streamer)
+            self.pipe.generate(prompt, self.config, self.streamer, **kwargs)
             stream_complete.set()
             self.streamer.end()
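
The streaming path gets the same forwarding, so per-call keywords also apply while tokens are pushed through the streamer. A sketch under the same assumptions as above:

    # Streamed completion; chunk.delta holds the newly generated text.
    for chunk in llm.stream_complete("Tell me about OpenVINO", max_new_tokens=64):
        print(chunk.delta, end="", flush=True)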