@@ -144,26 +144,26 @@ StaticLLMPipeline::StaticLLMPipeline(
144
144
*/
145
145
ov::Core core;
146
146
// (1) Read the template model - this will be kvcache model
147
- auto kvcache_model = core.read_model (path / " openvino_model.xml" );
147
+ m_kvcache_model = core.read_model (path / " openvino_model.xml" );
148
148
// (2) Expose KV-cache input and output layers from kvcache model
149
- ov::pass::StatefulToStateless ().run_on_model (kvcache_model );
149
+ ov::pass::StatefulToStateless ().run_on_model (m_kvcache_model );
150
150
// (3) Clone the model - this will be prefill
151
- auto prefill_model = kvcache_model ->clone ();
152
- prefill_model ->set_friendly_name (kvcache_model ->get_friendly_name () + " _prefill" );
151
+ m_prefill_model = m_kvcache_model ->clone ();
152
+ m_prefill_model ->set_friendly_name (m_kvcache_model ->get_friendly_name () + " _prefill" );
153
153
// (4) Reshape both models to static shape
154
154
m_kvcache_desc = KVCacheDesc { 1024u , 0u };
155
155
const uint32_t max_prompt_size = m_kvcache_desc.total_size ;
156
156
const uint32_t max_kvcache_size = m_kvcache_desc.total_size ;
157
- reshape_to_static (prefill_model , max_prompt_size, max_kvcache_size);
158
- reshape_to_static (kvcache_model , 1u , max_kvcache_size);
157
+ reshape_to_static (m_prefill_model , max_prompt_size, max_kvcache_size);
158
+ reshape_to_static (m_kvcache_model , 1u , max_kvcache_size);
159
159
// (5) Add slices to kvcache model
160
- kvcache_model = add_slices_to_kvcache_inputs (kvcache_model );
160
+ m_kvcache_model = add_slices_to_kvcache_inputs (m_kvcache_model );
161
161
// (6) Compile both models
162
162
m_prefill_request = core.compile_model (
163
- prefill_model , device, extract_config_or_default (config, " PREFILL_CONFIG" )
163
+ m_prefill_model , device, extract_config_or_default (config, " PREFILL_CONFIG" )
164
164
).create_infer_request ();
165
165
m_kvcache_request = core.compile_model (
166
- kvcache_model , device, extract_config_or_default (config, " GENERATE_CONFIG" )
166
+ m_kvcache_model , device, extract_config_or_default (config, " GENERATE_CONFIG" )
167
167
).create_infer_request ();
168
168
// (7) Initialize tensors
169
169
prepare_for_new_conversation ();
0 commit comments