Commit a3f66f0

[Qwen2] add max_pos_len in modeling_qwen2.py & remove max_pos_len in qwen2
1 parent: 66aed65

File tree: 6 files changed, +20 -11 lines


models/Qwen2/share_cache_demo/README.md (+7 -1)
````diff
@@ -25,7 +25,7 @@ cd build && cmake .. && make && cp *cpython* .. && cd ..
 
 ## 3. Run the python demo
 ```shell
-python pipeline.py --model_path encrypted.bmodel --tokenizer_path ../support/token_config/ --devid 0 --generation_mode penalty_sample --lib_path build/libcipher.so --embedding_path embedding.bin
+python3 pipeline.py --model_path encrypted.bmodel --tokenizer_path ../support/token_config/ --devid 0 --generation_mode penalty_sample --lib_path build/libcipher.so --embedding_path embedding.bin
 ```
 * io_alone_mode: when io_alone_mode=0, prefill runs normally; when io_alone_mode=1, the kvcache reuse scheme is used
 * model_path_list: model path; when using multiple models, separate them with commas
@@ -70,3 +70,9 @@ cp files/Qwen-7B-Chat/* your_torch_model
 ```shell
 model_tool --encrypt -model origin.bmodel -net_name block_0 -lib ./build/libcipher.so -o encrypted.bmodel
 ```
+
+### Reducing log output
+* To suppress log messages such as `Can't find network name`, run the following command
+```shell
+export BMRT_LOG_VERSION=3
+```
````
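The same switch can also be applied from the Python side before the runtime loads. A minimal sketch, under the assumption that bmruntime reads `BMRT_LOG_VERSION` when the compiled chat extension is first imported (the `chat` module name is an assumption based on the `*cpython*` artifact from the build step):

```python
import os

# Assumption: bmruntime checks BMRT_LOG_VERSION at library load time, so
# set it before importing the compiled extension built in the cmake step.
os.environ["BMRT_LOG_VERSION"] = "3"

import chat  # hypothetical module name for the *cpython* build artifact
```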

models/Qwen2/share_cache_demo/chat.cpp (+4 -4)
```diff
@@ -75,7 +75,7 @@ class Qwen {
                size_t size);
 
   // tensors
-  void make_in_tensors();
+  void make_in_tensors(bool read_bmodel);
   void free_in_tensors();
 
   // sample
@@ -499,8 +499,8 @@ void Qwen::init_params() {
   }
 }
 
-void Qwen::make_in_tensors() {
-  if (inputs_pid.device_mem.u.device.device_addr > 0x100000000 && inputs_pid.device_mem.u.device.device_addr < 0x500000000){
+void Qwen::make_in_tensors(bool read_bmodel) {
+  if (!read_bmodel && inputs_pid.device_mem.u.device.device_addr > 0x100000000 && inputs_pid.device_mem.u.device.device_addr < 0x500000000){
     free_in_tensors();
   }
 
@@ -557,7 +557,7 @@ void Qwen::init(const std::vector<int> &devices, const std::string &model_path,
   init_params();
 
   // step4 : make in tensors
-  make_in_tensors();
+  make_in_tensors(read_bmodel);
 }
 
 void Qwen::free_in_tensors() {
```
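The guard now frees the old input tensors only on the reuse path: when `read_bmodel` is true the bmodel was just (re)loaded and the tensors must simply be created, while the device-address range test approximates "input tensors currently allocated". A Python mirror of that control flow, with the address test replaced by an explicit flag (all names here are illustrative, not the demo's API):

```python
# Illustrative mirror of Qwen::make_in_tensors(bool read_bmodel); the C++
# device-address range check stands in for "tensors already allocated".
def make_in_tensors(state: dict, read_bmodel: bool) -> None:
    if not read_bmodel and state.get("in_tensors_allocated", False):
        free_in_tensors(state)  # reuse path: drop the stale tensors first
    # ... (re)create the input tensors against the current device memory ...
    state["in_tensors_allocated"] = True

def free_in_tensors(state: dict) -> None:
    state["in_tensors_allocated"] = False
```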

models/Qwen2/share_cache_demo/export_onnx.py (+2 -0)
```diff
@@ -92,6 +92,7 @@ def forward(self, hidden_states, position_ids, attention_mask):
             attention_mask=attention_mask,
             position_ids=position_ids,
             use_cache=True,
+            max_pos_len=args.max_pos_len
         )
         present_k, present_v = past_kv
         return hidden_states.float(), present_k.float(), present_v.float()
@@ -110,6 +111,7 @@ def forward(self, hidden_states, position_ids, attention_mask, past_k, past_v):
             position_ids=position_ids,
             attention_mask=attention_mask,
             use_cache=True,
+            max_pos_len=args.max_pos_len
         )
         present_k, present_v = past_kv
         return hidden_states.float(), present_k.float(), present_v.float()
```
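Both hunks read `args.max_pos_len`, so the export script is expected to define a matching command-line flag. A hedged sketch of what that definition could look like; the flag name is inferred from the attribute above, and the 8704 default is the value this commit removed from config.json:

```python
import argparse

parser = argparse.ArgumentParser(description="export Qwen2 blocks to ONNX")
# Hypothetical definition inferred from args.max_pos_len; the 8704 default
# is borrowed from the entry removed from config.json in this commit.
parser.add_argument("--max_pos_len", type=int, default=8704,
                    help="sequence length for the rotary cos/sin tables")
args = parser.parse_args()
```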

models/Qwen2/share_cache_demo/files/Qwen2-7B-Instruct/config.json (+1 -2)
```diff
@@ -23,6 +23,5 @@
   "transformers_version": "4.41.2",
   "use_cache": true,
   "use_sliding_window": false,
-  "vocab_size": 152064,
-  "max_pos_len": 8704
+  "vocab_size": 152064
 }
```

models/Qwen2/share_cache_demo/files/Qwen2-7B-Instruct/modeling_qwen2.py (+4 -2)
```diff
@@ -239,7 +239,6 @@ def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
         self.rope_theta = config.rope_theta
         self.is_causal = True
         self.attention_dropout = config.attention_dropout
-        self.max_pos_len = config.max_pos_len
 
         if (self.head_dim * self.num_heads) != self.hidden_size:
             raise ValueError(
@@ -265,6 +264,7 @@ def forward(
         past_key_value: Optional[Cache] = None,
         output_attentions: bool = False,
         use_cache: bool = False,
+        max_pos_len: Optional[int] = 0,
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         bsz, q_len, _ = hidden_states.size()
 
@@ -294,7 +294,7 @@
         )
         # kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
 
-        cos, sin = self.rotary_emb(value_states, seq_len=self.max_pos_len)
+        cos, sin = self.rotary_emb(value_states, seq_len=max_pos_len)
         # if past_key_value is not None:
         #     cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len-1)
         # else:
@@ -764,6 +764,7 @@ def forward(
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        max_pos_len: Optional[int] = 0,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
         """
         Args:
@@ -791,6 +792,7 @@
             past_key_value=past_key_value,
             output_attentions=output_attentions,
             use_cache=use_cache,
+            max_pos_len=max_pos_len
         )
         hidden_states = residual + hidden_states
 
```
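Why `seq_len=max_pos_len` instead of a per-step length: the rotary cos/sin tables are built once at a fixed maximal length, which keeps the traced graph shape-stable across prefill and decode. A minimal sketch of the standard RoPE table computation at that fixed length; `head_dim=128` and `rope_theta=1e6` are assumptions for Qwen2-7B-Instruct, while 8704 matches the value this commit moved out of config.json:

```python
import torch

def build_rope_cache(head_dim: int, max_pos_len: int, rope_theta: float = 1e6):
    # Standard RoPE tables: one inverse frequency per even channel, then
    # cos/sin for every position index up to max_pos_len.
    inv_freq = 1.0 / (rope_theta ** (torch.arange(0, head_dim, 2).float() / head_dim))
    t = torch.arange(max_pos_len, dtype=torch.float32)
    freqs = torch.outer(t, inv_freq)          # (max_pos_len, head_dim // 2)
    emb = torch.cat((freqs, freqs), dim=-1)   # (max_pos_len, head_dim)
    return emb.cos(), emb.sin()

# assumed Qwen2-7B geometry; 8704 is taken from the removed config entry
cos, sin = build_rope_cache(head_dim=128, max_pos_len=8704)
```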

models/Qwen2/share_cache_demo/pipeline.py (+2 -2)
```diff
@@ -310,10 +310,10 @@ def main(args):
     engine = Qwen(args)
 
     # 1. test one sample
-    engine.test_sample()
+    # engine.test_sample()
 
     # 2. test random
-    # engine.test_random()
+    engine.test_random()
 
     # 2. test c-eval
     # engine.test_ceval()
```
