
Commit 9e90034

support InternVL2-2B
1 parent a3f66f0 commit 9e90034

21 files changed, +2499 -140 lines

README.md

+2 -1
@@ -52,6 +52,7 @@
 |Yi-34B-chat |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-34B-Chat) |
 |Qwen-VL-Chat |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
 |InternVL2-4B |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/OpenGVLab/InternVL2-4B) |
+|InternVL2-2B |:white_check_mark:|:white_check_mark:| |[LINK](https://huggingface.co/OpenGVLab/InternVL2-2B) |
 
 
 If you want to see the conversion details and source code, go to the [models](./models) subdirectory of this project for the deployment details of each model.
@@ -86,7 +87,7 @@ git clone https://github.com/sophgo/LLM-TPU.git
 | Qwen1.5-1.8B | ./run.sh --model qwen1.5-1.8b --arch soc | ./run.sh --model qwen1.5-1.8b --arch pcie |
 | LWM-Text-Chat | ./run.sh --model lwm-text-chat --arch soc | ./run.sh --model lwm-text-chat --arch pcie |
 | WizardCoder-15B | ./run.sh --model wizardcoder-15b --arch soc | ./run.sh --model wizardcoder-15b --arch pcie |
-
+| InternVL2-4B | ./run.sh --model internvl2-4b --arch soc | ./run.sh --model internvl2-4b --arch pcie |
 
 ## Advanced features
 Notes on advanced features:

models/InternVL2/README.md

+12 -2
@@ -83,10 +83,20 @@ cd build && cmake .. && make && cp *cpython* .. && cd ..
 * python demo
 
 ```
-python3 pipeline.py --model_path internvl2-4b_bm1684x_int4.bmodel --tokenizer ../support/token_config/ --devid 0
+python3 pipeline.py --model_path internvl2-4b_bm1684x_int4.bmodel --tokenizer ../support/token_config_4b --devid 0
 ```
 model is the actual path where the model is stored; tokenizer_path is the actual path where the tokenizer configuration is stored
 
 * Run result
 
-![](../../assets/internvl2-4b.png)
+![](../../assets/internvl2-4b.png)
+
+## FAQ
+
+#### Is InternVL2-2B supported?
+
+Yes, it is supported; the steps are basically the same.
+1. Replace the files in `InternVL2-2B` with the files under `files/InternVL2-2B`;
+2. Run `export_onnx.py` to export the ONNX models;
+3. Run `./compile.sh --name internvl2-2b` to generate the model `internvl2-2b_bm1684x_int4.bmodel`;
+4. Running the program is the same, except that `token_config_2b` must be specified; run: `python3 pipeline.py --model_path internvl2-2b_bm1684x_int4.bmodel --tokenizer ../support/token_config_2b --devid 0`
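Taken together, the four FAQ steps added above correspond to a command sequence like the following. This is a minimal sketch rather than part of the commit: the `cp` step, the working directories, and the `--model_path` argument passed to `export_onnx.py` are assumptions inferred from the existing InternVL2-4B instructions (`export_onnx.py` may take further arguments not visible in this diff); only the `compile.sh` and `pipeline.py` invocations are quoted from the steps above.

```bash
# Sketch of the InternVL2-2B flow described in the FAQ above; paths are assumptions.

# 1. Overwrite the downloaded InternVL2-2B checkpoint with the patched files
cp -rf files/InternVL2-2B/* InternVL2-2B/

# 2. Export the ONNX models from the patched checkpoint
python3 export_onnx.py --model_path ./InternVL2-2B

# 3. Compile the INT4 bmodel for BM1684X
./compile.sh --name internvl2-2b

# 4. Run the demo, pointing at the 2B bmodel and the 2B tokenizer config
python3 pipeline.py --model_path internvl2-2b_bm1684x_int4.bmodel \
    --tokenizer ../support/token_config_2b --devid 0
```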

models/InternVL2/compile/compile.sh

+1 -1
@@ -54,7 +54,7 @@ fi
 
 if [ x$mode == x"int8" ]; then
     quantize_args="--quantize W8BF16"
-elif [ x$mode == x"f16" ]; then
+elif [ x$mode == x"bf16" ]; then
     quantize_args="--quantize BF16"
 elif [ x$mode == x"int4" ]; then
     quantize_args="--quantize W4BF16 --q_group_size 64"

models/InternVL2/compile/export_onnx.py

+25 -70
@@ -29,6 +29,7 @@
 args = parser.parse_args()
 
 model_path = args.model_path
+is_4B = "InternVL2-4B" in model_path
 folder = f"./tmp/onnx"
 
 origin_model = AutoModelForCausalLM.from_pretrained(
@@ -48,7 +49,7 @@
 HEAD_DIM = HIDDEN_SIZE // NUM_ATTENTION_HEADS
 VOCAB_SIZE = config.llm_config.vocab_size
 DOWNSAMPLE_RATIO = config.downsample_ratio
-EOS_TOKEN_ID = config.llm_config.eos_token_id
+ID_EOS = config.llm_config.eos_token_id
 print(f'Layers: {NUM_LAYERS}\nHidden size: {HIDDEN_SIZE}\n')
 
 vit = origin_model.vision_model
@@ -63,9 +64,10 @@ class Embedding(torch.nn.Module):
 
     def __init__(self):
        super().__init__()
+        self.embed = transformer.get_input_embeddings()
 
     def forward(self, input_ids):
-        hidden_states = transformer.embed_tokens(input_ids)
+        hidden_states = self.embed(input_ids)
        return hidden_states
 
 
@@ -75,13 +77,18 @@ def __init__(self, layer_id):
         super().__init__()
         self.layer_id = layer_id
         self.layer = layers[layer_id]
-        self.rotary_emb = self.layer.self_attn.rotary_emb
+
         position_ids = torch.tensor(
             [range(SEQ_LENGTH)], dtype=torch.long).cuda()
         value_states = torch.randn(
             (1, SEQ_LENGTH, config.llm_config.num_key_value_heads, HEAD_DIM)).bfloat16().cuda()
-        self.cos, self.sin = self.rotary_emb(
-            value_states, position_ids, SEQ_LENGTH)
+        if is_4B:
+            self.rotary_emb = self.layer.self_attn.rotary_emb
+            self.cos, self.sin = self.rotary_emb(
+                value_states, position_ids, SEQ_LENGTH)
+        else:
+            self.rotary_emb = self.layer.attention.rotary_emb
+            self.cos, self.sin = self.rotary_emb(value_states, SEQ_LENGTH)
         self.cos = self.cos.view(SEQ_LENGTH, HEAD_DIM)
         self.sin = self.sin.view(SEQ_LENGTH, HEAD_DIM)
 
@@ -105,13 +112,17 @@ def __init__(self, layer_id):
         super().__init__()
         self.layer_id = layer_id
         self.layer = layers[layer_id]
-        self.rotary_emb = self.layer.self_attn.rotary_emb
         position_ids = torch.tensor(
             [range(SEQ_LENGTH)], dtype=torch.long).cuda()
         value_states = torch.randn(
             (1, SEQ_LENGTH, config.llm_config.num_key_value_heads, HEAD_DIM)).bfloat16().cuda()
-        self.cos, self.sin = self.rotary_emb(
-            value_states, position_ids, SEQ_LENGTH)
+        if is_4B:
+            self.rotary_emb = self.layer.self_attn.rotary_emb
+            self.cos, self.sin = self.rotary_emb(
+                value_states, position_ids, SEQ_LENGTH)
+        else:
+            self.rotary_emb = self.layer.attention.rotary_emb
+            self.cos, self.sin = self.rotary_emb(value_states, SEQ_LENGTH)
         self.cos = self.cos.view(SEQ_LENGTH, HEAD_DIM)
         self.sin = self.sin.view(SEQ_LENGTH, HEAD_DIM)
 
@@ -134,10 +145,11 @@ class LmHead(torch.nn.Module):
 
     def __init__(self):
         super().__init__()
+        self.lm_head = origin_model.language_model.get_output_embeddings()
 
     def forward(self, hidden_states):
         hidden_states = transformer.norm(hidden_states)
-        m_logits = origin_model.language_model.lm_head(hidden_states)
+        m_logits = self.lm_head(hidden_states)
         _, token = torch.topk(m_logits.float(), 1)
         return token
 
@@ -251,68 +263,10 @@ def build_transform(input_size):
     return transform
 
 
-def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
-    best_ratio_diff = float('inf')
-    best_ratio = (1, 1)
-    area = width * height
-    for ratio in target_ratios:
-        target_aspect_ratio = ratio[0] / ratio[1]
-        ratio_diff = abs(aspect_ratio - target_aspect_ratio)
-        if ratio_diff < best_ratio_diff:
-            best_ratio_diff = ratio_diff
-            best_ratio = ratio
-        elif ratio_diff == best_ratio_diff:
-            if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
-                best_ratio = ratio
-    return best_ratio
-
-
-def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
-    orig_width, orig_height = image.size
-    aspect_ratio = orig_width / orig_height
-
-    # calculate the existing image aspect ratio
-    target_ratios = set(
-        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
-        i * j <= max_num and i * j >= min_num)
-    target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
-
-    # find the closest aspect ratio to the target
-    target_aspect_ratio = find_closest_aspect_ratio(
-        aspect_ratio, target_ratios, orig_width, orig_height, image_size)
-
-    # calculate the target width and height
-    target_width = image_size * target_aspect_ratio[0]
-    target_height = image_size * target_aspect_ratio[1]
-    blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
-
-    # resize the image
-    resized_img = image.resize((target_width, target_height))
-    processed_images = []
-    for i in range(blocks):
-        box = (
-            (i % (target_width // image_size)) * image_size,
-            (i // (target_width // image_size)) * image_size,
-            ((i % (target_width // image_size)) + 1) * image_size,
-            ((i // (target_width // image_size)) + 1) * image_size
-        )
-        # split the image
-        split_img = resized_img.crop(box)
-        processed_images.append(split_img)
-    assert len(processed_images) == blocks
-    if use_thumbnail and len(processed_images) != 1:
-        thumbnail_img = image.resize((image_size, image_size))
-        processed_images.append(thumbnail_img)
-    return processed_images
-
-
 def load_image(image_file, input_size=448, max_num=12):
     image = Image.open(image_file).convert('RGB')
     transform = build_transform(input_size=input_size)
-    images = dynamic_preprocess(
-        image, image_size=input_size, use_thumbnail=True, max_num=max_num)
-    pixel_values = [transform(image) for image in images]
-    pixel_values = torch.stack(pixel_values)
+    pixel_values = transform(image)
     return pixel_values
 
 
@@ -332,7 +286,8 @@ def test_net_with_mask():
     pixel_values = load_image(jpg, max_num=1).to(
         torch.bfloat16).cuda() # [1, 3, 448, 448]
     vit_embeds = vit_infer(pixel_values) # [1, 256, 3072]
-
+    ID_IM_END = tokenizer.convert_tokens_to_ids("<|im_end|>")
+    ID_END = tokenizer.convert_tokens_to_ids("<|end|>")
     token_len = len(ids)
     ids = ids + (SEQ_LENGTH - token_len) * [0]
     input_ids = torch.tensor(ids).view(SEQ_LENGTH).cuda()
@@ -362,7 +317,7 @@ def test_net_with_mask():
     lm = LmHead()
     token = lm(out.bfloat16()).view(1)
     out_ids = [int(token)]
-    while int(token) < EOS_TOKEN_ID and token_len < SEQ_LENGTH:
+    while int(token) not in [ID_EOS, ID_IM_END, ID_END] and token_len < SEQ_LENGTH:
         token_len += 1
         input_ids = torch.tensor([token]).cuda()
         out = embed(input_ids).view(1, 1, HIDDEN_SIZE)
@@ -0,0 +1,143 @@
+{
+  "_commit_hash": null,
+  "architectures": [
+    "InternVLChatModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
+    "AutoModel": "modeling_internvl_chat.InternVLChatModel",
+    "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
+  },
+  "downsample_ratio": 0.5,
+  "dynamic_image_size": true,
+  "force_image_size": 448,
+  "llm_config": {
+    "_name_or_path": "internlm/internlm2-chat-1_8b",
+    "add_cross_attention": false,
+    "architectures": [
+      "InternLM2ForCausalLM"
+    ],
+    "attn_implementation": "flash_attention_2",
+    "auto_map": {
+      "AutoConfig": "configuration_internlm2.InternLM2Config",
+      "AutoModel": "modeling_internlm2.InternLM2ForCausalLM",
+      "AutoModelForCausalLM": "modeling_internlm2.InternLM2ForCausalLM"
+    },
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bias": false,
+    "bos_token_id": 1,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 8192,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 512,
+    "min_length": 0,
+    "model_type": "internlm2",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 24,
+    "num_key_value_heads": 8,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 2,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 2.0,
+      "type": "dynamic"
+    },
+    "rope_theta": 1000000,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "bfloat16",
+    "torchscript": false,
+    "transformers_version": "4.37.2",
+    "typical_p": 1.0,
+    "use_bfloat16": true,
+    "use_cache": true,
+    "vocab_size": 92553
+  },
+  "max_dynamic_patch": 12,
+  "min_dynamic_patch": 1,
+  "model_type": "internvl_chat",
+  "ps_version": "v2",
+  "select_layer": -1,
+  "template": "internlm2-chat",
+  "torch_dtype": "bfloat16",
+  "use_backbone_lora": 0,
+  "use_llm_lora": 0,
+  "use_thumbnail": true,
+  "vision_config": {
+    "architectures": [
+      "InternVisionModel"
+    ],
+    "attention_dropout": 0.0,
+    "drop_path_rate": 0.0,
+    "dropout": 0.0,
+    "hidden_act": "gelu",
+    "hidden_size": 1024,
+    "image_size": 448,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-06,
+    "model_type": "intern_vit_6b",
+    "norm_type": "layer_norm",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "patch_size": 14,
+    "qk_normalization": false,
+    "qkv_bias": true,
+    "return_dict": true,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.37.2",
+    "use_bfloat16": true,
+    "use_flash_attn": true
+  }
+}
