Commit b189b61 (1 parent: 309ab33)
1 file changed: 8 additions, 8 deletions

@@ -590,16 +590,16 @@ The `--max_attention_window_size` parameter is set to the `sliding_window` value
 python convert_checkpoint.py --model_dir ./mistral-7b-v0.1 \
                              --output_dir ./tllm_checkpoint_1gpu_mistral \
                              --dtype float16
-trtllm-build --checkpoint_dir ./tllm_checkpoint_2gpu_gptq \
-             --output_dir ./tmp/mistral/7B/trt_engines/fp16/1-gpu/ \
-             --gemm_plugin float16 \
-             --max_input_len 32256
+trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_mistral \
+             --output_dir ./tmp/mistral/7B/trt_engines/fp16/1-gpu/ \
+             --gemm_plugin float16 \
+             --max_input_len 32256

 # Run Mistral 7B fp16 inference with sliding window/cache size 4096
-python3 run.py --max_output_len=50 \
-               --tokenizer_dir ./tmp/llama/7B/ \
-               --engine_dir=./tmp/llama/7B/trt_engines/fp16/1-gpu/ \
-               --max_attention_window_size=4096
+python ../run.py --max_output_len=50 \
+                 --tokenizer_dir ./mistral-7b-v0.1 \
+                 --engine_dir=./tmp/llama/7B/trt_engines/fp16/1-gpu/ \
+                 --max_attention_window_size=4096

 ```

Note that if you are comparing TRT-LLM with Huggingface,
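The run command caps the attention window at 4096 because Mistral 7B uses sliding-window attention, and, as the hunk heading notes, `--max_attention_window_size` is set to the model's `sliding_window` value. As a rough illustration of what a bounded attention window means for KV-cache memory, here is a minimal, self-contained Python sketch; it is not TensorRT-LLM's actual implementation, and the `SlidingWindowKVCache` class and its names are invented for this example:

```python
from collections import deque


class SlidingWindowKVCache:
    """Toy sliding-window KV cache: keeps keys/values for only the last `window` tokens."""

    def __init__(self, window: int = 4096):
        self.window = window
        # deque(maxlen=...) silently evicts the oldest entry once full.
        self.keys = deque(maxlen=window)
        self.values = deque(maxlen=window)

    def append(self, k, v):
        # Past `window` tokens, each append drops the oldest cached token,
        # so memory is O(window) rather than O(sequence length).
        self.keys.append(k)
        self.values.append(v)

    def context(self):
        # A new token can attend to at most the last `window` cached tokens.
        return list(self.keys), list(self.values)


cache = SlidingWindowKVCache(window=4096)
for t in range(10_000):          # simulate generating a long sequence
    cache.append(f"k{t}", f"v{t}")

print(len(cache.keys))           # 4096, not 10000
```

The point of the sketch is only that the cache holds at most `window` entries, so memory stays constant once the sequence exceeds 4096 tokens, while older tokens fall out of the attention context.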