From c36434a921abf41e98adbd3f913ff8daa07ae148 Mon Sep 17 00:00:00 2001
From: zhangkaihuo
Date: Thu, 19 Sep 2024 15:36:16 +0800
Subject: [PATCH] only add skip layers

---
 examples/llava/minicpmv-cli.cpp |  1 +
 llama.cpp                       | 17 ++++++++++-------
 llama.h                         |  1 +
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index f8aeba563f50c..e58b264324215 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -81,6 +81,7 @@ static struct llava_context * llava_init_context(gpt_params * params) {
     //load last model
     llama_model_params model_params = llama_model_params_from_gpt_params(*params);
     model_params.init_time = false;
+    model_params.has_vocab = false;
     //llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
     //llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
     model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
diff --git a/llama.cpp b/llama.cpp
index 62d9efd6a13cd..ec36da68e12c0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4123,7 +4123,8 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
 
 static void llm_load_vocab(
         llama_model_loader & ml,
-        llama_model & model) {
+        llama_model & model,
+        bool has_vocab) {
     auto & vocab = model.vocab;
 
     struct gguf_context * ctx = ml.meta;
@@ -4296,10 +4297,11 @@ static void llm_load_vocab(
     } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
         vocab.linefeed_id = vocab.special_pad_id;
     } else {
-        printf("====unknow vocab type\n");
-        // const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
-        // GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
-        // vocab.linefeed_id = ids[0];
+        if(has_vocab){
+            const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
+            GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
+            vocab.linefeed_id = ids[0];
+        }
     }
 
     // special tokens
@@ -5930,12 +5932,12 @@ static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
         throw std::runtime_error("error loading model hyperparameters: " + std::string(e.what()));
     }
     try {
-        llm_load_vocab(ml, model);
+        llm_load_vocab(ml, model, params.has_vocab);
     } catch(const std::exception & e) {
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
-    if (model.skip_layers == 0){
+    if (params.has_vocab){
         llm_load_print_meta(ml, model);
     }
 
@@ -14861,6 +14863,7 @@ struct llama_model_params llama_model_default_params() {
         /*.use_mmap                    =*/ true,
         /*.use_mlock                   =*/ false,
         /*.init_time                   =*/ true,
+        /*.has_vocab                   =*/ true,
     };
 
 #ifdef GGML_USE_METAL
diff --git a/llama.h b/llama.h
index 545397b167acf..c21a00564c217 100644
--- a/llama.h
+++ b/llama.h
@@ -236,6 +236,7 @@ extern "C" {
         bool use_mmap;  // use mmap if possible
         bool use_mlock; // force system to keep model in RAM
         bool init_time;
+        bool has_vocab;
     };
 
     struct llama_context_params {
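
Caller-side sketch (not part of the patch): a minimal illustration of how the new has_vocab flag introduced above might be used when loading a secondary skip-layers model, mirroring the minicpmv-cli.cpp change. The file name skip-layers.gguf is a placeholder, init_time is the fork-specific field already present on this branch, and the error handling is illustrative only.

    // Sketch, assuming the llama.h API of this branch (has_vocab, init_time fields).
    #include "llama.h"

    #include <cstdio>

    int main(void) {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        mparams.init_time = false; // fork-specific field, as set in minicpmv-cli.cpp
        mparams.has_vocab = false; // new field from this patch: skip vocab-dependent loading steps

        // Placeholder path for the secondary skip-layers model.
        llama_model * skip_model = llama_load_model_from_file("skip-layers.gguf", mparams);
        if (skip_model == NULL) {
            fprintf(stderr, "failed to load skip-layers model\n");
            llama_backend_free();
            return 1;
        }

        // ... use the skip-layers model alongside the main model ...

        llama_free_model(skip_model);
        llama_backend_free();
        return 0;
    }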