airstation:llama.cpp ic$ git rev-parse HEAD
952d03dbead16e4dbdd1d3458486340673cc2465
airstation:llama.cpp ic$ echo ; awk '(NR>=4341 && NR<=4382 ){print NR " " $0}' llama.cpp

4341         // for now, only BPE models have pre-tokenizers
4342         if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
4343             if (tokenizer_pre.empty()) {
4344                 LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
4345                 LLAMA_LOG_WARN("%s: \n", __func__);
4346                 LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
4347                 LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! \n", __func__);
4348                 LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
4349                 LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
4350                 LLAMA_LOG_WARN("%s: \n", __func__);
4351                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
4352             } else if (
4353                     tokenizer_pre == "default") {
4354                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
4355             } else if (
4356                     tokenizer_pre == "llama3" ||
4357                     tokenizer_pre == "llama-v3" ||
4358                     tokenizer_pre == "llama-bpe") {
4359                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
4360             } else if (
4361                     tokenizer_pre == "deepseek-llm") {
4362                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM;
4363             } else if (
4364                     tokenizer_pre == "deepseek-coder") {
4365                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
4366             } else if (
4367                     tokenizer_pre == "falcon") {
4368                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
4369             } else if (
4370                     tokenizer_pre == "mpt") {
4371                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
4372             } else if (
4373                     tokenizer_pre == "starcoder") {
4374                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_STARCODER;
4375             } else if (
4376                     tokenizer_pre == "gpt-2") {
4377                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
4378             } else {
4379                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
4380             }
4381         } else {
4382             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
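
For context: tokenizer_pre is the string llama.cpp reads from the GGUF metadata key tokenizer.ggml.pre, and the chain above is a plain string-to-enum dispatch over it. A minimal table-driven sketch of the same mapping follows; the enum mirror and the helper name pre_type_from_name are stand-ins for illustration, not the code llama.cpp actually ships.

    #include <map>
    #include <stdexcept>
    #include <string>

    // Stand-in mirror of the pre-tokenizer enum values seen in the dump above.
    enum llama_vocab_pre_type {
        LLAMA_VOCAB_PRE_TYPE_DEFAULT,
        LLAMA_VOCAB_PRE_TYPE_LLAMA3,
        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM,
        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER,
        LLAMA_VOCAB_PRE_TYPE_FALCON,
        LLAMA_VOCAB_PRE_TYPE_MPT,
        LLAMA_VOCAB_PRE_TYPE_STARCODER,
        LLAMA_VOCAB_PRE_TYPE_GPT2,
    };

    // Sketch: the same name -> enum dispatch as the if/else chain, as a
    // lookup table. Aliases (llama3 / llama-v3 / llama-bpe) share one value.
    static llama_vocab_pre_type pre_type_from_name(const std::string & name) {
        static const std::map<std::string, llama_vocab_pre_type> table = {
            { "default",        LLAMA_VOCAB_PRE_TYPE_DEFAULT        },
            { "llama3",         LLAMA_VOCAB_PRE_TYPE_LLAMA3         },
            { "llama-v3",       LLAMA_VOCAB_PRE_TYPE_LLAMA3         },
            { "llama-bpe",      LLAMA_VOCAB_PRE_TYPE_LLAMA3         },
            { "deepseek-llm",   LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   },
            { "deepseek-coder", LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER },
            { "falcon",         LLAMA_VOCAB_PRE_TYPE_FALCON         },
            { "mpt",            LLAMA_VOCAB_PRE_TYPE_MPT            },
            { "starcoder",      LLAMA_VOCAB_PRE_TYPE_STARCODER      },
            { "gpt-2",          LLAMA_VOCAB_PRE_TYPE_GPT2           },
        };
        auto it = table.find(name);
        if (it == table.end()) {
            // Same hard failure as line 4379: an unrecognized name aborts
            // loading instead of silently falling back to a wrong splitter.
            throw std::runtime_error("unknown pre-tokenizer type: '" + name + "'");
        }
        return it->second;
    }

Note the asymmetry in the real code: an unknown name throws at line 4379, but an empty name (the missing tokenizer.ggml.pre case) only warns and falls back to the default pre-tokenizer, which is what the GENERATION QUALITY WILL BE DEGRADED banner is about.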