#include "ggml.h" #include "common.h" #include "clip.h" #include "llava.h" #include "llama.h" #include "base64.hpp" #include #include #include int main(int argc, char ** argv) { ggml_time_init(); if (argc != 3 && argc != 4) { fprintf(stderr, "usage: %s mmproj-f16.gguf output-mmproj-quantized.gguf TYPE\n", argv[0]); printf("\nGGML_TYPE_Q4_0 = 2\nGGML_TYPE_Q4_1 = 3\nGGML_TYPE_Q5_0 = 6\nGGML_TYPE_Q5_1 = 7\nGGML_TYPE_Q8_0 = 8\n"); return 1; } const std::string fname_inp = argv[1]; const std::string fname_out = argv[2]; int type = GGML_TYPE_Q4_1; if(argc==4) { type = std::stoi(argv[3]); } printf("quantizing mmproj clip model to type=%d... ",type); clip_model_quantize(fname_inp.c_str(), fname_out.c_str(), type); printf("done\n"); return 0; }