Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion expose.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct load_model_inputs
const float draft_gpusplit[tensor_split_max] = {};
const char * mmproj_filename = nullptr;
const bool mmproj_cpu = false;
const int visionmaxres = 2048;
const int visionmaxres = 4096;
const bool use_mmap = false;
const bool use_mlock = false;
const bool use_smartcontext = false;
Expand Down
2 changes: 1 addition & 1 deletion gpttype_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static std::vector<media_object> media_objects;
static std::vector<int> last_media_mem; //for storing dummy tokens that will be consumed by llava
static std::string media_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
static int current_media_identifier = MEDIA_TOKEN_IDENTIFIER_A;
static int vision_max_res = 2048;
static int vision_max_res = 4096;
static bool use_mrope = false;

static kcpp_params * kcpp_data = nullptr;
Expand Down
6 changes: 3 additions & 3 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ def load_model(model_filename):
inputs.draft_gpusplit[n] = 0
inputs.mmproj_filename = args.mmproj.encode("UTF-8") if args.mmproj else "".encode("UTF-8")
inputs.mmproj_cpu = (True if args.mmprojcpu else False)
inputs.visionmaxres = (512 if args.visionmaxres < 512 else (2048 if args.visionmaxres > 2048 else args.visionmaxres))
inputs.visionmaxres = (512 if args.visionmaxres < 512 else (4096 if args.visionmaxres > 4096 else args.visionmaxres))
inputs.use_smartcontext = args.smartcontext
inputs.use_contextshift = (0 if args.noshift else 1)
inputs.use_fastforward = (0 if args.nofastforward else 1)
Expand Down Expand Up @@ -5497,7 +5497,7 @@ def togglerope(a,b,c):
makelabelentry(model_tab, "Multiplier: ", loramult_var, 3, 50,padx=(580 if corrupt_scaler else 390),singleline=True,tooltip="Scale multiplier for Text LoRA Strength. Default is 1.0", labelpadx=(470 if corrupt_scaler else 330))
makefileentry(model_tab, "Mmproj File:", "Select Audio or Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for multimodal models for vision and audio recognition.\nLeave blank to skip.")
makecheckbox(model_tab, "Vision Force CPU", mmprojcpu_var, 9, tooltiptxt="Force CLIP for Vision mmproj always on CPU.")
makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=(450 if corrupt_scaler else 320), singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).", labelpadx=(260 if corrupt_scaler else 220))
makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=(450 if corrupt_scaler else 320), singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 4096 px (default {default_visionmaxres}).", labelpadx=(260 if corrupt_scaler else 220))
makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 11,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 13, 50,padx=(170 if corrupt_scaler else 100),singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=(320 if corrupt_scaler else 210),singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=(260 if corrupt_scaler else 160))
Expand Down Expand Up @@ -7898,7 +7898,7 @@ def range_checker(arg: str):
advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
advparser.add_argument("--mmproj", metavar=('[filename]'), help="Select a multimodal projector file for vision models like LLaVA.", default="")
advparser.add_argument("--mmprojcpu","--no-mmproj-offload", help="Force CLIP for Vision mmproj always on CPU.", action='store_true')
advparser.add_argument("--visionmaxres", metavar=('[max px]'), help="Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default 1024).", type=int, default=default_visionmaxres)
advparser.add_argument("--visionmaxres", metavar=('[max px]'), help="Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 4096 px (default 1024).", type=int, default=default_visionmaxres)
advparser.add_argument("--draftmodel","--model-draft","-md", metavar=('[filename]'), help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
advparser.add_argument("--draftamount","--draft-max","--draft-n", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=default_draft_amount)
advparser.add_argument("--draftgpulayers","--gpu-layers-draft","--n-gpu-layers-draft","-ngld", metavar=('[layers]'), help="How many layers to offload to GPU for the draft model (default=full offload)", type=int, default=999)
Expand Down
2 changes: 1 addition & 1 deletion tools/mtmd/llava.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_

struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length) {
clip_image_u8 * img = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img, 2048)) {
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img, 4096)) {
clip_image_u8_free(img);
LOG_ERR("%s: can't load image from bytes, is it a valid image?", __func__);
return NULL;
Expand Down