Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions aiserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1872,7 +1872,7 @@ def load_model(model_backend, initial_load=False):
logger.message(f"KoboldAI has finished loading and is available at the following link for the Classic UI: {koboldai_vars.cloudflare_link}/classic")
logger.message(f"KoboldAI has finished loading and is available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite")
logger.message(f"KoboldAI has finished loading and is available at the following link for the API: {koboldai_vars.cloudflare_link}/api")

koboldai_vars.reset_model_unload_timer()

# Setup IP Whitelisting
# Define a function to check if IP is allowed
Expand Down Expand Up @@ -3256,7 +3256,12 @@ def actionsubmit(
# Ignore new submissions if the AI is currently busy
if koboldai_vars.aibusy and not ignore_aibusy:
return


#Check to see if the model is loaded
if koboldai_vars.model_status != "loaded":
UI_2_unpause_model(None)


while(True):
set_aibusy(1)
koboldai_vars.actions.clear_unused_options()
Expand Down Expand Up @@ -6340,8 +6345,53 @@ def UI_2_load_model(data):
logger.debug("Loading model with user input of: {}".format(data))
model_backends[data['plugin']].set_input_parameters(data)
load_model(data['plugin'])
if model.model in ["Read Only", ""]:
koboldai_vars.model_status = "unloaded"
else:
koboldai_vars.model_status = "loaded"
#load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])


#==================================================================#
# Event triggered when user pauses a model
#==================================================================#
@socketio.on('pause_model')
@logger.catch
def UI_2_pause_model(data):
# Socket.IO handler for the 'pause_model' event: unloads the current model and
# records the model status as "unloaded". `data` is not used; the auto-unload
# timer check in this file also calls this function directly with None.
logger.info("Pausing model (unloading)")
# Only touch the backend when a module-level `model` has been defined.
if 'model' in globals():
model.unload()
# NOTE(review): confirm whether this status update is meant to run only when
# a model exists, or unconditionally.
koboldai_vars.model_status="unloaded"


#==================================================================#
# Event triggered when user un-pauses a model
#==================================================================#
@socketio.on('unpause_model')
@logger.catch
def UI_2_unpause_model(data):
# Socket.IO handler for the 'unpause_model' event: loads the current model
# again and records the model status as "loaded". `data` is not used;
# actionsubmit also calls this directly with None when the status is not
# "loaded" at submit time.
logger.info("Un-pausing model (loading)")
# Only touch the backend when a module-level `model` has been defined.
if 'model' in globals():
model.load()
# NOTE(review): confirm whether this status update is meant to run only when
# a model exists, or unconditionally.
koboldai_vars.model_status="loaded"

#==================================================================#
# Auto-pause option
#==================================================================#
@app.before_request
@logger.catch
def every_page_load():
    """Push back the model auto-unload deadline.

    Registered through ``app.before_request``, so it is expected to run ahead
    of each incoming request. It returns nothing and only asks
    ``koboldai_vars`` to reset its unload timer.
    """
    koboldai_vars.reset_model_unload_timer()


def check_model_unload_timer():
    """Pause (unload) the model once the auto-unload deadline has passed.

    Called periodically from the socket relay loop. Does nothing while no
    deadline is armed or the deadline is still in the future.

    The deadline is read from and cleared on ``koboldai_vars._system_settings``
    because that is where ``reset_model_unload_timer`` stores it. Assigning an
    underscore-prefixed name on ``koboldai_vars`` itself stores the value on
    the wrapper object, which would shadow the real deadline with ``None`` and
    stop every later auto-unload from firing.
    """
    system_settings = koboldai_vars._system_settings
    deadline = system_settings._model_unload_timer
    if deadline is None or datetime.datetime.now() <= deadline:
        return

    logger.info(deadline)
    UI_2_pause_model(None)
    # Clear after pausing: updating the model status inside the pause handler
    # re-arms the timer, and an unloaded model must not keep a live deadline.
    system_settings._model_unload_timer = None


#==================================================================#
# Event triggered when load story is clicked
#==================================================================#
Expand Down Expand Up @@ -6842,6 +6892,7 @@ def socket_io_relay(queue, socketio):
data = queue.get()
socketio.emit(data[0], data[1], **data[2])
time.sleep(0.2)
check_model_unload_timer()



Expand Down
16 changes: 16 additions & 0 deletions gensettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,22 @@
"name": "wigen_use_own_wi",
"ui_level": 2
},
{
"uitype": "slider",
"unit": "int",
"label": "Unload Timer",
"id": "setauto_unload_timer",
"min": 0,
"max": 120,
"step": 15,
"default": 0,
"tooltip": "How many minutes to wait until pausing the model (unloads from VRAM). Set to 0 to disable.",
"menu_path": "Interface",
"sub_path": "UI",
"classname": "system",
"name": "auto_unload_timer",
"ui_level": 2
},
]

gensettingsik =[{
Expand Down
8 changes: 8 additions & 0 deletions koboldai_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,10 +575,15 @@ def assign_world_info_to_actions(self, *args, **kwargs):
def reset_for_model_load(self):
    """Delegate the pre-load reset to the wrapped model settings object."""
    model_settings = self._model_settings
    model_settings.reset_for_model_load()

def reset_model_unload_timer(self):
    """Re-arm the model auto-unload deadline, or clear it when disabled.

    ``auto_unload_timer`` is the idle time in minutes; a value of 0 (or less)
    means the feature is off. When it is positive the deadline is moved to
    "now + auto_unload_timer minutes". When it is off, any deadline armed
    earlier is cleared so that a model is not unloaded after the user has
    disabled the timer.
    """
    system_settings = self._system_settings
    minutes = system_settings.auto_unload_timer
    if minutes > 0:
        system_settings._model_unload_timer = (
            datetime.datetime.now() + datetime.timedelta(minutes=minutes)
        )
    else:
        # Feature disabled: drop a deadline left over from a previous setting.
        system_settings._model_unload_timer = None

def __setattr__(self, name, value):
if name[0] == "_" or name == "tokenizer":
super().__setattr__(name, value)
if name[0] != "_":
self.reset_model_unload_timer()
#Send it to the correct _setting class
if name in self._model_settings.__dict__:
setattr(self._model_settings, name, value)
Expand Down Expand Up @@ -762,6 +767,7 @@ def reset_for_model_load(self):
self.horde_queue_position = 0
self.horde_queue_size = 0
self.use_alt_rep_pen = False
self.model_status = "unloaded"



Expand Down Expand Up @@ -1355,6 +1361,8 @@ def __init__(self, socketio, koboldai_var):
self.git_repository = ""
self.git_branch = ""
self.disable_model_load = False
self._model_unload_timer = None
self.auto_unload_timer = 0


@dataclass
Expand Down
4 changes: 2 additions & 2 deletions modeling/inference_models/exllamav2/class.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,8 @@ def unload(self):
self.cache = None
self.generator = None

self.model_name = ""
self.path = None
#self.model_name = ""
#self.path = None

with torch.no_grad():
with warnings.catch_warnings():
Expand Down
25 changes: 25 additions & 0 deletions static/koboldai.css
Original file line number Diff line number Diff line change
Expand Up @@ -4030,3 +4030,28 @@ select {
#load-model .popup_load_cancel {
grid-area: ok;
}


/* Pause/play button visibility: hide "pause" while the model is unloaded,
   hide "play" while it is loaded, and hide both when the selected model is
   "Read Only" or empty. */
.model_pause_button[model_model_status="unloaded"],
.model_play_button[model_model_status="loaded"],
.model_pause_button[model_model="Read Only"],
.model_play_button[model_model="Read Only"],
.model_pause_button[model_model=""],
.model_play_button[model_model=""] {
    display: none;
}
6 changes: 6 additions & 0 deletions templates/settings flyout.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@
<span class="material-icons-outlined cursor" tooltip="Load Model" style="font-size: 1.4em;">folder_open</span>
<span class="button_label">Load Model</span>
</button>
{# Pause / resume controls; CSS shows only the one matching the synced model status. #}
<button class="settings_button model_pause_button var_sync_alt_model_model_status var_sync_alt_model_model" onclick="socket.emit('pause_model', {});">
<span class="material-icons-outlined cursor" tooltip="Pause Model" style="font-size: 1.4em;">pause</span>
</button>
<button class="settings_button model_play_button var_sync_alt_model_model_status var_sync_alt_model_model" onclick="socket.emit('unpause_model', {});">
<span class="material-icons-outlined cursor" tooltip="Resume Model" style="font-size: 1.4em;">play_arrow</span>
</button>
{% endif %}
<select class="var_sync_model_selected_preset settings_select presets" onchange='sync_to_server(this)'><option>Preset</option></select>
</div>
Expand Down