3 changes: 3 additions & 0 deletions api/azimuth/provider/dto.py
@@ -67,6 +67,9 @@ class Quota:
allocated: int
#: The amount of the resource that has been used
used: int
#: OpenStack resource class of the Coral Credits quota that also
#: constrains this quota
linked_credits_resource: str = None
Contributor comment: `related_resource_names: Sequence[str] = None`



@dataclass(frozen=True)
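For orientation, here is a minimal sketch of how the extended dataclass plausibly reads in full. Only `allocated`, `used`, and `linked_credits_resource` appear in this diff; the earlier fields are inferred from the positional `dto.Quota(...)` calls in provider.py below, so treat them as assumptions:

```python
# Hypothetical reconstruction of the Quota DTO; fields before `allocated`
# are inferred from calls like dto.Quota("ram", "RAM", "MB", allocated, used,
# linked_credits_resource="MEMORY_MB") and may not match the real class.
from dataclasses import dataclass
from typing import Optional


@dataclass(frozen=True)
class Quota:
    #: Machine-readable resource name, e.g. "ram"
    resource: str
    #: Human-readable label, e.g. "RAM"
    label: Optional[str]
    #: Units for the quota, e.g. "MB" (None for unitless resources)
    units: Optional[str]
    #: The amount of the resource that has been allocated
    allocated: int
    #: The amount of the resource that has been used
    used: int
    #: OpenStack resource class of the Coral Credits quota that also
    #: constrains this quota (None when there is no linked credit)
    linked_credits_resource: Optional[str] = None
```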
89 changes: 89 additions & 0 deletions api/azimuth/provider/openstack/provider.py
@@ -4,6 +4,7 @@

import base64
import dataclasses
import datetime
import functools
import hashlib
import logging
@@ -14,7 +15,11 @@
import certifi
import dateutil.parser
import rackit
import requests
import yaml
from django.utils.timezone import make_aware

from azimuth.settings import cloud_settings

from .. import base, dto, errors # noqa: TID252
from . import api
@@ -286,6 +291,16 @@ def __init__(
project_id_safe = self._connection.project_id.replace("-", "")
self._project_share_user = prefix + project_id_safe

# Get Coral bearer token if enabled
if cloud_settings.CORAL_CREDITS.ADMIN_PASSWORD is not None:
self._coral_auth_token = requests.post(
cloud_settings.CORAL_CREDITS.CORAL_URI + "/api-token-auth/",
json={
"username": "admin",
"password": cloud_settings.CORAL_CREDITS.ADMIN_PASSWORD,
},
).json()["token"]

def _log(self, message, *args, level=logging.INFO, **kwargs):
logger.log(
level,
@@ -337,13 +352,15 @@ def quotas(self):
None,
compute_limits.total_cores,
compute_limits.total_cores_used,
linked_credits_resource="VCPU",
Contributor comment: or PCPU, I think.
),
dto.Quota(
"ram",
"RAM",
"MB",
compute_limits.total_ram,
compute_limits.total_ram_used,
linked_credits_resource="MEMORY_MB",
),
dto.Quota(
"machines",
@@ -365,6 +382,9 @@ def quotas(self):
len(list(self._connection.network.floatingips.all())),
)
)
# Get Coral credits quotas if available
if cloud_settings.CORAL_CREDITS.CORAL_URI is not None:
quotas.extend(self.get_coral_quotas())
# The volume service is optional
# In the case where the service is not enabled, just don't add the quotas
try:
@@ -391,6 +411,75 @@ def quotas(self):
pass
return quotas

def get_coral_quotas(self):
"""
Returns quotas for the tenancy's active Coral Credits allocation, if any.
"""
headers = {"Authorization": "Bearer " + self._coral_auth_token}
accounts = requests.get(
cloud_settings.CORAL_CREDITS.CORAL_URI + "/resource_provider_account",
headers=headers,
).json()

tenancy_account_list = list(
filter(
lambda a: a["project_id"].replace("-", "") == self._tenancy.id, accounts
)
)
if len(tenancy_account_list) != 1:
return []
tenancy_account = tenancy_account_list[0]["account"]
all_allocations = requests.get(
cloud_settings.CORAL_CREDITS.CORAL_URI + "/allocation", headers=headers
).json()
account_allocations = filter(
lambda a: a["account"] == tenancy_account, all_allocations
)

datetime_format = "%Y-%m-%dT%H:%M:%SZ"
current_time = make_aware(datetime.datetime.now())
target_tz = current_time.tzinfo

active_allocation_list = list(
filter(
lambda a: datetime.datetime.strptime(
a["start"], datetime_format
).replace(tzinfo=target_tz)
< current_time
and current_time
< datetime.datetime.strptime(a["end"], datetime_format).replace(
tzinfo=target_tz
),
account_allocations,
)
)

human_readable_names = {
"MEMORY_MB": "RAM (MB)",
"DISK_GB": "Root disk (GB)", # TODO: is this always the root disk?
}

quotas = []
if len(active_allocation_list) == 1:
active_allocation_id = active_allocation_list[0]["id"]
for resource in requests.get(
cloud_settings.CORAL_CREDITS.CORAL_URI
+ "/allocation/"
+ str(active_allocation_id)
+ "/resources",
headers=headers,
).json():
resource_name = resource["resource_class"]["name"]
quotas.append(
dto.Quota(
resource_name,
human_readable_names.get(resource_name, resource_name)
+ " hours (credits)",
"resource hours",
resource["allocated_resource_hours"],
resource["allocated_resource_hours"]
- resource["resource_hours"],
)
)
return quotas

def _from_api_image(self, api_image):
"""
Converts an OpenStack API image object into a :py:class:`.dto.Image`.
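One note on the token fetch in `__init__` above: the `requests.post` call has no timeout and assumes a `token` key in the response, so a Coral Credits outage surfaces as an unhandled exception when the provider is constructed. A more defensive variant might look like the following sketch — the helper name, the 30-second timeout, and the return-None-on-failure behaviour are assumptions, not part of this PR:

```python
import logging
from typing import Optional

import requests

logger = logging.getLogger(__name__)


def fetch_coral_token(coral_uri: str, password: str) -> Optional[str]:
    """Fetch a Coral Credits API token, returning None on any failure."""
    try:
        response = requests.post(
            f"{coral_uri}/api-token-auth/",
            json={"username": "admin", "password": password},
            timeout=30,  # assumed value; the PR issues the request without one
        )
        response.raise_for_status()
        return response.json()["token"]
    except (requests.RequestException, KeyError, ValueError) as exc:
        logger.warning("Failed to obtain Coral Credits token: %s", exc)
        return None
```

Callers such as `get_coral_quotas` would then treat a missing token as "no Coral quotas", which would also cover the case this diff leaves open where `CORAL_URI` is set but `ADMIN_PASSWORD` is not, so `self._coral_auth_token` is never assigned.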
10 changes: 10 additions & 0 deletions api/azimuth/settings.py
@@ -215,6 +215,14 @@ class SchedulingSettings(SettingsObject):
#: Indicates whether advanced scheduling should be enabled
ENABLED = Setting(default=False)

#: Maximum platform duration in hours; unlimited if unset
MAX_PLATFORM_DURATION_HOURS = Setting(default=None)


class CoralCreditsSetting(SettingsObject):
#: Password for the Coral Credits admin user (token fetch is skipped when unset)
ADMIN_PASSWORD = Setting(default=None)
#: Base URI of the Coral Credits API
CORAL_URI = Setting(default=None)


class AzimuthSettings(SettingsObject):
"""
@@ -295,6 +303,8 @@ class AzimuthSettings(SettingsObject):
#: Configuration for advanced scheduling
SCHEDULING = NestedSetting(SchedulingSettings)

#: Configuration for the Coral Credits integration
CORAL_CREDITS = NestedSetting(CoralCreditsSetting)

#: URL for documentation
DOCUMENTATION_URL = Setting(
default="https://azimuth-cloud.github.io/azimuth-user-docs/"
36 changes: 36 additions & 0 deletions api/azimuth/views.py
@@ -4,6 +4,7 @@

import contextlib
import dataclasses
import datetime
import functools
import logging
import math
@@ -1243,6 +1244,17 @@ def clusters(request, tenant):
context={"session": session, "cluster_manager": cluster_manager},
)
input_serializer.is_valid(raise_exception=True)

if not check_max_platform_duration(input_serializer.validated_data):
return response.Response(
{
"detail": "Platform exceeds max duration of "
+ str(cloud_settings.SCHEDULING.MAX_PLATFORM_DURATION_HOURS)
+ " hours."
},
status=status.HTTP_409_CONFLICT,
)

# Check that the cluster fits within quota
calculator = scheduling.CaaSClusterCalculator(session)
resources = calculator.calculate(
@@ -1483,6 +1495,21 @@ def kubernetes_cluster_template_details(request, tenant, template):
return response.Response(serializer.data)


def check_max_platform_duration(platform_data):
"""
Returns True if the platform's scheduled duration is within the configured
maximum, or if no maximum is configured.
"""
if (
cloud_settings.SCHEDULING.MAX_PLATFORM_DURATION_HOURS is None
or not cloud_settings.SCHEDULING.ENABLED
):
return True
end_time = platform_data["schedule"].end_time
now = datetime.datetime.now(tz=datetime.timezone.utc)
duration_hours = (end_time - now).total_seconds() / 3600
return duration_hours < cloud_settings.SCHEDULING.MAX_PLATFORM_DURATION_HOURS


def kubernetes_cluster_check_quotas(session, cluster, template, **data):
"""
Check the quotas for a Kubernetes cluster.
@@ -1642,6 +1669,15 @@ def kubernetes_clusters(request, tenant):
context={"session": session, "capi_session": capi_session},
)
input_serializer.is_valid(raise_exception=True)
if not check_max_platform_duration(input_serializer.validated_data):
return response.Response(
{
"detail": "Platform exceeds max duration of "
+ str(cloud_settings.SCHEDULING.MAX_PLATFORM_DURATION_HOURS)
+ " hours."
},
status=status.HTTP_409_CONFLICT,
)
# Check that the cluster fits within quota
resources, fits, _ = kubernetes_cluster_check_quotas(
session, None, **input_serializer.validated_data
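To make the boundary behaviour of `check_max_platform_duration` concrete: the comparison is a strict `<`, so a schedule ending exactly at the configured maximum is rejected. A small self-contained illustration, using a hypothetical 72-hour limit:

```python
import datetime

MAX_PLATFORM_DURATION_HOURS = 72  # hypothetical setting value

now = datetime.datetime.now(tz=datetime.timezone.utc)
end_time = now + datetime.timedelta(hours=72)

# Mirrors the arithmetic in check_max_platform_duration
duration_hours = (end_time - now).total_seconds() / 3600
print(duration_hours < MAX_PLATFORM_DURATION_HOURS)  # False: exactly 72h is rejected
```

If a platform ending exactly at the limit should be allowed, the comparison would need to be `<=` instead.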
3 changes: 3 additions & 0 deletions chart/files/api/settings/11-scheduling.yaml
@@ -1,3 +1,6 @@
AZIMUTH:
SCHEDULING:
ENABLED: {{ ternary "true" "false" .Values.scheduling.enabled }}
{{- with .Values.scheduling.maxPlatformDurationHours }}
MAX_PLATFORM_DURATION_HOURS: {{ . }}
{{- end }}
8 changes: 8 additions & 0 deletions chart/files/api/settings/13-coral-credits.yaml
@@ -0,0 +1,8 @@
{{- with .Values.settings.coralCredits }}
AZIMUTH:
CORAL_CREDITS:
CORAL_URI: {{ quote .uri }}
{{- with .passwordSecretRef }}
ADMIN_PASSWORD: {{ index (lookup "v1" "Secret" .namespace .name).data .key | b64dec }}
{{- end }}
{{- end }}
2 changes: 2 additions & 0 deletions chart/templates/api/settings.yaml
@@ -35,3 +35,5 @@ data:
{{- tpl (.Files.Get "files/api/settings/11-scheduling.yaml") . | b64enc | nindent 4 }}
12-apps-provider.yaml: |
{{- tpl (.Files.Get "files/api/settings/12-apps-provider.yaml") . | b64enc | nindent 4 }}
13-coral-credits.yaml: |
{{- tpl (.Files.Get "files/api/settings/13-coral-credits.yaml") . | b64enc | nindent 4 }}
4 changes: 3 additions & 1 deletion chart/tests/__snapshot__/snapshot_test.yaml.snap
100644 → 100755
@@ -158,7 +158,7 @@ templated manifests should match snapshot:
template:
metadata:
annotations:
azimuth.stackhpc.com/settings-checksum: 4456f249ad2b10af8275e59c37bb0435e01dcc236bdb926b6ebc3c19ba8eeea6
azimuth.stackhpc.com/settings-checksum: ba9764bba470b2cacf65a4646a795dd5a62707a4879baac5749a8884af5dc0cd
azimuth.stackhpc.com/theme-checksum: ec0f36322392deee39d80b7f77ecd634df60358857af9dc208077860c4e174ab
kubectl.kubernetes.io/default-container: api
labels:
@@ -273,6 +273,8 @@ templated manifests should match snapshot:
QVpJTVVUSDoKICBTQ0hFRFVMSU5HOgogICAgRU5BQkxFRDogZmFsc2UK
12-apps-provider.yaml: |
Cg==
13-coral-credits.yaml: |
Cg==
kind: Secret
metadata:
labels:
8 changes: 8 additions & 0 deletions chart/values.yaml
@@ -182,6 +182,12 @@ settings:
# # and "ephemeral_disk" for the current flavor
# description: >-
# {{ cpus }} CPUs, {{ ram }} RAM, {{ disk }} disk, {{ ephemeral_disk }} ephemeral disk
coralCredits:
# uri:
# passwordSecretRef:
# name:
# namespace:
# key:

# Configuration for authentication
authentication:
@@ -410,6 +416,8 @@ sshKeyStore:
scheduling:
# Indicates whether scheduling should be enabled
enabled: false
# Maximum platform duration in hours (int); unlimited if unset
maxPlatformDurationHours:

# Settings for the Kubernetes apps provider
appsProvider:
9 changes: 8 additions & 1 deletion ui/src/components/pages/tenancy/platforms/scheduling.js
@@ -62,14 +62,21 @@ const ProjectedQuotaProgressBar = ({ quota }) => {


const ProjectedQuotas = ({ quotas }) => {
const sortedQuotas = sortBy(
let sortedQuotas = sortBy(
quotas,
q => {
// Use a tuple of (index, name) so we can support unknown quotas
const index = quotaOrdering.findIndex(el => el === q.resource);
return [index >= 0 ? index : quotaOrdering.length, q.resource];
}
);

// These components don't receive the optional quota fields needed to filter
// out Coral credits resources, so for now only show known quotas, until we
// have a way to calculate projections for Coral (or otherwise unknown) quotas
sortedQuotas = sortedQuotas.filter((q) => quotaOrdering.includes(q.resource));

return sortedQuotas.map(
quota => <ProjectedQuotaProgressBar
key={quota.resource}
10 changes: 9 additions & 1 deletion ui/src/components/pages/tenancy/quotas.js
@@ -56,14 +56,22 @@ const quotaOrdering = ["machines", "volumes", "external_ips", "cpus", "ram", "st


const Quotas = ({ resourceData }) => {
const sortedQuotas = sortBy(
let sortedQuotas = sortBy(
Object.values(resourceData),
q => {
// Use a tuple of (index, name) so we can support unknown quotas
const index = quotaOrdering.findIndex(el => el === q.resource);
return [index >= 0 ? index : quotaOrdering.length, q.resource];
}
);

// If a quota is unlimited but its linked Coral credits quota is present, hide it
const resourceNames = sortedQuotas.map((q) => q.resource);
sortedQuotas = sortedQuotas.filter((q) =>
q.linked_credits_resource == null ||
!(resourceNames.includes(q.linked_credits_resource) && q.allocated < 0)
);

return (
// The volume service is optional, so quotas might not always be available for it
<Row className="g-3 justify-content-center">