diff --git a/api/azimuth/provider/dto.py b/api/azimuth/provider/dto.py index 130eda3b..3222cf2b 100644 --- a/api/azimuth/provider/dto.py +++ b/api/azimuth/provider/dto.py @@ -67,6 +67,9 @@ class Quota: allocated: int #: The amount of the resource that has been used used: int + #: Openstack resource class associated with the Coral credits quota which + #: this quota is also controlled by + linked_credits_resource: str = None @dataclass(frozen=True) diff --git a/api/azimuth/provider/openstack/provider.py b/api/azimuth/provider/openstack/provider.py index 5b43bea5..69852e90 100644 --- a/api/azimuth/provider/openstack/provider.py +++ b/api/azimuth/provider/openstack/provider.py @@ -4,6 +4,7 @@ import base64 import dataclasses +import datetime import functools import hashlib import logging @@ -14,7 +15,11 @@ import certifi import dateutil.parser import rackit +import requests import yaml +from django.utils.timezone import make_aware + +from azimuth.settings import cloud_settings from .. import base, dto, errors # noqa: TID252 from . 
def get_coral_quotas(self):
    """
    Return the Coral credits quotas for the current tenancy.

    Looks up the Coral resource provider account whose ``project_id``
    (with dashes removed) equals the tenancy ID, finds the allocation for
    that account that is active right now and converts each resource of
    that allocation into a :py:class:`.dto.Quota` measured in resource hours.

    Returns an empty list when:

    * no Coral bearer token was obtained for this provider instance
    * the tenancy does not match exactly one Coral account
    * there is not exactly one active allocation for the account

    Raises a ``requests`` exception if Coral cannot be reached within the
    timeout or responds with an error status.
    """
    # The token is only fetched when an admin password is configured, so it
    # may be missing even though a Coral URI is set
    token = getattr(self, "_coral_auth_token", None)
    if token is None:
        return []

    base_uri = cloud_settings.CORAL_CREDITS.CORAL_URI
    headers = {"Authorization": "Bearer " + token}
    # Never let a slow Coral instance hang the request indefinitely
    timeout_seconds = 30

    def fetch(path):
        # Fail loudly on an error response instead of trying to interpret
        # an error payload as a list of accounts/allocations
        resp = requests.get(base_uri + path, headers=headers, timeout=timeout_seconds)
        resp.raise_for_status()
        return resp.json()

    tenancy_accounts = [
        account
        for account in fetch("/resource_provider_account")
        if account["project_id"].replace("-", "") == self._tenancy.id
    ]
    if len(tenancy_accounts) != 1:
        return []
    tenancy_account = tenancy_accounts[0]["account"]

    # The timestamp format carries a literal "Z" suffix, i.e. the values are
    # UTC, so they must be compared against the current time in UTC
    datetime_format = "%Y-%m-%dT%H:%M:%SZ"
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    def parse_utc(value):
        parsed = datetime.datetime.strptime(value, datetime_format)
        return parsed.replace(tzinfo=datetime.timezone.utc)

    active_allocations = [
        allocation
        for allocation in fetch("/allocation")
        if allocation["account"] == tenancy_account
        and parse_utc(allocation["start"]) < now < parse_utc(allocation["end"])
    ]
    # Overlapping active allocations are ambiguous, so report nothing
    if len(active_allocations) != 1:
        return []

    human_readable_names = {
        "MEMORY_MB": "RAM (MB)",
        "DISK_GB": "Root disk (GB)",  # TODO: is this always the root disk?
    }

    quotas = []
    allocation_id = active_allocations[0]["id"]
    for resource in fetch("/allocation/" + str(allocation_id) + "/resources"):
        resource_name = resource["resource_class"]["name"]
        allocated_hours = resource["allocated_resource_hours"]
        quotas.append(
            dto.Quota(
                resource_name,
                human_readable_names.get(resource_name, resource_name)
                + " hours (credits)",
                "resource hours",
                allocated_hours,
                # NOTE(review): presumably "resource_hours" is the number of
                # hours remaining, making this the hours used - confirm
                # against the Coral API
                allocated_hours - resource["resource_hours"],
            )
        )
    return quotas
class CoralCreditsSetting(SettingsObject):
    """
    Settings object for the Coral credits integration.
    """

    #: Password used when requesting an API token from Coral
    #: Defaults to None (unset)
    ADMIN_PASSWORD = Setting(default=None)
    #: Base URI that the Coral API paths are appended to
    #: Defaults to None (unset)
    CORAL_URI = Setting(default=None)
def check_max_platform_duration(platform_data):
    """
    Check whether a platform's scheduled lifetime is within the configured limit.

    ``platform_data`` is the validated serializer data for the platform. Its
    ``schedule`` entry, when present, is expected to expose an ``end_time``
    attribute.
    NOTE(review): ``end_time`` is assumed to be a timezone-aware datetime -
    a naive value would fail the subtraction below; confirm against the
    serializer.

    Returns ``True`` when no maximum duration is configured, when scheduling
    is disabled, or when the time from now until ``end_time`` is at most
    ``MAX_PLATFORM_DURATION_HOURS``. Returns ``False`` when the limit is
    exceeded, or when a limit applies but the platform has no end time.
    """
    max_hours = cloud_settings.SCHEDULING.MAX_PLATFORM_DURATION_HOURS
    if max_hours is None or not cloud_settings.SCHEDULING.ENABLED:
        return True

    # A platform without an end time is unbounded, so it cannot satisfy a
    # maximum duration - reject it rather than failing with a KeyError
    schedule = platform_data.get("schedule")
    end_time = getattr(schedule, "end_time", None)
    if end_time is None:
        return False

    now = datetime.datetime.now(tz=datetime.timezone.utc)
    duration_hours = (end_time - now).total_seconds() / 3600
    # A platform of exactly the maximum duration does not exceed it
    return duration_hours <= max_hours
}} + {{- end }} diff --git a/chart/files/api/settings/13-coral-credits.yaml b/chart/files/api/settings/13-coral-credits.yaml new file mode 100644 index 00000000..f47204c2 --- /dev/null +++ b/chart/files/api/settings/13-coral-credits.yaml @@ -0,0 +1,8 @@ +{{- with .Values.settings.coralCredits }} +AZIMUTH: + CORAL_CREDITS: + CORAL_URI: {{ quote .uri }} + {{- with .passwordSecretRef }} + ADMIN_PASSWORD: {{ index (lookup "v1" "Secret" .namespace .name).data .key | b64dec }} + {{- end }} +{{- end }} diff --git a/chart/templates/api/settings.yaml b/chart/templates/api/settings.yaml index ebe2a587..2be8b42c 100644 --- a/chart/templates/api/settings.yaml +++ b/chart/templates/api/settings.yaml @@ -35,3 +35,5 @@ data: {{- tpl (.Files.Get "files/api/settings/11-scheduling.yaml") . | b64enc | nindent 4 }} 12-apps-provider.yaml: | {{- tpl (.Files.Get "files/api/settings/12-apps-provider.yaml") . | b64enc | nindent 4 }} + 13-coral-credits.yaml: | + {{- tpl (.Files.Get "files/api/settings/13-coral-credits.yaml") . 
| b64enc | nindent 4 }} diff --git a/chart/tests/__snapshot__/snapshot_test.yaml.snap b/chart/tests/__snapshot__/snapshot_test.yaml.snap old mode 100644 new mode 100755 index d5b68090..531e1675 --- a/chart/tests/__snapshot__/snapshot_test.yaml.snap +++ b/chart/tests/__snapshot__/snapshot_test.yaml.snap @@ -158,7 +158,7 @@ templated manifests should match snapshot: template: metadata: annotations: - azimuth.stackhpc.com/settings-checksum: 4456f249ad2b10af8275e59c37bb0435e01dcc236bdb926b6ebc3c19ba8eeea6 + azimuth.stackhpc.com/settings-checksum: ba9764bba470b2cacf65a4646a795dd5a62707a4879baac5749a8884af5dc0cd azimuth.stackhpc.com/theme-checksum: ec0f36322392deee39d80b7f77ecd634df60358857af9dc208077860c4e174ab kubectl.kubernetes.io/default-container: api labels: @@ -273,6 +273,8 @@ templated manifests should match snapshot: QVpJTVVUSDoKICBTQ0hFRFVMSU5HOgogICAgRU5BQkxFRDogZmFsc2UK 12-apps-provider.yaml: | Cg== + 13-coral-credits.yaml: | + Cg== kind: Secret metadata: labels: diff --git a/chart/values.yaml b/chart/values.yaml index e1905439..425fa5f7 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -182,6 +182,12 @@ settings: # # and "ephemeral_disk" for the current flavor # description: >- # {{ cpus }} CPUs, {{ ram }} RAM, {{ disk }} disk, {{ ephemeral_disk }} ephemeral disk + coralCredits: + # uri: + # passwordSecretRef: + # name: + # namespace: + # key: # Configuration for authentication authentication: @@ -410,6 +416,8 @@ sshKeyStore: scheduling: # Indicates whether scheduling should be enabled enabled: false + # Maximum duration of platform in hours (int), unlimited if unset + maxPlatformDurationHours: # Settings for the Kubernetes apps provider appsProvider: diff --git a/ui/src/components/pages/tenancy/platforms/scheduling.js b/ui/src/components/pages/tenancy/platforms/scheduling.js index 1a6cbf65..009a6fa5 100644 --- a/ui/src/components/pages/tenancy/platforms/scheduling.js +++ b/ui/src/components/pages/tenancy/platforms/scheduling.js @@ -62,7 +62,7 @@ 
const ProjectedQuotaProgressBar = ({ quota }) => { const ProjectedQuotas = ({ quotas }) => { - const sortedQuotas = sortBy( + let sortedQuotas = sortBy( quotas, q => { // Use a tuple of (index, name) so we can support unknown quotas @@ -70,6 +70,13 @@ const ProjectedQuotas = ({ quotas }) => { return [index >= 0 ? index : quotaOrdering.length, q.resource]; } ); + + // These components don't seem to get optional fields from the UI + // to filter for Coral credits resources with so just showing known + // quotas for now until we have a way to calculate projections for Coral + // or otherwise unknown quotas + sortedQuotas = sortedQuotas.filter((q) => quotaOrdering.includes(q.resource)); + return sortedQuotas.map( quota => { - const sortedQuotas = sortBy( + let sortedQuotas = sortBy( Object.values(resourceData), q => { // Use a tuple of (index, name) so we can support unknown quotas @@ -64,6 +64,14 @@ const Quotas = ({ resourceData }) => { return [index >= 0 ? index : quotaOrdering.length, q.resource]; } ); + + // If quota is unlimited but has an associated Coral quota, hide it + const resourceNames = sortedQuotas.map((q) => q.resource) + sortedQuotas = sortedQuotas.filter((q) => + q.linked_credits_resource == null || + !(resourceNames.includes(q.linked_credits_resource) && q.allocated < 0) + ) + return ( // The volume service is optional, so quotas might not always be available for it