Skip to content

Commit d9f8e90

Browse files
committed
Initial Blueprint G4
1 parent ee2410d commit d9f8e90

File tree

1 file changed

+114
-0
lines changed

1 file changed

+114
-0
lines changed

examples/ml-slurm-g4.yaml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
---
16+
17+
# Name of this blueprint.
blueprint_name: ml-slurm-g4
18+
19+
# Deployment-wide variables. project_id, deployment_name, region and zone are
# placeholders that parse as null until a value is filled in. new_image is the
# image referenced by the nodeset, login and controller modules through
# $(vars.new_image); g4_reservation_name is referenced by the nodeset.
vars:
20+
project_id: ## Set GCP Project ID Here ##
21+
deployment_name: ## Set Deployment Name Here ##
22+
region: ## Set GCP Region Here ##
23+
zone: ## Set GCP Zone ID Here ##
24+
new_image:
25+
family: slurm-gcp-6-11-ubuntu-2204-lts-nvidia-570
26+
project: schedmd-slurm-public
27+
disk_size_gb: 200
28+
g4_reservation_name: "" ## Set Reservation Name
29+
30+
# Documentation for each of the modules used below can be found at
31+
# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md
32+
# Deployment groups. The first group holds only the VPC network, which the
# cluster modules reference through `use: [network]`.
deployment_groups:
33+
- group: primary
34+
modules:
35+
- id: network
36+
source: modules/network/vpc
37+
# Explicit empty mapping; a bare `settings:` would parse as null.
settings: {}
38+
39+
# Second deployment group: the Slurm nodeset, partition, login node, /home
# file system and controller.
- group: cluster
40+
modules:
41+
42+
# Nodeset on the shared network: one static g4-standard-48 node and no dynamic
# nodes, booted from the custom image in vars.new_image. reservation_name is
# taken from vars.g4_reservation_name, which is an empty string until set.
- id: g4_nodeset
43+
source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
44+
use: [network]
45+
settings:
46+
node_count_dynamic_max: 0
47+
enable_placement: false
48+
node_count_static: 1
49+
bandwidth_tier: gvnic_enabled
50+
machine_type: g4-standard-48
51+
disk_type: hyperdisk-balanced
52+
instance_image: $(vars.new_image)
53+
instance_image_custom: true
54+
reservation_name: $(vars.g4_reservation_name)
55+
56+
# Partition named `g4`, built from g4_nodeset and marked as the default
# partition; `exclusive` is set to false.
- id: g4_partition
57+
source: community/modules/compute/schedmd-slurm-gcp-v6-partition
58+
use: [g4_nodeset]
59+
settings:
60+
is_default: true
61+
partition_name: g4
62+
exclusive: false
63+
64+
# Login module on the shared network: e2-standard-2 with public IPs enabled,
# using the same custom image (vars.new_image) as the nodeset.
- id: slurm_login
65+
source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
66+
use: [network]
67+
settings:
68+
machine_type: e2-standard-2
69+
enable_login_public_ips: true
70+
instance_image: $(vars.new_image)
71+
instance_image_custom: true
72+
73+
# Filestore instance on the shared network, mounted at /home: BASIC_SSD tier,
# size_gb 2560, share name `homeshare`. The controller consumes it via `use`.
- id: homefs
74+
source: modules/file-system/filestore
75+
use: [network]
76+
settings:
77+
filestore_tier: BASIC_SSD
78+
size_gb: 2560
79+
filestore_share_name: homeshare
80+
local_mount: /home
81+
82+
# - id: private_service_access
83+
# source: community/modules/network/private-service-access
84+
# use: [network]
85+
86+
# To use Managed Lustre for the shared /home directory:
87+
# 1. Comment out the filestore block above (and the `filestore_ip_range` line in the vars block, if one is set).
88+
# 2. Uncomment the managed-lustre and private-service-access blocks.
89+
# 3. Ensure the instance_image being used has the Lustre client installed.
90+
# - id: homefs
91+
# source: modules/file-system/managed-lustre
92+
# use:
93+
# - network
94+
# - private_service_access
95+
# settings:
96+
# size_gib: 18000
97+
# name: lustre-instance1
98+
# local_mount: /home
99+
# remote_mount: lustrefs
100+
# outputs:
101+
# - network_storage
102+
103+
# Controller module: e2-standard-2 with public IPs enabled, on the custom image
# from vars.new_image. It ties the cluster together by consuming the network,
# the g4 partition, the login module and the /home file system via `use`.
- id: slurm_controller
104+
source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
105+
use:
106+
- network
107+
- g4_partition
108+
- slurm_login
109+
- homefs
110+
settings:
111+
machine_type: e2-standard-2
112+
enable_controller_public_ips: true
113+
instance_image: $(vars.new_image)
114+
instance_image_custom: true

0 commit comments

Comments
 (0)