
Commit 089c449

Implements Virtualization overhead modelling (#357)
1 parent a5f3c19 commit 089c449

File tree: 20 files changed (+871, -15 lines)

opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/TopologyFactories.kt

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,7 @@ import org.opendc.compute.topology.specs.HostSpec
 import org.opendc.compute.topology.specs.PowerSourceSpec
 import org.opendc.compute.topology.specs.TopologySpec
 import org.opendc.compute.topology.specs.toDistributionPolicy
+import org.opendc.compute.topology.specs.toVirtualizationOverheadModel
 import org.opendc.simulator.compute.models.CpuModel
 import org.opendc.simulator.compute.models.GpuModel
 import org.opendc.simulator.compute.models.MachineModel
@@ -171,6 +172,7 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec {
     val unknownMemoryUnit = MemoryUnit(memory.vendor, memory.modelName, memory.memorySpeed.toMHz(), memory.memorySize.toMiB().toLong())
     val gpuUnits =
         List(gpu?.count ?: 0) {
+            val virtualizationOverheadModel = gpu?.virtualizationOverHeadModel?.toVirtualizationOverheadModel()
             GpuModel(
                 globalGpuId++,
                 gpu!!.coreCount,
@@ -180,6 +182,7 @@ private fun HostJSONSpec.toHostSpec(clusterName: String): HostSpec {
                 gpu.vendor,
                 gpu.modelName,
                 gpu.architecture,
+                virtualizationOverheadModel,
             )
         }

opendc-compute/opendc-compute-topology/src/main/kotlin/org/opendc/compute/topology/specs/TopologySpecs.kt

Lines changed: 49 additions & 1 deletion
@@ -35,6 +35,7 @@ import org.opendc.simulator.compute.power.batteries.policy.DoubleThresholdBatter
 import org.opendc.simulator.compute.power.batteries.policy.RunningMeanBatteryPolicy
 import org.opendc.simulator.compute.power.batteries.policy.RunningMeanPlusBatteryPolicy
 import org.opendc.simulator.compute.power.batteries.policy.SingleThresholdBatteryPolicy
+import org.opendc.simulator.compute.virtualization.VirtualizationOverheadModelFactory.VirtualizationOverheadModelEnum
 import org.opendc.simulator.engine.engine.FlowEngine
 import org.opendc.simulator.engine.graph.distributionPolicies.FlowDistributorFactory.DistributionPolicy

@@ -69,8 +70,12 @@ public data class ClusterJSONSpec(
  * @param name The name of the host.
  * @param cpu The CPU available in this cluster
  * @param memory The amount of RAM memory available in Byte
- * @param powerModel The power model used to determine the power draw of a host
  * @param count The power model used to determine the power draw of a host
+ * @param gpu The GPU available in this cluster (optional)
+ * @param cpuPowerModel The power model used to determine the power draw of the CPU
+ * @param gpuPowerModel The power model used to determine the power draw of the GPU
+ * @param cpuDistributionPolicy The distribution policy used to distribute CPU resources
+ * @param gpuDistributionPolicy The distribution policy used to distribute GPU resources
  */
 @Serializable
 public data class HostJSONSpec(
@@ -133,6 +138,7 @@ public data class GPUJSONSpec(
     val vendor: String = "unknown",
     val modelName: String = "unknown",
     val architecture: String = "unknown",
+    val virtualizationOverHeadModel: VirtualizationOverheadModelSpec = NoVirtualizationOverheadModelSpec(),
 )

 @Serializable
@@ -216,6 +222,48 @@ public data class MaxMinFairnessDistributionPolicySpec(
     override val type: DistributionPolicy = DistributionPolicy.MAX_MIN_FAIRNESS,
 ) : DistributionPolicySpec

+@Serializable
+public sealed interface VirtualizationOverheadModelSpec {
+    public val type: VirtualizationOverheadModelEnum
+}
+
+@Serializable
+@SerialName("NONE")
+public data class NoVirtualizationOverheadModelSpec(
+    override val type: VirtualizationOverheadModelEnum =
+        VirtualizationOverheadModelEnum.NONE,
+) : VirtualizationOverheadModelSpec
+
+@Serializable
+@SerialName("CONSTANT")
+public data class ConstantVirtualizationOverheadModelSpec(
+    override val type: VirtualizationOverheadModelEnum = VirtualizationOverheadModelEnum.CONSTANT,
+    val percentageOverhead: Double? = -1.0,
+) : VirtualizationOverheadModelSpec
+
+@Serializable
+@SerialName("SHARE_BASED")
+public data class ShareBasedVirtualizationOverheadModelSpec(
+    override val type: VirtualizationOverheadModelEnum = VirtualizationOverheadModelEnum.SHARE_BASED,
+) : VirtualizationOverheadModelSpec
+
+public fun VirtualizationOverheadModelSpec.toVirtualizationOverheadModel(): VirtualizationOverheadModelEnum {
+    return when (this) {
+        is NoVirtualizationOverheadModelSpec -> VirtualizationOverheadModelEnum.NONE
+        is ConstantVirtualizationOverheadModelSpec ->
+            VirtualizationOverheadModelEnum.CONSTANT.apply {
+                if (percentageOverhead != null) {
+                    // -1.0 is used to indicate that no percentage overhead is specified
+                    if (percentageOverhead != -1.0 && (percentageOverhead < 0.0 || percentageOverhead > 1.0)) {
+                        throw IllegalArgumentException("Percentage overhead must be between 0.0 and 1.0")
+                    }
+                    setProperty("percentageOverhead", percentageOverhead)
+                }
+            }
+        is ShareBasedVirtualizationOverheadModelSpec -> VirtualizationOverheadModelEnum.SHARE_BASED
+    }
+}
+
 /**
  * Definition of a power source used for JSON input.
  *
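The extension function above validates a user-supplied overhead percentage before configuring the CONSTANT model. A minimal usage sketch of the new specs, assuming the declarations above are in scope (the 0.25 value is only illustrative and mirrors the custom-overhead test below; this snippet is not part of the commit):

    // Hypothetical usage of the spec-to-enum conversion introduced above.
    val custom = ConstantVirtualizationOverheadModelSpec(percentageOverhead = 0.25)
    val model = custom.toVirtualizationOverheadModel() // VirtualizationOverheadModelEnum.CONSTANT, configured with 0.25

    // Values outside 0.0..1.0 (other than the -1.0 "unset" sentinel) are rejected by the check above:
    // ConstantVirtualizationOverheadModelSpec(percentageOverhead = 1.5).toVirtualizationOverheadModel()
    //     -> IllegalArgumentException: "Percentage overhead must be between 0.0 and 1.0"

    // Omitting the block in the topology JSON falls back to the default spec:
    // NoVirtualizationOverheadModelSpec().toVirtualizationOverheadModel() -> VirtualizationOverheadModelEnum.NONE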
Lines changed: 275 additions & 0 deletions (new test file)

@@ -0,0 +1,275 @@
/*
 * Copyright (c) 2024 AtLarge Research
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package org.opendc.experiments.base

import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertInstanceOf
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertAll
import org.opendc.compute.workload.Task
import org.opendc.simulator.compute.virtualization.OverheadModels.ConstantVirtualizationOverhead
import org.opendc.simulator.compute.virtualization.OverheadModels.NoVirtualizationOverHead
import org.opendc.simulator.compute.virtualization.OverheadModels.ShareBasedVirtualizationOverhead
import org.opendc.simulator.compute.workload.trace.TraceFragment
import java.util.ArrayList

class VirtualizationOverheadTests {
    /**
     * Test that the different virtualization overhead models are loaded correctly from a topology file.
     */
    @Test
    fun loadsVirtualizationOverheadModelCorrectly() {
        val noModelTopology = createTopology("virtualizationOverhead/single_gpu_no_model.json")
        val noOverHeadTopology = createTopology("virtualizationOverhead/single_gpu_no_overhead.json")
        val constantOverHeadTopology = createTopology("virtualizationOverhead/single_gpu_constant_overhead.json")
        val customConstantOverHeadTopology = createTopology("virtualizationOverhead/single_gpu_custom_constant_overhead.json")
        val shareBasedOverheadTopology = createTopology("virtualizationOverhead/single_gpu_share_based_overhead.json")

        assertAll(
            {
                assertInstanceOf(
                    NoVirtualizationOverHead::class.java,
                    noModelTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel,
                    "Did not load default model correctly, when no model was given.",
                )
            },
            // no overhead
            {
                assertInstanceOf(
                    NoVirtualizationOverHead::class.java,
                    noOverHeadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel,
                    "Did not load no overhead model correctly.",
                )
            },
            // default constant overhead
            {
                assertInstanceOf(
                    ConstantVirtualizationOverhead::class.java,
                    constantOverHeadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel,
                    "Did not load constant overhead model correctly.",
                )
            },
            {
                assertEquals(
                    0.05,
                    (
                        constantOverHeadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel
                            as ConstantVirtualizationOverhead
                    ).percentageOverhead,
                    "Constant overhead should have 5% overhead",
                )
            },
            // custom constant overhead
            {
                assertInstanceOf(
                    ConstantVirtualizationOverhead::class.java,
                    customConstantOverHeadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel,
                    "Did not load constant overhead model correctly, when overhead factor was given.",
                )
            },
            {
                assertEquals(
                    0.25,
                    (
                        customConstantOverHeadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel
                            as ConstantVirtualizationOverhead
                    ).percentageOverhead,
                    "Custom constant overhead should have 25% overhead",
                )
            },
            // share-based overhead
            {
                assertInstanceOf(
                    ShareBasedVirtualizationOverhead::class.java,
                    shareBasedOverheadTopology[0].hostSpecs[0].model.gpuModels[0].virtualizationOverheadModel,
                    "Did not load share-based overhead model correctly",
                )
            },
        )
    }

    /**
     * Test that the NoVirtualizationOverhead model does not apply any overhead.
     */
    @Test
    fun noVirtualizationOverheadModelTest() {
        val topology = createTopology("virtualizationOverhead/single_gpu_no_overhead.json")
        val workload: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1),
                        ),
                ),
            )

        val monitor = runTest(topology, workload)
        assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0")
        assertEquals(1000.0, monitor.taskGpuSupplied["0"]?.get(1), "Task 0 should have gpu supplied 1000.0")
        assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 1000.0")
        assertEquals(1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 1000.0")
    }

    /**
     * Test that the constant overhead model applies the correct amount of overhead.
     */
    @Test
    fun constantVirtualizationOverheadModelTest() {
        val topology = createTopology("virtualizationOverhead/single_gpu_constant_overhead.json")
        val workload: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1),
                        ),
                ),
            )

        val monitor = runTest(topology, workload)
        assertAll(
            { assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0") },
            { assertEquals(0.95 * 1000.0, monitor.taskGpuSupplied["0"]?.get(1), "Task 0 should have gpu supplied 950.0") },
            { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 1000.0") },
            { assertEquals(0.95 * 1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 950.0") },
        )
    }

    /**
     * Test that the custom constant overhead model applies the correct amount of overhead.
     */
    @Test
    fun customConstantVirtualizationOverheadModelTest() {
        val topology = createTopology("virtualizationOverhead/single_gpu_custom_constant_overhead.json")
        val workload: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1),
                        ),
                ),
            )

        val monitor = runTest(topology, workload)
        assertAll(
            { assertEquals(1000.0, monitor.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0") },
            { assertEquals(0.75 * 1000.0, monitor.taskGpuSupplied["0"]?.get(1), "Task 0 should have gpu supplied 750.0") },
            { assertEquals(1000.0, monitor.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 1000.0") },
            { assertEquals(0.75 * 1000.0, monitor.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 750.0") },
        )
    }

    /**
     * Test that the share-based overhead model applies the correct amount of overhead, depending on the number of VMs.
     */
    @Test
    fun shareBasedVirtualizationOverheadModelTest() {
        val topology = createTopology("virtualizationOverhead/single_gpu_share_based_overhead.json")
        val workload1: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 1000.0, 1, 1000.0, 1),
                        ),
                ),
            )

        val workload2: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1),
                        ),
                ),
                createTestTask(
                    name = "1",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1),
                        ),
                ),
            )

        val workload3: ArrayList<Task> =
            arrayListOf(
                createTestTask(
                    name = "0",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1),
                        ),
                ),
                createTestTask(
                    name = "1",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1),
                        ),
                ),
                createTestTask(
                    name = "2",
                    fragments =
                        arrayListOf(
                            TraceFragment(10 * 60 * 1000, 0.0, 0, 1000.0, 1),
                        ),
                ),
            )

        val monitor1 = runTest(topology, workload1)
        val monitor2 = runTest(topology, workload2)
        val monitor3 = runTest(topology, workload3)

        assertAll(
            // Test with one VM
            { assertEquals(1000.0, monitor1.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0") },
            { assertEquals(1000.0, monitor1.taskGpuSupplied["0"]?.get(1), "Task 0 should have gpu supplied 1000.0") },
            { assertEquals(1000.0, monitor1.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 1000.0") },
            { assertEquals(1000.0, monitor1.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 1000.0") },
            // Test with two VMs
            { assertEquals(1000.0, monitor2.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0") },
            { assertEquals(500.0, monitor2.taskGpuSupplied["0"]?.get(1), "Task 0 should have gpu supplied 500.0") },
            { assertEquals(1000.0, monitor2.taskGpuDemands["1"]?.get(1), "Task 1 should have gpu demand 1000.0") },
            { assertEquals(500.0, monitor2.taskGpuSupplied["1"]?.get(1), "Task 1 should have gpu supplied 500.0") },
            { assertEquals(2000.0, monitor2.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 2000.0") },
            { assertEquals(1000.0, monitor2.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 1000.0") },
            // Test with three VMs
            { assertEquals(1000.0, monitor3.taskGpuDemands["0"]?.get(1), "Task 0 should have gpu demand 1000.0") },
            { assertEquals(333.3, monitor3.taskGpuSupplied["0"]?.get(1) ?: 0.0, 0.05, "Task 0 should have gpu supplied 333.3") },
            { assertEquals(1000.0, monitor3.taskGpuDemands["1"]?.get(1), "Task 1 should have gpu demand 1000.0") },
            { assertEquals(333.3, monitor3.taskGpuSupplied["1"]?.get(1) ?: 0.0, 0.05, "Task 1 should have gpu supplied 333.3") },
            { assertEquals(1000.0, monitor3.taskGpuDemands["2"]?.get(1), "Task 2 should have gpu demand 1000.0") },
            { assertEquals(333.3, monitor3.taskGpuSupplied["2"]?.get(1) ?: 0.0, 0.05, "Task 2 should have gpu supplied 333.3") },
            { assertEquals(3000.0, monitor3.hostGpuDemands["H01"]?.get(1)?.get(0), "Host H01 should have gpu demand 3000.0") },
            { assertEquals(1000.0, monitor3.hostGpuSupplied["H01"]?.get(1)?.get(0), "Host H01 should have gpu supply 1000.0") },
        )
    }
}
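The expected values in these assertions follow simple arithmetic per model. The sketch below only restates that arithmetic for a single GPU whose capacity equals the per-VM demand of 1000.0; it is an assumption for illustration, not the OverheadModels implementation (which is not part of this diff):

    // Illustrative only: how supplied GPU capacity relates to demand under each overhead model,
    // matching the expectations asserted in the tests above.
    fun suppliedPerVm(
        demandPerVm: Double,
        gpuCapacity: Double,
        vmCount: Int,
        percentageOverhead: Double? = null,
    ): Double {
        // Each VM can get at most its fair share of the GPU.
        val fairShare = minOf(demandPerVm, gpuCapacity / vmCount)
        // CONSTANT: a fixed fraction of the share is lost to virtualization; otherwise no extra penalty.
        return if (percentageOverhead != null) fairShare * (1.0 - percentageOverhead) else fairShare
    }

    // Mirrors the test expectations:
    // suppliedPerVm(1000.0, 1000.0, vmCount = 1)                            == 1000.0  (no overhead)
    // suppliedPerVm(1000.0, 1000.0, vmCount = 1, percentageOverhead = 0.05) ==  950.0  (default constant overhead)
    // suppliedPerVm(1000.0, 1000.0, vmCount = 1, percentageOverhead = 0.25) ==  750.0  (custom constant overhead)
    // suppliedPerVm(1000.0, 1000.0, vmCount = 2)                            ==  500.0  (share-based, two VMs)
    // suppliedPerVm(1000.0, 1000.0, vmCount = 3)                            ~   333.3  (share-based, three VMs)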

opendc-experiments/opendc-experiments-base/src/test/resources/topologies/Gpus/single_gpu_full.json

Lines changed: 4 additions & 1 deletion
@@ -29,7 +29,10 @@
     "memoryBandwidth": "900 GBps",
     "vendor": "NVIDIA",
     "modelName": "Tesla V100",
-    "architecture": "Volta"
+    "architecture": "Volta",
+    "virtualizationOverHeadModel": {
+        "type": "CONSTANT"
+    }
 },
 "gpuPowerModel": {
     "modelType": "linear",
