Skip to content

Commit 63807e4

Browse files
saleelksystems-assistant[bot]
authored and committed
clr: Use graph segment scheduling to process HIP Graphs
(#1372) * clr: Use graph segment scheduling to process HIP Graphs * Add a broader path to use packet capture for all topologies * Refactor code * Use DEBUG_HIP_GRAPH_SEGMENT_SCHEDULING to toggle new vs classic path, enabled by default * clr: A few fixes and improvements * clr: Detect complex graphs to take classic path * Use DEBUG_HIP_GRAPH_SEGMENT_SCHEDULING=2 to force segment scheduling path * clr: Fix a corner-case stack corruption * clr: Track commands of segments instead of snapshots * clr: Fix batch dispatch logic * Track fence_dirty_ flag for commands of other streams * Dependency resolution markers can now accommodate a dirty fence on cross streams [rocm-systems] ROCm/rocm-systems#1372 (commit c105dcd)
1 parent eb12088 commit 63807e4

File tree

11 files changed

+1547
-491
lines changed

11 files changed

+1547
-491
lines changed

hipamd/src/hip_graph.cpp

Lines changed: 60 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,9 +1425,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph
14251425
if (status != hipSuccess) {
14261426
HIP_RETURN(status);
14271427
}
1428-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1429-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1430-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1428+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1429+
if (graphExec->IsSegmentSchedulingEnabled()) {
1430+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
14311431
}
14321432
HIP_RETURN(status);
14331433
}
@@ -1523,12 +1523,14 @@ hipError_t ihipGraphInstantiate(hip::GraphExec** pGraphExec, hip::Graph* graph,
15231523
return hipErrorOutOfMemory;
15241524
}
15251525
graph->clone(*pGraphExec, true);
1526-
(*pGraphExec)->ScheduleNodes();
1527-
if (false == (*pGraphExec)->TopologicalOrder()) {
1526+
1527+
hipError_t scheduleStatus = (*pGraphExec)->ScheduleNodes();
1528+
if (scheduleStatus != hipSuccess) {
15281529
delete *pGraphExec;
1529-
return hipErrorInvalidValue;
1530+
*pGraphExec = nullptr;
1531+
return scheduleStatus;
15301532
}
1531-
graph->SetGraphInstantiated(true);
1533+
15321534
if (DEBUG_HIP_GRAPH_DOT_PRINT) {
15331535
static int i = 1;
15341536
std::string filename =
@@ -1538,7 +1540,10 @@ hipError_t ihipGraphInstantiate(hip::GraphExec** pGraphExec, hip::Graph* graph,
15381540
LogPrintfInfo("[hipGraph] graph dump:%s", filename.c_str());
15391541
}
15401542
}
1541-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1543+
1544+
graph->SetGraphInstantiated(true);
1545+
1546+
if ((*pGraphExec)->IsSegmentSchedulingEnabled()) {
15421547
(*pGraphExec)->SetKernelArgManager(new hip::GraphKernelArgManager());
15431548
}
15441549
return (*pGraphExec)->Init();
@@ -1555,7 +1560,7 @@ hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph,
15551560
if (status == hipSuccess) {
15561561
*pGraphExec = reinterpret_cast<hipGraphExec_t>(ge);
15571562
}
1558-
HIP_RETURN(status);
1563+
HIP_RETURN(status, ReturnPtrValue(pGraphExec));
15591564
}
15601565

15611566
hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph,
@@ -1574,7 +1579,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t g
15741579
hip::GraphExec* ge;
15751580
hipError_t status = ihipGraphInstantiate(&ge, reinterpret_cast<hip::Graph*>(graph), flags);
15761581
*pGraphExec = reinterpret_cast<hipGraphExec_t>(ge);
1577-
HIP_RETURN(status);
1582+
HIP_RETURN(status, ReturnPtrValue(pGraphExec));
15781583
}
15791584

15801585
hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph,
@@ -1609,7 +1614,7 @@ hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t
16091614
HIP_RETURN(status);
16101615
}
16111616

1612-
HIP_RETURN(hipSuccess);
1617+
HIP_RETURN(hipSuccess, ReturnPtrValue(pGraphExec));
16131618
}
16141619

16151620
hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) {
@@ -1820,9 +1825,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
18201825
if (status != hipSuccess) {
18211826
HIP_RETURN(status);
18221827
}
1823-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1824-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1825-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1828+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1829+
if (graphExec->IsSegmentSchedulingEnabled()) {
1830+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
18261831
}
18271832
HIP_RETURN(status);
18281833
}
@@ -1871,9 +1876,9 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
18711876
if (status != hipSuccess) {
18721877
HIP_RETURN(status);
18731878
}
1874-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1875-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1876-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1879+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1880+
if (graphExec->IsSegmentSchedulingEnabled()) {
1881+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
18771882
}
18781883
HIP_RETURN(status);
18791884
}
@@ -1931,9 +1936,9 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
19311936
if (status != hipSuccess) {
19321937
HIP_RETURN(status);
19331938
}
1934-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1935-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1936-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1939+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1940+
if (graphExec->IsSegmentSchedulingEnabled()) {
1941+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
19371942
}
19381943
HIP_RETURN(status);
19391944
}
@@ -2008,13 +2013,18 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra
20082013
if (status != hipSuccess) {
20092014
return status;
20102015
}
2011-
if (reinterpret_cast<hip::ChildGraphNode*>(clonedNode)->GetGraphCaptureStatus()) {
2016+
2017+
hip::ChildGraphNode* childNode = reinterpret_cast<hip::ChildGraphNode*>(clonedNode);
2018+
2019+
// After SetParams updates node parameters in-place, we need to update the cached AQL packets
2020+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2021+
if (graphExec->IsSegmentSchedulingEnabled() || childNode->GetGraphCaptureStatus()) {
20122022
std::vector<hip::GraphNode*> childGraphNodes;
2013-
reinterpret_cast<hip::ChildGraphNode*>(clonedNode)->TopologicalOrder(childGraphNodes);
2023+
childNode->TopologicalOrder(childGraphNodes);
20142024
for (std::vector<hip::GraphNode*>::size_type i = 0; i != childGraphNodes.size(); i++) {
20152025
if (childGraphNodes[i]->GraphCaptureEnabled()) {
2016-
status = reinterpret_cast<hip::ChildGraphNode*>(clonedNode)
2017-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(childGraphNodes[i]));
2026+
status =
2027+
childNode->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(childGraphNodes[i]));
20182028
if (status != hipSuccess) {
20192029
return status;
20202030
}
@@ -2414,9 +2424,9 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec,
24142424
if (status != hipSuccess) {
24152425
HIP_RETURN(status);
24162426
}
2417-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
2418-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
2419-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
2427+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2428+
if (graphExec->IsSegmentSchedulingEnabled()) {
2429+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
24202430
}
24212431
HIP_RETURN(status);
24222432
}
@@ -2497,9 +2507,9 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hi
24972507
if (status != hipSuccess) {
24982508
HIP_RETURN(status);
24992509
}
2500-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
2501-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
2502-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
2510+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2511+
if (graphExec->IsSegmentSchedulingEnabled()) {
2512+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
25032513
}
25042514
HIP_RETURN(status);
25052515
}
@@ -2734,10 +2744,11 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph,
27342744
*updateResult_out = hipGraphExecUpdateErrorNotSupported;
27352745
}
27362746
HIP_RETURN(hipErrorGraphExecUpdateFailure);
2737-
} else if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && newGraphNodes[i]->GraphCaptureEnabled()) {
2738-
status =
2739-
reinterpret_cast<hip::GraphExec*>(hGraphExec)
2740-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(oldGraphExecNodes[i]));
2747+
} else {
2748+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2749+
if (graphExec->IsSegmentSchedulingEnabled() && newGraphNodes[i]->GraphCaptureEnabled()) {
2750+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(oldGraphExecNodes[i]));
2751+
}
27412752
}
27422753
} else {
27432754
*hErrorNode_out = reinterpret_cast<hipGraphNode_t>(newGraphNodes[i]);
@@ -3091,12 +3102,16 @@ hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNod
30913102
HIP_RETURN(hipErrorInvalidValue);
30923103
}
30933104
clonedNode->SetEnabled(isEnabled);
3094-
// Update packet batches when node is enabled/disabled
3095-
hipError_t status = graphExec->UpdatePacketBatchesForNodeEnableDisable(clonedNode, isEnabled != 0);
3096-
if (status != hipSuccess) {
3097-
HIP_RETURN(status);
3105+
3106+
hipError_t status = hipSuccess;
3107+
if (graphExec->IsSegmentSchedulingEnabled()) {
3108+
// Update packet batches when node is enabled/disabled
3109+
status = graphExec->UpdatePacketBatchesForNodeEnableDisable(clonedNode, isEnabled != 0);
3110+
if (status != hipSuccess) {
3111+
HIP_RETURN(status);
3112+
}
30983113
}
3099-
HIP_RETURN(hipSuccess);
3114+
HIP_RETURN(status);
31003115
}
31013116

31023117
hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode,
@@ -3449,8 +3464,9 @@ hipError_t hipDrvGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGrap
34493464
if (status != hipSuccess) {
34503465
HIP_RETURN(status);
34513466
}
3452-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
3453-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)->UpdateAQLPacket(clonedNode);
3467+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
3468+
if (graphExec->IsSegmentSchedulingEnabled()) {
3469+
status = graphExec->UpdateAQLPacket(clonedNode);
34543470
}
34553471
HIP_RETURN(status);
34563472
}
@@ -3572,8 +3588,9 @@ hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t no
35723588
return status;
35733589
}
35743590

3575-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
3576-
status = reinterpret_cast<hip::GraphExec*>(graphExec)->UpdateAQLPacket(clonedNode);
3591+
auto graphExecPtr = reinterpret_cast<hip::GraphExec*>(graphExec);
3592+
if (graphExecPtr->IsSegmentSchedulingEnabled()) {
3593+
status = graphExecPtr->UpdateAQLPacket(clonedNode);
35773594
}
35783595
return status;
35793596
}

0 commit comments

Comments
 (0)