@@ -1458,7 +1458,7 @@ IR_Builder::constructSrcPayloadRenderTarget(vISA_RT_CONTROLS cntrls,
14581458 }
14591459
14601460 auto checkType = [](G4_SrcRegRegion *src) {
1461- return src->getType () == Type_F || src->isNullReg ();
1461+ return src == nullptr || src ->getType () == Type_F || src->isNullReg ();
14621462 };
14631463 vISA_ASSERT_INPUT (checkType (R) && checkType (G) && checkType (B) && checkType (A),
14641464 " RGBA type must be F" );
@@ -1490,135 +1490,152 @@ IR_Builder::constructSrcPayloadRenderTarget(vISA_RT_CONTROLS cntrls,
14901490
14911491 G4_SrcRegRegion *srcToUse = nullptr ;
14921492
1493- // creating payload
1494- unsigned int numElts = numRows * getGRFSize () / TypeSize (Type_F);
1495- auto payloadUD = createSendPayloadDcl (numElts, Type_UD);
1496- auto payloadUW = createSendPayloadDcl (numElts, Type_UW);
1497- auto payloadF = createSendPayloadDcl (numElts, Type_F);
1498- auto payloadUB = createSendPayloadDcl (numElts, Type_UB);
1499-
1500- payloadUW->setAliasDeclare (payloadUD, 0 );
1501- payloadF->setAliasDeclare (payloadUD, 0 );
1502- payloadUB->setAliasDeclare (payloadUD, 0 );
1503-
1504- // Check whether coalescing is possible
1505- // coalesc payload by checking whether the source is already prepared in a
1506- // continuous region. If so, we could reuse the source region directly
1507- // instead of copying it again.
1508- bool canCoalesce = true ;
1509- G4_SrcRegRegion *leadingParam =
1510- cntrls.s0aPresent ? s0a : (cntrls.oMPresent ? oM : R);
1511-
1512- if (R->isNullReg () || G->isNullReg () || B->isNullReg () || A->isNullReg ()) {
1513- canCoalesce = false ;
1514- }
1493+ if (numRows > 0 )
1494+ {
1495+ // creating payload
1496+ unsigned int numElts = numRows * getGRFSize () / TypeSize (Type_F);
1497+ auto payloadUD = createSendPayloadDcl (numElts, Type_UD);
1498+ auto payloadUW = createSendPayloadDcl (numElts, Type_UW);
1499+ auto payloadF = createSendPayloadDcl (numElts, Type_F);
1500+ auto payloadUB = createSendPayloadDcl (numElts, Type_UB);
15151501
1516- if (canCoalesce) {
1517- auto payloadDcl = leadingParam-> getTopDcl ()-> getRootDeclare ( );
1518- uint32_t nextOffset = getByteOffsetSrcRegion (leadingParam );
1502+ payloadUW-> setAliasDeclare (payloadUD, 0 );
1503+ payloadF-> setAliasDeclare (payloadUD, 0 );
1504+ payloadUB-> setAliasDeclare (payloadUD, 0 );
15191505
1520- // s0a is leading param if present, so no need to check for its
1521- // contiguousness
1522- auto isContiguous = [this ](G4_SrcRegRegion *src, uint32_t offset,
1523- G4_Declare *dcl) {
1524- auto srcDcl = src->getTopDcl ()->getRootDeclare ();
1525- if (srcDcl != dcl) {
1526- return false ; // different declares are not contiguous
1527- }
1528- return offset ==
1529- getByteOffsetSrcRegion (
1530- src); // offset must be equal to the src's byte offset
1531- };
1506+ // Check whether coalescing is possible
1507+ // coalesc payload by checking whether the source is already prepared in a
1508+ // continuous region. If so, we could reuse the source region directly
1509+ // instead of copying it again.
1510+ bool canCoalesce = true ;
1511+ G4_SrcRegRegion *leadingParam =
1512+ cntrls.s0aPresent ? s0a : (cntrls.oMPresent ? oM : R);
15321513
1533- if (canCoalesce && cntrls.oMPresent ) {
1534- canCoalesce = isContiguous (oM, nextOffset, payloadDcl);
1535- nextOffset += getGRFSize ();
1514+ if (R == nullptr || R->isNullReg () ||
1515+ G == nullptr || G->isNullReg () ||
1516+ B == nullptr || B->isNullReg () ||
1517+ A == nullptr || A->isNullReg ()) {
1518+ canCoalesce = false ;
15361519 }
15371520
15381521 if (canCoalesce) {
1539- canCoalesce = isContiguous (R, nextOffset, payloadDcl);
1540- nextOffset += getGRFSize () * mult;
1541- if (canCoalesce) {
1542- canCoalesce = isContiguous (G, nextOffset, payloadDcl);
1543- nextOffset += getGRFSize () * mult;
1522+ auto payloadDcl = leadingParam->getTopDcl ()->getRootDeclare ();
1523+ uint32_t nextOffset = getByteOffsetSrcRegion (leadingParam);
1524+
1525+ // s0a is leading param if present, so no need to check for its
1526+ // contiguousness
1527+ auto isContiguous = [this ](G4_SrcRegRegion *src, uint32_t offset,
1528+ G4_Declare *dcl) {
1529+ auto srcDcl = src->getTopDcl ()->getRootDeclare ();
1530+ if (srcDcl != dcl) {
1531+ return false ; // different declares are not contiguous
1532+ }
1533+ return offset ==
1534+ getByteOffsetSrcRegion (
1535+ src); // offset must be equal to the src's byte offset
1536+ };
1537+
1538+ if (canCoalesce && cntrls.oMPresent ) {
1539+ canCoalesce = isContiguous (oM, nextOffset, payloadDcl);
1540+ nextOffset += getGRFSize ();
15441541 }
1542+
15451543 if (canCoalesce) {
1546- canCoalesce = isContiguous (B , nextOffset, payloadDcl);
1544+ canCoalesce = isContiguous (R , nextOffset, payloadDcl);
15471545 nextOffset += getGRFSize () * mult;
1546+ if (canCoalesce) {
1547+ canCoalesce = isContiguous (G, nextOffset, payloadDcl);
1548+ nextOffset += getGRFSize () * mult;
1549+ }
1550+ if (canCoalesce) {
1551+ canCoalesce = isContiguous (B, nextOffset, payloadDcl);
1552+ nextOffset += getGRFSize () * mult;
1553+ }
1554+ if (canCoalesce) {
1555+ canCoalesce = isContiguous (A, nextOffset, payloadDcl);
1556+ nextOffset += getGRFSize () * mult;
1557+ }
15481558 }
1549- if (canCoalesce) {
1550- canCoalesce = isContiguous (A, nextOffset, payloadDcl);
1559+
1560+ if (canCoalesce && cntrls.zPresent ) {
1561+ canCoalesce = isContiguous (Z, nextOffset, payloadDcl);
15511562 nextOffset += getGRFSize () * mult;
15521563 }
1553- }
15541564
1555- if (canCoalesce && cntrls.zPresent ) {
1556- canCoalesce = isContiguous (Z, nextOffset, payloadDcl);
1557- nextOffset += getGRFSize () * mult;
1565+ // last element is stencil
1566+ if (canCoalesce && cntrls.isStencil ) {
1567+ canCoalesce = isContiguous (S, nextOffset, payloadDcl);
1568+ }
15581569 }
15591570
1560- // last element is stencil
1561- if (canCoalesce && cntrls.isStencil ) {
1562- canCoalesce = isContiguous (S, nextOffset, payloadDcl);
1563- }
1564- }
1571+ if (!canCoalesce) {
1572+ // Copy parameters to payload
1573+ // ToDo: optimize to generate split send
1574+ unsigned regOff = 0 ;
15651575
1566- if (!canCoalesce) {
1567- // Copy parameters to payload
1568- // ToDo: optimize to generate split send
1569- unsigned regOff = 0 ;
1576+ if (cntrls.s0aPresent ) {
1577+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, s0a, execSize, instOpt);
1578+ }
15701579
1571- if (cntrls.s0aPresent ) {
1572- Copy_SrcRegRegion_To_Payload (payloadF , regOff, s0a , execSize, instOpt);
1573- }
1580+ if (cntrls.oMPresent ) {
1581+ Copy_SrcRegRegion_To_Payload (payloadUW , regOff, oM , execSize, instOpt);
1582+ }
15741583
1575- if (cntrls.oMPresent ) {
1576- Copy_SrcRegRegion_To_Payload (payloadUW, regOff, oM, execSize, instOpt);
1577- }
1584+ auto offIncrement = mult;
15781585
1579- auto offIncrement = mult;
1586+ if (R != nullptr ) {
1587+ if (!R->isNullReg ())
1588+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, R, execSize, instOpt);
1589+ else
1590+ regOff += offIncrement;
1591+ }
15801592
1581- if (!R->isNullReg ())
1582- Copy_SrcRegRegion_To_Payload (payloadF, regOff, R, execSize, instOpt);
1583- else
1584- regOff += offIncrement;
1593+ if (G != nullptr ) {
1594+ if (!G->isNullReg ())
1595+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, G, execSize, instOpt);
1596+ else
1597+ regOff += offIncrement;
1598+ }
15851599
1586- if (!G->isNullReg ())
1587- Copy_SrcRegRegion_To_Payload (payloadF, regOff, G, execSize, instOpt);
1588- else
1589- regOff += offIncrement;
1600+ if (B != nullptr ) {
1601+ if (!B->isNullReg ())
1602+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, B, execSize, instOpt);
1603+ else
1604+ regOff += offIncrement;
1605+ }
15901606
1591- if (!B->isNullReg ())
1592- Copy_SrcRegRegion_To_Payload (payloadF, regOff, B, execSize, instOpt);
1593- else
1594- regOff += offIncrement;
1607+ if (A != nullptr ) {
1608+ if (!A->isNullReg ())
1609+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, A, execSize, instOpt);
1610+ else
1611+ regOff += offIncrement;
1612+ }
15951613
1596- if (!A->isNullReg ())
1597- Copy_SrcRegRegion_To_Payload (payloadF, regOff, A, execSize, instOpt);
1598- else
1599- regOff += offIncrement;
1614+ if (cntrls.zPresent ) {
1615+ Copy_SrcRegRegion_To_Payload (payloadF, regOff, Z, execSize, instOpt);
1616+ }
16001617
1601- if (cntrls.zPresent ) {
1602- Copy_SrcRegRegion_To_Payload (payloadF , regOff, Z , execSize, instOpt);
1603- }
1618+ if (cntrls.isStencil ) {
1619+ Copy_SrcRegRegion_To_Payload (payloadUB , regOff, S , execSize, instOpt);
1620+ }
16041621
1605- if (cntrls.isStencil ) {
1606- Copy_SrcRegRegion_To_Payload (payloadUB, regOff, S, execSize, instOpt);
1622+ srcToUse = createSrcRegRegion (payloadUD, getRegionStride1 ());
1623+ } else {
1624+ // Coalesce and directly use original raw operand
1625+ leadingParam->setType (*this ,
1626+ R->getType ()); // it shouldn't matter, but change it
1627+ // in case leading param is oM
1628+ srcToUse = leadingParam;
16071629 }
1608-
1609- srcToUse = createSrcRegRegion (payloadUD, getRegionStride1 ());
16101630 } else {
1611- // Coalesce and directly use original raw operand
1612- leadingParam->setType (*this ,
1613- R->getType ()); // it shouldn't matter, but change it
1614- // in case leading param is oM
1615- srcToUse = leadingParam;
1631+ numRows = 1 ;
1632+ srcToUse = createNullSrc (Type_UD);
16161633 }
16171634 // set chmask
1618- uint32_t chMask = (!R->isNullReg () ? 0x1 : 0 ) |
1619- ((!G->isNullReg () ? 0x1 : 0 ) << 0x1 ) |
1620- ((!B->isNullReg () ? 0x1 : 0 ) << 0x2 ) |
1621- (((!A->isNullReg () || cntrls.s0aPresent ) ? 0x1 : 0 ) << 0x3 );
1635+ uint32_t chMask = (R && !R->isNullReg () ? 0x1 : 0 ) |
1636+ ((G && !G->isNullReg () ? 0x1 : 0 ) << 0x1 ) |
1637+ ((B && !B->isNullReg () ? 0x1 : 0 ) << 0x2 ) |
1638+ (((A && !A->isNullReg () || cntrls.s0aPresent ) ? 0x1 : 0 ) << 0x3 );
16221639
16231640 return std::make_tuple (srcToUse, numRows, chMask);
16241641}
0 commit comments