[MPS] Fix AvgPool2d for float16 (pytorch#136822)

malfet · pytorchmergebot · commit 9d72f7481b5f · 2024-09-27T15:32:18.000Z
This was a stupid cast error that caused MPSGraph to crash with the following exception ``` (mpsFileLoc): /AppleInternal/Library/BuildRoots/e0873e53-5185-11ef-9a51-9ab6d782fe32/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm:233:0: error: 'mps.multiply' op requires the same element type for all operands and results (mpsFileLoc): /AppleInternal/Library/BuildRoots/e0873e53-5185-11ef-9a51-9ab6d782fe32/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm:233:0: note: see current operation: %3 = "mps.multiply"(%2, %arg1) : (tensor<1x3x9x9xf16>, tensor<1xf32>) -> tensor<*xf32> (mpsFileLoc): /AppleInternal/Library/BuildRoots/e0873e53-5185-11ef-9a51-9ab6d782fe32/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm:233:0: error: 'mps.multiply' op requires the same element type for all operands and results (mpsFileLoc): /AppleInternal/Library/BuildRoots/e0873e53-5185-11ef-9a51-9ab6d782fe32/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm:233:0: note: see current operation: %3 = "mps.multiply"(%2, %arg1) : (tensor<1x3x9x9xf16>, tensor<1xf32>) -> tensor<*xf32> /AppleInternal/Library/BuildRoots/e0873e53-5185-11ef-9a51-9ab6d782fe32/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphExecutable.mm:953: failed assertion `original module failed verification' ``` Pull Request resolved: pytorch#136822 Approved by: https://github.com/Skylion007 ghstack dependencies: pytorch#136754, pytorch#136755, pytorch#136821
diff --git a/aten/src/ATen/native/mps/operations/Pooling.mm b/aten/src/ATen/native/mps/operations/Pooling.mm
@@ -311,18 +311,22 @@ static void avg_pool2d_template(const Tensor& input,
       MPSGraphTensor* avgPoolTensor = [mpsGraph avgPooling2DWithSourceTensor:paddedTensor descriptor:desc name:nil];
       if (cachedGraph.divisorTensor) {
         // workaround: custom divisor isn't supported by MPS backend, so we scale manually
-        return [mpsGraph multiplicationWithPrimaryTensor:avgPoolTensor
-                                         secondaryTensor:cachedGraph.divisorTensor
-                                                    name:nil];
+        return
+            [mpsGraph multiplicationWithPrimaryTensor:avgPoolTensor
+                                      secondaryTensor:mps::castMPSTensor(
+                                                          mpsGraph, cachedGraph.divisorTensor, [avgPoolTensor dataType])
+                                                 name:nil];
       } else {
         return avgPoolTensor;
       }
     } else { // backward pass
       MPSGraphTensor* scaledGradTensor = cachedGraph.gradOutputTensor;
       if (cachedGraph.divisorTensor) {
-        scaledGradTensor = [mpsGraph multiplicationWithPrimaryTensor:cachedGraph.gradOutputTensor
-                                                     secondaryTensor:cachedGraph.divisorTensor
-                                                                name:nil];
+        scaledGradTensor = [mpsGraph
+            multiplicationWithPrimaryTensor:cachedGraph.gradOutputTensor
+                            secondaryTensor:mps::castMPSTensor(
+                                                mpsGraph, cachedGraph.divisorTensor, [scaledGradTensor dataType])
+                                       name:nil];
       }
       MPSGraphTensor* avgPoolTensor = [mpsGraph avgPooling2DGradientWithGradientTensor:scaledGradTensor
                                                                           sourceTensor:paddedTensor
diff --git a/test/test_mps.py b/test/test_mps.py
@@ -1005,8 +1005,6 @@ def mps_ops_modifier(ops):
 
     SKIPLIST = {
         # Unsupported
-        # input types 'tensor<1x3x9x9xf16>' and 'tensor<1xf32>' are not broadcast compatible
-        'nn.functional.avg_pool2d': [torch.float16],
 
         # This doesn't work on M1, but is partially working on M2 with the exception of torch.float16
         'nn.functional.conv3d': None,
@@ -12035,6 +12033,7 @@ class TestConsistency(TestCaseMPS):
         'var_mean_unbiased',
         'acosh', 'asinh', 'asin',
         'masked.std',
+        'nn.functional.avg_pool2d',  # NS: Only for backward pass
         'nn.functional.normalize',
         'nn.functional.triplet_margin_loss',
         'nn.functional.triplet_margin_with_distance_loss',