Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
d4943e4
Add an NHWC implementation of convolution to avoid …
orlmon01 Dec 19, 2025
1606a1c
Add a value for channels_last to bench_sconv.cpp
orlmon01 Dec 19, 2025
f80cc39
Merge branch 'microsoft:main' into main
orlmon01 Jan 7, 2026
6045333
Merge branch 'microsoft:main' into main
orlmon01 Jan 12, 2026
2dd199e
Update internal_testing_tests.cc
orlmon01 Jan 12, 2026
eb026d1
Merge branch 'microsoft:main' into main
orlmon01 Jan 14, 2026
4df9cea
Update nhwc_transformer_test.cc
orlmon01 Jan 14, 2026
b133782
Update internal_testing_tests.cc
orlmon01 Jan 14, 2026
0c2d1cd
Update ort_model_only_test.cc
orlmon01 Jan 14, 2026
25c0be7
Lintrunner fixes
orlmon01 Jan 14, 2026
bee0892
Merge branch 'microsoft:main' into main
orlmon01 Jan 15, 2026
a64af7c
Merge branch 'microsoft:main' into main
orlmon01 Jan 16, 2026
bc1ada6
Merge branch 'microsoft:main' into main
orlmon01 Jan 21, 2026
0482150
Update onnxruntime/core/optimizer/nhwc_transformer.cc
orlmon01 Jan 26, 2026
f9606cd
Update onnxruntime/core/framework/kernel_type_str_resolver.cc
orlmon01 Jan 26, 2026
63d9c55
Update onnxruntime/core/providers/cpu/nn/conv.cc
orlmon01 Jan 26, 2026
457513b
Update onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
orlmon01 Jan 26, 2026
b836bd3
Update onnxruntime/test/framework/ort_model_only_test.cc
orlmon01 Jan 26, 2026
d305b8f
Merge branch 'microsoft:main' into main
orlmon01 Jan 26, 2026
891dad5
Additional guards to not include KLEIDIAI specific kernels
orlmon01 Feb 4, 2026
7acbfcf
Merge branch 'microsoft:main' into main
orlmon01 Feb 4, 2026
878dff6
Merge branch 'microsoft:main' into main
orlmon01 Feb 6, 2026
0a04afc
Merge branch 'microsoft:main' into main
orlmon01 Feb 11, 2026
aed3e5f
Merge branch 'microsoft:main' into main
orlmon01 Feb 18, 2026
2467ca9
Apply suggestions from code review
orlmon01 Mar 3, 2026
ceb89d1
Merge branch 'microsoft:main' into main
orlmon01 Mar 3, 2026
2df52ff
Adding some extra tests and removing unnecessary code
orlmon01 Mar 3, 2026
ed8a97d
Merge branch 'microsoft:main' into main
orlmon01 Mar 10, 2026
ec13eef
Adding additional USE_KLEIDIAI guards
orlmon01 Mar 10, 2026
621806b
Apply suggestions from code review
orlmon01 Mar 10, 2026
78c7728
Adding USE_KLEIDIAI guard to kleidiai specific kernel_type_str_resolv…
orlmon01 Mar 10, 2026
055c62c
Updates to address comments and codex issues
orlmon01 Mar 10, 2026
958d73d
Merge branch 'microsoft:main' into main
orlmon01 Mar 10, 2026
8eef03f
Fix for errors around NHWC and FusedSum
orlmon01 Mar 12, 2026
f06c843
Merge branch 'microsoft:main' into main
orlmon01 Mar 12, 2026
768a793
Extensive Refactor of NHWC Convolution to try and fix tests
orlmon01 Mar 12, 2026
601029c
Update onnxruntime/test/framework/ort_model_only_test.cc
orlmon01 Mar 18, 2026
75c55a9
Merge branch 'microsoft:main' into main
orlmon01 Mar 18, 2026
f152310
Merge branch 'microsoft:main' into main
orlmon01 Mar 23, 2026
ba355ea
Fixes for failing unittests:
orlmon01 Mar 23, 2026
f91dd75
Update onnxruntime/test/optimizer/transpose_optimizer_test.cc
orlmon01 Mar 23, 2026
452a457
Fix for copilot suggestion around fp16 intrinsics
orlmon01 Mar 23, 2026
3c5d2ee
Further codex fixes and added another regression test
orlmon01 Mar 23, 2026
a799cac
Update the nhwc transformer tests to check according to supported har…
orlmon01 Mar 24, 2026
7970ee0
Add further checks to ensure mlas paths for convolution are correct f…
orlmon01 Mar 24, 2026
107661c
Guard the KleidiAi specific functions in android to fix -wunused errors
orlmon01 Mar 24, 2026
e6b52b6
Further guards and checks for NhwcTransformer tests on x86
orlmon01 Mar 25, 2026
a9ccef7
Merge branch 'microsoft:main' into main
orlmon01 Mar 25, 2026
4c4d168
Add a check for unused params when USE_KLEIDIAI is off to convolve.cpp
orlmon01 Mar 25, 2026
fa098db
Co-pilot fixes
orlmon01 Mar 25, 2026
80a80c1
Tighten up nhwc params
orlmon01 Mar 25, 2026
a00dcc4
One more test guard
orlmon01 Mar 25, 2026
ac36400
Small tweak to apple clang flags for macos builds
orlmon01 Mar 25, 2026
db137fc
Updates to use MLAS_TARGET_ARM64 instead of __aarch64__
orlmon01 Apr 7, 2026
6240564
Merge branch 'main' into main
orlmon01 Apr 7, 2026
69ce23e
Tweak to update conv tests to comply with new function decl
orlmon01 Apr 7, 2026
0342841
Update NHWC implementation to honour use_kleidiai flag
orlmon01 Apr 21, 2026
69d91c3
Add a comment to LayoutTransformDoesNotRetargetNhwcFusedConv test to …
orlmon01 Apr 21, 2026
a6b78dd
Merge branch 'microsoft:main' into main
orlmon01 Apr 21, 2026
648d68f
Tighten up the checks around NCHWc and add a unittest
orlmon01 Apr 21, 2026
cf8d08f
Add missing padding to PreExistingFusedConvWithNchwcSumInput
orlmon01 Apr 22, 2026
5a6f110
Changing ort_model_only_test.cc to use the same function for all models
orlmon01 Apr 29, 2026
522a861
Change the fallback convert functions in conv.cc to use more optimise…
orlmon01 Apr 29, 2026
9689832
Merge branch 'microsoft:main' into main
orlmon01 Apr 29, 2026
3710581
Remove the filter for NhwcTransformer as it is no longer needed
orlmon01 Apr 29, 2026
0f0a114
Adding comments to fuse_initializers_transformer_test.cc to explain f…
orlmon01 Apr 29, 2026
9356f98
Cleanup of com.microsoft.NhwcFusedConv to be available in minimal builds
orlmon01 Apr 29, 2026
f8b85ad
Remove unneeded comment
orlmon01 Apr 29, 2026
7d08d03
Added descriptions to the nhwc_schema_defs for the nhwcfusedconv oper…
orlmon01 Apr 29, 2026
9f7d631
Fix for CoPilot issue to prevent dividing by 0 when strides or dilati…
orlmon01 Apr 29, 2026
fff37d9
Moving the channels last check earlier in conv.cc so it returns soone…
orlmon01 Apr 29, 2026
9c62be6
Update kernel_type_str_resolver.cc whitespace change
orlmon01 Apr 30, 2026
3167f65
Update kernel_type_str_resolver.cc
orlmon01 Apr 30, 2026
fcd3f14
Update fuse_initializers_transformer_test.cc
orlmon01 Apr 30, 2026
38f9a2b
Remove the conditional ifdef from kernel_type_str_resolver_utils.cc
orlmon01 Apr 30, 2026
057f493
Update the hashing in convolve_kleidi to reduce the aliasing risk
orlmon01 Apr 30, 2026
74a3366
Merge branch 'microsoft:main' into main
orlmon01 Apr 30, 2026
7d37596
Remove an unnecessary copy in conv.cc
orlmon01 Apr 30, 2026
f403ebe
Merge branch 'microsoft:main' into main
orlmon01 Apr 30, 2026
54415bf
Generated Docs update
orlmon01 Apr 30, 2026
39edc59
Removing unneeded nullptr check in nchwc_transformer.cc
orlmon01 May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,7 @@ else()
check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE)
check_cxx_compiler_flag(-Wclass-memaccess HAS_CLASS_MEMACCESS)
check_cxx_compiler_flag(-Wcharacter-conversion HAS_CHARACTER_CONVERSION)
check_cxx_compiler_flag(-Wno-error=character-conversion HAS_NO_ERROR_CHARACTER_CONVERSION)
Comment thread
hariharans29 marked this conversation as resolved.
check_cxx_compiler_flag(-Wdangling-reference HAS_DANGLING_REFERENCE)
check_cxx_compiler_flag(-Wdeprecated-anon-enum-enum-conversion HAS_DEPRECATED_ANON_ENUM_ENUM_CONVERSION)
check_cxx_compiler_flag(-Wdeprecated-builtins HAS_DEPRECATED_BUILTINS)
Expand Down
15 changes: 9 additions & 6 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ function(filter_test_srcs test_srcs_var)
endfunction()

set(disabled_warnings)

# Downgrades the `character-conversion` diagnostic from an error to a warning
# for the C++ sources of `target_name`.
#
# Arguments:
#   target_name - name of an existing CMake target to receive the option.
#
# The option is added PRIVATE, so it affects only the target's own sources and
# is not propagated to targets that link against it.
# NOTE(review): the function name suggests this exists for diagnostics raised
# by gtest headers - confirm against the build logs that motivated it.
function(onnxruntime_disable_gtest_character_conversion_as_error target_name)
# Only add the flag when HAS_NO_ERROR_CHARACTER_CONVERSION is true, i.e. when
# the compiler was found to accept -Wno-error=character-conversion.
if (HAS_NO_ERROR_CHARACTER_CONVERSION)
# The generator expression restricts the flag to CXX compilations.
target_compile_options(${target_name} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-error=character-conversion>")
endif()
endfunction()

function(AddTest)
cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS;TEST_ARGS" ${ARGN})
list(REMOVE_DUPLICATES _UT_SOURCES)
Expand Down Expand Up @@ -170,9 +177,7 @@ function(AddTest)
if (${HAS_NOERROR})
target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-error=uninitialized>")
endif()
if (${HAS_CHARACTER_CONVERSION})
target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-error=character-conversion>")
endif()
onnxruntime_disable_gtest_character_conversion_as_error(${_UT_TARGET})
endif()

set(TEST_ARGS ${_UT_TEST_ARGS})
Expand Down Expand Up @@ -847,9 +852,7 @@ if(MSVC)
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
else()
target_include_directories(onnxruntime_test_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
if (HAS_CHARACTER_CONVERSION)
target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-error=character-conversion>")
endif()
onnxruntime_disable_gtest_character_conversion_as_error(onnxruntime_test_utils)
endif()
if (onnxruntime_USE_NCCL)
target_include_directories(onnxruntime_test_utils PRIVATE ${NCCL_INCLUDE_DIRS})
Expand Down
13 changes: 6 additions & 7 deletions docs/ContribOperators.md
Original file line number Diff line number Diff line change
Expand Up @@ -3569,7 +3569,6 @@ This version of the operator has been available since version 1 of the 'com.micr
### <a name="com.microsoft.NhwcFusedConv"></a><a name="com.microsoft.nhwcfusedconv">**com.microsoft.NhwcFusedConv**</a>

NhwcFusedConv is a Conv operator with optional activation and add operators fused in.
Only has fp16 implementation as of 2023/04/15.

#### Version

Expand Down Expand Up @@ -3600,26 +3599,26 @@ This version of the operator has been available since version 1 of the 'com.micr

<dl>
<dt><tt>X</tt> : T</dt>
<dd></dd>
<dd>Input activation tensor in channels-last layout. For 2D convolution this is [N, H, W, C], where N is batch size, H/W are spatial dimensions, and C is the number of input channels.</dd>
<dt><tt>W</tt> : T</dt>
<dd></dd>
<dd>Convolution weight tensor in the standard ONNX Conv filter layout [M, C/group, kH, kW], where M is the number of output channels.</dd>
<dt><tt>B</tt> (optional) : T</dt>
<dd></dd>
<dd>Optional 1D bias tensor of shape [M].</dd>
<dt><tt>Z</tt> (optional) : T</dt>
<dd>Tensor to be added to the output, must be the same shape and format as the output tensor.</dd>
<dd>Optional residual/add tensor in the same channels-last layout and shape as the output tensor Y. For 2D convolution this is [N, out_H, out_W, M].</dd>
</dl>

#### Outputs

<dl>
<dt><tt>Y</tt> : T</dt>
<dd></dd>
<dd>Output tensor in channels-last layout. For 2D convolution this is [N, out_H, out_W, M], where M is the number of output channels.</dd>
</dl>

#### Type Constraints

<dl>
<dt><tt>T</tt> : tensor(float16)</dt>
<dt><tt>T</tt> : tensor(float16), tensor(float)</dt>
<dd>Constrain input and output types to float tensors</dd>
</dl>

Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv);
#ifdef USE_KLEIDIAI
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, NhwcFusedConv);
#endif
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm);
#if !defined(DISABLE_GENERATION_OPS)
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch);
Expand Down Expand Up @@ -313,6 +316,9 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) {
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, EmbedLayerNormalization)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, ExpandDims)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedConv)>,
#ifdef USE_KLEIDIAI
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, NhwcFusedConv)>,
#endif
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, FusedGemm)>,
#if !defined(DISABLE_GENERATION_OPS)
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GreedySearch)>,
Expand Down
13 changes: 13 additions & 0 deletions onnxruntime/contrib_ops/cpu/fused_conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,18 @@ ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
.TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
FusedConvFloat);

#ifdef USE_KLEIDIAI
ONNX_CPU_OPERATOR_TYPED_MS_KERNEL(
NhwcFusedConv,
1,
float,
KernelDefBuilder()
Comment thread
orlmon01 marked this conversation as resolved.
// Allow the optional "sum" input (index 3) to be reused as the output buffer (index 0),
// consistent with the FusedConv kernel registration.
.MayInplace(3, 0)
.TypeConstraint("T", DataTypeImpl::GetTensorType<float>()),
FusedConvFloat);
Comment thread
orlmon01 marked this conversation as resolved.
#endif

} // namespace contrib
} // namespace onnxruntime
Loading
Loading