@@ -787,6 +787,40 @@ _SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
787787 _mm_store_si128 ((__m128i*)to, _mm_mullo_epi##XX (_mm_load_si128 ((__m128i*)to), _mm_load_si128 ((__m128i*)from)));\
788788}
789789
// Generates the Divide / DivideInplace / DivideInplaceRaw specializations of
// SIMD_Type_t<int, 128, intXX_t> and SIMD_Type_t<int, 128, uintXX_t>, where XX
// is the lane width in bits.
//
// Signed lanes use _mm_div_epiXX and unsigned lanes use _mm_div_epuXX: unlike
// the low half of a product, a quotient depends on the signedness of its
// operands, so the two families cannot share one intrinsic.
//
// Every access goes through _mm_load_si128 / _mm_store_si128, i.e. the aligned
// forms, so the pointers handed to DivideInplaceRaw must be 16-byte aligned.
// No check for zero divisors is performed here.
// NOTE(review): the _mm_div_ep* integer-division intrinsics appear to be SVML
// library routines rather than single instructions - confirm they are
// available on every compiler this header targets.
#define CREATE_INT128_OPERATOR_DIVIDE(XX) \
    /* Divide (signed): lane-wise a / b, returned by value. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 128, int##XX##_t> SIMD_Type_t<int, 128, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm_store_si128((__m128i*)result.Data, _mm_div_epi##XX(_mm_load_si128((const __m128i*)a.Data), _mm_load_si128((const __m128i*)b.Data))); \
        return result; \
    } \
    /* Divide (unsigned): same shape, but with the unsigned intrinsic. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 128, uint##XX##_t> SIMD_Type_t<int, 128, uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm_store_si128((__m128i*)result.Data, _mm_div_epu##XX(_mm_load_si128((const __m128i*)a.Data), _mm_load_si128((const __m128i*)b.Data))); \
        return result; \
    } \
    /* DivideInplace (signed): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 128, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm_store_si128((__m128i*)to.Data, _mm_div_epi##XX(_mm_load_si128((const __m128i*)to.Data), _mm_load_si128((const __m128i*)from.Data))); \
    } \
    /* DivideInplace (unsigned): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm_store_si128((__m128i*)to.Data, _mm_div_epu##XX(_mm_load_si128((const __m128i*)to.Data), _mm_load_si128((const __m128i*)from.Data))); \
    } \
    /* DivideInplaceRaw (signed): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 128, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from) \
    { \
        _mm_store_si128((__m128i*)to, _mm_div_epi##XX(_mm_load_si128((const __m128i*)to), _mm_load_si128((const __m128i*)from))); \
    } \
    /* DivideInplaceRaw (unsigned): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from) \
    { \
        _mm_store_si128((__m128i*)to, _mm_div_epu##XX(_mm_load_si128((const __m128i*)to), _mm_load_si128((const __m128i*)from))); \
    }
823+
790824// ██╗███╗ ██╗████████╗ ██████╗ ███████╗ ██████╗
791825// ██║████╗ ██║╚══██╔══╝ ╚════██╗██╔════╝██╔════╝
792826// ██║██╔██╗ ██║ ██║█████╗ █████╔╝███████╗███████╗
@@ -897,6 +931,42 @@ _SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
897931 _mm256_store_si256 ((__m256i*)to, _mm256_mullo_epi##XX (_mm256_load_si256 ((__m256i*)to), _mm256_load_si256 ((__m256i*)from)));\
898932}
899933
// Generates the Divide / DivideInplace / DivideInplaceRaw specializations of
// SIMD_Type_t<int, 256, intXX_t> and SIMD_Type_t<int, 256, uintXX_t>, where XX
// is the lane width in bits.
//
// Signed lanes use _mm256_div_epiXX and unsigned lanes use _mm256_div_epuXX:
// a quotient depends on the signedness of its operands, so the unsigned
// specializations must not reuse the signed intrinsic.
//
// Every access goes through _mm256_load_si256 / _mm256_store_si256, i.e. the
// aligned forms, so the pointers handed to DivideInplaceRaw must be 32-byte
// aligned. No check for zero divisors is performed here.
// NOTE(review): the _mm256_div_ep* integer-division intrinsics appear to be
// SVML library routines rather than single instructions - confirm they are
// available on every compiler this header targets.
#define CREATE_INT256_OPERATOR_DIVIDE(XX) \
    /* Divide (signed): lane-wise a / b, returned by value. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 256, int##XX##_t> SIMD_Type_t<int, 256, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm256_store_si256((__m256i*)result.Data, _mm256_div_epi##XX(_mm256_load_si256((const __m256i*)a.Data), _mm256_load_si256((const __m256i*)b.Data))); \
        return result; \
    } \
    /* Divide (unsigned): same shape, but with the unsigned intrinsic. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 256, uint##XX##_t> SIMD_Type_t<int, 256, uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm256_store_si256((__m256i*)result.Data, _mm256_div_epu##XX(_mm256_load_si256((const __m256i*)a.Data), _mm256_load_si256((const __m256i*)b.Data))); \
        return result; \
    } \
    /* DivideInplace (signed): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 256, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm256_store_si256((__m256i*)to.Data, _mm256_div_epi##XX(_mm256_load_si256((const __m256i*)to.Data), _mm256_load_si256((const __m256i*)from.Data))); \
    } \
    /* DivideInplace (unsigned): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm256_store_si256((__m256i*)to.Data, _mm256_div_epu##XX(_mm256_load_si256((const __m256i*)to.Data), _mm256_load_si256((const __m256i*)from.Data))); \
    } \
    /* DivideInplaceRaw (signed): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 256, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from) \
    { \
        _mm256_store_si256((__m256i*)to, _mm256_div_epi##XX(_mm256_load_si256((const __m256i*)to), _mm256_load_si256((const __m256i*)from))); \
    } \
    /* DivideInplaceRaw (unsigned): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from) \
    { \
        _mm256_store_si256((__m256i*)to, _mm256_div_epu##XX(_mm256_load_si256((const __m256i*)to), _mm256_load_si256((const __m256i*)from))); \
    }
967+
968+
969+
900970// ██╗███╗ ██╗████████╗ ███████╗ ██╗██████╗
901971// ██║████╗ ██║╚══██╔══╝ ██╔════╝███║╚════██╗
902972// ██║██╔██╗ ██║ ██║█████╗███████╗╚██║ █████╔╝
@@ -1007,6 +1077,40 @@ _SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
10071077 _mm512_store_si512 ((__m512i*)to, _mm512_mullo_epi##XX (_mm512_load_si512 ((__m512i*)to), _mm512_load_si512 ((__m512i*)from)));\
10081078}
10091079
// Generates the Divide / DivideInplace / DivideInplaceRaw specializations of
// SIMD_Type_t<int, 512, intXX_t> and SIMD_Type_t<int, 512, uintXX_t>, where XX
// is the lane width in bits.
//
// This macro must define the Divide* members (not Multiply*): the Multiply*
// specializations belong to CREATE_INT512_OPERATOR_MULTIPLY, and defining them
// here as well would redefine them whenever both macros are instantiated for
// the same lane width.
//
// Signed lanes use _mm512_div_epiXX and unsigned lanes use _mm512_div_epuXX,
// because a quotient depends on the signedness of its operands.
//
// Every access goes through _mm512_load_si512 / _mm512_store_si512, i.e. the
// aligned forms, so the pointers handed to DivideInplaceRaw must be 64-byte
// aligned. No check for zero divisors is performed here.
// NOTE(review): the _mm512_div_ep* integer-division intrinsics appear to be
// SVML library routines rather than single instructions - confirm they are
// available on every compiler this header targets.
#define CREATE_INT512_OPERATOR_DIVIDE(XX) \
    /* Divide (signed): lane-wise a / b, returned by value. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 512, int##XX##_t> SIMD_Type_t<int, 512, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm512_store_si512((__m512i*)result.Data, _mm512_div_epi##XX(_mm512_load_si512((const __m512i*)a.Data), _mm512_load_si512((const __m512i*)b.Data))); \
        return result; \
    } \
    /* Divide (unsigned): same shape, but with the unsigned intrinsic. */ \
    template <> \
    _SIMD_INL_ SIMD_Type_t<int, 512, uint##XX##_t> SIMD_Type_t<int, 512, uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) \
    { \
        SIMD_Type_t result; \
        _mm512_store_si512((__m512i*)result.Data, _mm512_div_epu##XX(_mm512_load_si512((const __m512i*)a.Data), _mm512_load_si512((const __m512i*)b.Data))); \
        return result; \
    } \
    /* DivideInplace (signed): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 512, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm512_store_si512((__m512i*)to.Data, _mm512_div_epi##XX(_mm512_load_si512((const __m512i*)to.Data), _mm512_load_si512((const __m512i*)from.Data))); \
    } \
    /* DivideInplace (unsigned): to = to / from. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from) \
    { \
        _mm512_store_si512((__m512i*)to.Data, _mm512_div_epu##XX(_mm512_load_si512((const __m512i*)to.Data), _mm512_load_si512((const __m512i*)from.Data))); \
    } \
    /* DivideInplaceRaw (signed): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 512, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from) \
    { \
        _mm512_store_si512((__m512i*)to, _mm512_div_epi##XX(_mm512_load_si512((const __m512i*)to), _mm512_load_si512((const __m512i*)from))); \
    } \
    /* DivideInplaceRaw (unsigned): same as DivideInplace, on raw lane pointers. */ \
    template <> \
    _SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from) \
    { \
        _mm512_store_si512((__m512i*)to, _mm512_div_epu##XX(_mm512_load_si512((const __m512i*)to), _mm512_load_si512((const __m512i*)from))); \
    }
1113+
10101114// ███████╗ ██╗ ██████╗ █████╗ ████████╗
10111115// ██╔════╝ ██║ ██╔══ ██╗ ██╔══██╗ ╚══██╔══╝
10121116// █████╗ ██║ ██║ ██║ ███████║ ██║
@@ -1234,12 +1338,18 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12341338 CREATE_INT128_OPERATOR_PLUS (16 );
12351339 CREATE_INT128_OPERATOR_PLUS (32 );
12361340 CREATE_INT128_OPERATOR_PLUS (64 );
1341+
12371342 CREATE_INT128_OPERATOR_MINUS (8 );
12381343 CREATE_INT128_OPERATOR_MINUS (16 );
12391344 CREATE_INT128_OPERATOR_MINUS (32 );
12401345 CREATE_INT128_OPERATOR_MINUS (64 );
12411346
12421347 CREATE_INT128_OPERATOR_MULTIPLY (16 );
1348+
1349+ CREATE_INT128_OPERATOR_DIVIDE (8 );
1350+ CREATE_INT128_OPERATOR_DIVIDE (16 );
1351+ CREATE_INT128_OPERATOR_DIVIDE (32 );
1352+ CREATE_INT128_OPERATOR_DIVIDE (64 );
12431353#endif
12441354
12451355#if defined(SSE4_1_AVAILABLE)
@@ -1254,6 +1364,7 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12541364 CREATE_INT256_OPERATOR_PLUS (16 );
12551365 CREATE_INT256_OPERATOR_PLUS (32 );
12561366 CREATE_INT256_OPERATOR_PLUS (64 );
1367+
12571368 CREATE_INT256_OPERATOR_MINUS (8 );
12581369 CREATE_INT256_OPERATOR_MINUS (16 );
12591370 CREATE_INT256_OPERATOR_MINUS (32 );
@@ -1285,6 +1396,11 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12851396 CREATE_DOUBLE_OPERATOR_MINUS (256 );
12861397 CREATE_DOUBLE_OPERATOR_MULTIPLY (256 );
12871398 CREATE_DOUBLE_OPERATOR_DIVIDE (256 );
1399+
1400+ CREATE_INT256_OPERATOR_DIVIDE (8 );
1401+ CREATE_INT256_OPERATOR_DIVIDE (16 );
1402+ CREATE_INT256_OPERATOR_DIVIDE (32 );
1403+ CREATE_INT256_OPERATOR_DIVIDE (64 );
12881404#endif
12891405
12901406#if defined(SIMD_USE_TYPE_FLOAT_256)
@@ -1296,40 +1412,50 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12961412#endif
12971413
// AVX-512F section: when AVX512F_AVAILABLE is defined, this block defines the
// SIMD_USE_TYPE_*_512 switches and instantiates the 512-bit PLUS / MINUS /
// MULTIPLY integer operators and the float / double operators, plus the 64-bit
// MULTIPLY variants for the 128- and 256-bit integer types.
// NOTE(review): no CREATE_INT512_OPERATOR_DIVIDE invocation appears in this
// section, unlike the 128- and 256-bit sections - confirm this is intentional.
12981414#if defined(AVX512F_AVAILABLE)
1299- // TODO: This part couldn't be tested yet due to some hardware incapabilities... :(
1415+ #define SIMD_USE_TYPE_INT_512 1
1416+ #define SIMD_USE_TYPE_FLOAT_512 1
1417+ #define SIMD_USE_TYPE_DOUBLE_512 1
1418+
13001419 CREATE_INT128_OPERATOR_MULTIPLY (64 );
13011420 CREATE_INT256_OPERATOR_MULTIPLY (64 );
13021421
13031422 CREATE_INT512_OPERATOR_PLUS (8 );
13041423 CREATE_INT512_OPERATOR_PLUS (16 );
13051424 CREATE_INT512_OPERATOR_PLUS (32 );
13061425 CREATE_INT512_OPERATOR_PLUS (64 );
1426+
13071427 CREATE_INT512_OPERATOR_MINUS (8 );
13081428 CREATE_INT512_OPERATOR_MINUS (16 );
13091429 CREATE_INT512_OPERATOR_MINUS (32 );
13101430 CREATE_INT512_OPERATOR_MINUS (64 );
1431+
13111432 CREATE_INT512_OPERATOR_MULTIPLY (16 );
13121433 CREATE_INT512_OPERATOR_MULTIPLY (32 );
13131434 CREATE_INT512_OPERATOR_MULTIPLY (64 );
13141435
1315- DECLARE_SIMD_USE_TYPE_INT (int , 512 );
1316-
13171436 CREATE_FLOAT_OPERATOR_PLUS (512 );
13181437 CREATE_FLOAT_OPERATOR_MINUS (512 );
13191438 CREATE_FLOAT_OPERATOR_MULTIPLY (512 );
13201439 CREATE_FLOAT_OPERATOR_DIVIDE (512 );
1321-
1322- DECLARE_SIMD_USE_TYPE_FLOATING (float , 512 );
1323-
1440+
13241441 CREATE_DOUBLE_OPERATOR_PLUS (512 );
13251442 CREATE_DOUBLE_OPERATOR_MINUS (512 );
13261443 CREATE_DOUBLE_OPERATOR_MULTIPLY (512 );
13271444 CREATE_DOUBLE_OPERATOR_DIVIDE (512 );
1445+ #endif
13281446
1329- DECLARE_SIMD_USE_TYPE_FLOATING (double , 512 );
// Each 512-bit type declaration below is emitted only if its
// SIMD_USE_TYPE_*_512 switch is defined.
1447+ #if defined(SIMD_USE_TYPE_INT_512)
1448+ DECLARE_SIMD_USE_TYPE_INT (int , 512 );
1449+ #endif
13301450
1451+ #if defined(SIMD_USE_TYPE_FLOAT_512)
1452+ DECLARE_SIMD_USE_TYPE_FLOATING (float , 512 );
13311453#endif
13321454
1455+ #if defined(SIMD_USE_TYPE_DOUBLE_512)
1456+ DECLARE_SIMD_USE_TYPE_FLOATING (double , 512 );
1457+ #endif
1458+
13331459// SIMD::int_XXX checks are not ideal...
13341460template <typename T>
13351461using IsSIMDType = typename std::enable_if<
0 commit comments