Skip to content

Commit b764ef6

Browse files
committed
Add int division
1 parent 422d630 commit b764ef6

File tree

7 files changed

+312
-96
lines changed

7 files changed

+312
-96
lines changed

README.md

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,7 @@ A high-performance C++ framework for SIMD (Single Instruction Multiple Data) ope
2121

2222
### Unit Tests Status
2323

24-
**All Tests Passed**: 10 out of 10 tests passed successfully
25-
26-
| Test | Status | Time |
27-
|------|--------|------|
28-
| int128_Addition | ✓ PASS | 0 ms |
29-
| int128_Subtraction | ✓ PASS | 0 ms |
30-
| int128_Multiplication | ✓ PASS | 0 ms |
31-
| int256_Addition | ✓ PASS | 1 ms |
32-
| int256_Subtraction | ✓ PASS | 1 ms |
33-
| int256_Multiplication | ✓ PASS | 1 ms |
34-
| float256_Addition | ✓ PASS | 1 ms |
35-
| float256_Subtraction | ✓ PASS | 1 ms |
36-
| float256_Multiplication | ✓ PASS | 1 ms |
37-
| float256_Division | ✓ PASS | 1 ms |
24+
![Unit Test Results](benchmark_results_windows_gcc/unit_test_results.png)
3825

3926
### Performance Benchmarks
4027

SIMD.h

Lines changed: 133 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,40 @@ _SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
787787
_mm_store_si128((__m128i*)to, _mm_mullo_epi##XX(_mm_load_si128((__m128i*)to), _mm_load_si128((__m128i*)from)));\
788788
}
789789

790+
// CREATE_INT128_OPERATOR_DIVIDE(XX)
//
// Generates the lane-wise integer division specializations of
// SIMD_Type_t<int, 128, T> for T = intXX_t and T = uintXX_t:
//   - Divide(a, b)              : returns a new value holding a / b per lane.
//   - DivideInplace(to, from)   : to = to / from per lane.
//   - DivideInplaceRaw(to, from): same as DivideInplace, on raw element pointers.
//
// Signed element types use _mm_div_epi##XX; unsigned element types use
// _mm_div_epu##XX. Using the signed intrinsic on unsigned data would treat
// lanes with the top bit set as negative and produce wrong quotients.
//
// All loads/stores are the aligned variants (_mm_load_si128/_mm_store_si128),
// so every pointer passed in must be 16-byte aligned.
// No zero-divisor check is performed here.
// NOTE(review): _mm_div_epi*/_mm_div_epu* are listed by Intel as SVML
// intrinsics - confirm every supported compiler provides them.
#define CREATE_INT128_OPERATOR_DIVIDE(XX) \
template<>\
_SIMD_INL_ SIMD_Type_t<int, 128, int##XX##_t> SIMD_Type_t<int, 128, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm_store_si128((__m128i*)result.Data, _mm_div_epi##XX(_mm_load_si128((__m128i*)a.Data), _mm_load_si128((__m128i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ SIMD_Type_t<int, 128, uint##XX##_t> SIMD_Type_t<int, 128,uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm_store_si128((__m128i*)result.Data, _mm_div_epu##XX(_mm_load_si128((__m128i*)a.Data), _mm_load_si128((__m128i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 128, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm_store_si128((__m128i*)to.Data, _mm_div_epi##XX(_mm_load_si128((__m128i*)to.Data), _mm_load_si128((__m128i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm_store_si128((__m128i*)to.Data, _mm_div_epu##XX(_mm_load_si128((__m128i*)to.Data), _mm_load_si128((__m128i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 128, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from)\
{\
    _mm_store_si128((__m128i*)to, _mm_div_epi##XX(_mm_load_si128((__m128i*)to), _mm_load_si128((__m128i*)from)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 128, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from)\
{\
    _mm_store_si128((__m128i*)to, _mm_div_epu##XX(_mm_load_si128((__m128i*)to), _mm_load_si128((__m128i*)from)));\
}
823+
790824
// ██╗███╗ ██╗████████╗ ██████╗ ███████╗ ██████╗
791825
// ██║████╗ ██║╚══██╔══╝ ╚════██╗██╔════╝██╔════╝
792826
// ██║██╔██╗ ██║ ██║█████╗ █████╔╝███████╗███████╗
@@ -897,6 +931,42 @@ _SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
897931
_mm256_store_si256((__m256i*)to, _mm256_mullo_epi##XX(_mm256_load_si256((__m256i*)to), _mm256_load_si256((__m256i*)from)));\
898932
}
899933

934+
// CREATE_INT256_OPERATOR_DIVIDE(XX)
//
// Generates the lane-wise integer division specializations of
// SIMD_Type_t<int, 256, T> for T = intXX_t and T = uintXX_t:
//   - Divide(a, b)              : returns a new value holding a / b per lane.
//   - DivideInplace(to, from)   : to = to / from per lane.
//   - DivideInplaceRaw(to, from): same as DivideInplace, on raw element pointers.
//
// Signed element types use _mm256_div_epi##XX; unsigned element types use
// _mm256_div_epu##XX. Using the signed intrinsic on unsigned data would treat
// lanes with the top bit set as negative and produce wrong quotients.
//
// All loads/stores are the aligned variants
// (_mm256_load_si256/_mm256_store_si256), so every pointer passed in must be
// 32-byte aligned.
// No zero-divisor check is performed here.
// NOTE(review): _mm256_div_epi*/_mm256_div_epu* are listed by Intel as SVML
// intrinsics - confirm every supported compiler provides them.
#define CREATE_INT256_OPERATOR_DIVIDE(XX) \
template<>\
_SIMD_INL_ SIMD_Type_t<int, 256, int##XX##_t> SIMD_Type_t<int, 256, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm256_store_si256((__m256i*)result.Data, _mm256_div_epi##XX(_mm256_load_si256((__m256i*)a.Data), _mm256_load_si256((__m256i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ SIMD_Type_t<int, 256, uint##XX##_t> SIMD_Type_t<int, 256,uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm256_store_si256((__m256i*)result.Data, _mm256_div_epu##XX(_mm256_load_si256((__m256i*)a.Data), _mm256_load_si256((__m256i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 256, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm256_store_si256((__m256i*)to.Data, _mm256_div_epi##XX(_mm256_load_si256((__m256i*)to.Data), _mm256_load_si256((__m256i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm256_store_si256((__m256i*)to.Data, _mm256_div_epu##XX(_mm256_load_si256((__m256i*)to.Data), _mm256_load_si256((__m256i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 256, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from)\
{\
    _mm256_store_si256((__m256i*)to, _mm256_div_epi##XX(_mm256_load_si256((__m256i*)to), _mm256_load_si256((__m256i*)from)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 256, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from)\
{\
    _mm256_store_si256((__m256i*)to, _mm256_div_epu##XX(_mm256_load_si256((__m256i*)to), _mm256_load_si256((__m256i*)from)));\
}
967+
968+
969+
900970
// ██╗███╗ ██╗████████╗ ███████╗ ██╗██████╗
901971
// ██║████╗ ██║╚══██╔══╝ ██╔════╝███║╚════██╗
902972
// ██║██╔██╗ ██║ ██║█████╗███████╗╚██║ █████╔╝
@@ -1007,6 +1077,40 @@ _SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::MultiplyInplaceRaw(uint##XX
10071077
_mm512_store_si512((__m512i*)to, _mm512_mullo_epi##XX(_mm512_load_si512((__m512i*)to), _mm512_load_si512((__m512i*)from)));\
10081078
}
10091079

1080+
// CREATE_INT512_OPERATOR_DIVIDE(XX)
//
// Generates the lane-wise integer division specializations of
// SIMD_Type_t<int, 512, T> for T = intXX_t and T = uintXX_t:
//   - Divide(a, b)              : returns a new value holding a / b per lane.
//   - DivideInplace(to, from)   : to = to / from per lane.
//   - DivideInplaceRaw(to, from): same as DivideInplace, on raw element pointers.
//
// This macro previously contained a copy of the multiply macro body (it
// defined Multiply* via _mm512_mullo_epi##XX). It now emits the Divide*
// members, mirroring the 128- and 256-bit divide macros.
//
// Signed element types use _mm512_div_epi##XX; unsigned element types use
// _mm512_div_epu##XX so that lanes with the top bit set are not treated as
// negative values.
//
// All loads/stores are the aligned variants
// (_mm512_load_si512/_mm512_store_si512), so every pointer passed in must be
// 64-byte aligned.
// No zero-divisor check is performed here.
// NOTE(review): _mm512_div_epi*/_mm512_div_epu* are listed by Intel as SVML
// intrinsics - confirm every supported compiler provides them.
#define CREATE_INT512_OPERATOR_DIVIDE(XX) \
template<>\
_SIMD_INL_ SIMD_Type_t<int, 512, int##XX##_t> SIMD_Type_t<int, 512, int##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm512_store_si512((__m512i*)result.Data, _mm512_div_epi##XX(_mm512_load_si512((__m512i*)a.Data), _mm512_load_si512((__m512i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ SIMD_Type_t<int, 512, uint##XX##_t> SIMD_Type_t<int, 512,uint##XX##_t>::Divide(const SIMD_Type_t& a, const SIMD_Type_t& b) {\
    SIMD_Type_t result;\
    _mm512_store_si512((__m512i*)result.Data, _mm512_div_epu##XX(_mm512_load_si512((__m512i*)a.Data), _mm512_load_si512((__m512i*)b.Data)));\
    return result;\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 512, int##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm512_store_si512((__m512i*)to.Data, _mm512_div_epi##XX(_mm512_load_si512((__m512i*)to.Data), _mm512_load_si512((__m512i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::DivideInplace(SIMD_Type_t& to, const SIMD_Type_t& from)\
{\
    _mm512_store_si512((__m512i*)to.Data, _mm512_div_epu##XX(_mm512_load_si512((__m512i*)to.Data), _mm512_load_si512((__m512i*)from.Data)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 512, int##XX##_t>::DivideInplaceRaw(int##XX##_t* to, const int##XX##_t* from)\
{\
    _mm512_store_si512((__m512i*)to, _mm512_div_epi##XX(_mm512_load_si512((__m512i*)to), _mm512_load_si512((__m512i*)from)));\
}\
template<>\
_SIMD_INL_ void SIMD_Type_t<int, 512, uint##XX##_t>::DivideInplaceRaw(uint##XX##_t* to, const uint##XX##_t* from)\
{\
    _mm512_store_si512((__m512i*)to, _mm512_div_epu##XX(_mm512_load_si512((__m512i*)to), _mm512_load_si512((__m512i*)from)));\
}
1113+
10101114
// ███████╗ ██╗ ██████╗ █████╗ ████████╗
10111115
// ██╔════╝ ██║ ██╔══ ██╗ ██╔══██╗ ╚══██╔══╝
10121116
// █████╗ ██║ ██║ ██║ ███████║ ██║
@@ -1234,12 +1338,18 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12341338
CREATE_INT128_OPERATOR_PLUS(16);
12351339
CREATE_INT128_OPERATOR_PLUS(32);
12361340
CREATE_INT128_OPERATOR_PLUS(64);
1341+
12371342
CREATE_INT128_OPERATOR_MINUS(8);
12381343
CREATE_INT128_OPERATOR_MINUS(16);
12391344
CREATE_INT128_OPERATOR_MINUS(32);
12401345
CREATE_INT128_OPERATOR_MINUS(64);
12411346

12421347
CREATE_INT128_OPERATOR_MULTIPLY(16);
1348+
1349+
CREATE_INT128_OPERATOR_DIVIDE(8);
1350+
CREATE_INT128_OPERATOR_DIVIDE(16);
1351+
CREATE_INT128_OPERATOR_DIVIDE(32);
1352+
CREATE_INT128_OPERATOR_DIVIDE(64);
12431353
#endif
12441354

12451355
#if defined(SSE4_1_AVAILABLE)
@@ -1254,6 +1364,7 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12541364
CREATE_INT256_OPERATOR_PLUS(16);
12551365
CREATE_INT256_OPERATOR_PLUS(32);
12561366
CREATE_INT256_OPERATOR_PLUS(64);
1367+
12571368
CREATE_INT256_OPERATOR_MINUS(8);
12581369
CREATE_INT256_OPERATOR_MINUS(16);
12591370
CREATE_INT256_OPERATOR_MINUS(32);
@@ -1285,6 +1396,11 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12851396
CREATE_DOUBLE_OPERATOR_MINUS(256);
12861397
CREATE_DOUBLE_OPERATOR_MULTIPLY(256);
12871398
CREATE_DOUBLE_OPERATOR_DIVIDE(256);
1399+
1400+
CREATE_INT256_OPERATOR_DIVIDE(8);
1401+
CREATE_INT256_OPERATOR_DIVIDE(16);
1402+
CREATE_INT256_OPERATOR_DIVIDE(32);
1403+
CREATE_INT256_OPERATOR_DIVIDE(64);
12881404
#endif
12891405

12901406
#if defined(SIMD_USE_TYPE_FLOAT_256)
@@ -1296,40 +1412,50 @@ _SIMD_INL_ void SIMD_Type_t<double, XXX, double>::DivideInplaceRaw(double* to, c
12961412
#endif
12971413

12981414
#if defined(AVX512F_AVAILABLE)
1299-
//TODO: This part couldn't be tested yet due to some hardware incapabilities... :(
1415+
#define SIMD_USE_TYPE_INT_512 1
1416+
#define SIMD_USE_TYPE_FLOAT_512 1
1417+
#define SIMD_USE_TYPE_DOUBLE_512 1
1418+
13001419
CREATE_INT128_OPERATOR_MULTIPLY(64);
13011420
CREATE_INT256_OPERATOR_MULTIPLY(64);
13021421

13031422
CREATE_INT512_OPERATOR_PLUS(8);
13041423
CREATE_INT512_OPERATOR_PLUS(16);
13051424
CREATE_INT512_OPERATOR_PLUS(32);
13061425
CREATE_INT512_OPERATOR_PLUS(64);
1426+
13071427
CREATE_INT512_OPERATOR_MINUS(8);
13081428
CREATE_INT512_OPERATOR_MINUS(16);
13091429
CREATE_INT512_OPERATOR_MINUS(32);
13101430
CREATE_INT512_OPERATOR_MINUS(64);
1431+
13111432
CREATE_INT512_OPERATOR_MULTIPLY(16);
13121433
CREATE_INT512_OPERATOR_MULTIPLY(32);
13131434
CREATE_INT512_OPERATOR_MULTIPLY(64);
13141435

1315-
DECLARE_SIMD_USE_TYPE_INT(int, 512);
1316-
13171436
CREATE_FLOAT_OPERATOR_PLUS(512);
13181437
CREATE_FLOAT_OPERATOR_MINUS(512);
13191438
CREATE_FLOAT_OPERATOR_MULTIPLY(512);
13201439
CREATE_FLOAT_OPERATOR_DIVIDE(512);
1321-
1322-
DECLARE_SIMD_USE_TYPE_FLOATING(float, 512);
1323-
1440+
13241441
CREATE_DOUBLE_OPERATOR_PLUS(512);
13251442
CREATE_DOUBLE_OPERATOR_MINUS(512);
13261443
CREATE_DOUBLE_OPERATOR_MULTIPLY(512);
13271444
CREATE_DOUBLE_OPERATOR_DIVIDE(512);
1445+
#endif
13281446

1329-
DECLARE_SIMD_USE_TYPE_FLOATING(double, 512);
1447+
#if defined(SIMD_USE_TYPE_INT_512)
1448+
DECLARE_SIMD_USE_TYPE_INT(int, 512);
1449+
#endif
13301450

1451+
#if defined(SIMD_USE_TYPE_FLOAT_512)
1452+
DECLARE_SIMD_USE_TYPE_FLOATING(float, 512);
13311453
#endif
13321454

1455+
#if defined(SIMD_USE_TYPE_DOUBLE_512)
1456+
DECLARE_SIMD_USE_TYPE_FLOATING(double, 512);
1457+
#endif
1458+
13331459
//SIMD::int_XXX checks are not ideal...
13341460
template<typename T>
13351461
using IsSIMDType = typename std::enable_if<

0 commit comments

Comments
 (0)