@@ -89,7 +89,7 @@ TEST(CodeGenCUDA, basic) {
8989
9090 CodeGenCUDA_Dev codegen (target);
9191
92- auto func = Lower (" elementwise_add " , stages, {A, B, C});
92+ auto func = Lower (" elementwise_mul " , stages, {A, B, C});
9393
9494 auto compiled = codegen.Compile (func);
9595
@@ -115,7 +115,7 @@ TEST(CodeGenCUDA, Module_output) {
115115
116116 CodeGenCUDA_Dev codegen (target);
117117
118- auto func = Lower (" elementwise_add " , stages, {A, B, C});
118+ auto func = Lower (" elementwise_mul " , stages, {A, B, C});
119119
120120 Module::Builder builder (" module" , target);
121121 builder.AddFunction (func);
@@ -149,7 +149,7 @@ TEST(CodeGenCUDA2, test_of_cacheread) {
149149 stages[B_cache]->ComputeAt (stages[C], 1 );
150150 CodeGenCUDA_Dev codegen (target);
151151
152- auto func = Lower (" elementwise_add " , stages, {A, B, C});
152+ auto func = Lower (" elementwise_mul " , stages, {A, B, C});
153153
154154 Module::Builder builder (" module" , target);
155155 builder.AddFunction (func);
@@ -181,7 +181,7 @@ TEST(CodeGenCUDA2, test_of_cacheread) {
181181
182182 dim3 grid (10 , 1 , 1 );
183183 dim3 block (10 , 1 , 1 );
184- cuda_module.LaunchKernel (0 , " elementwise_add " , grid, block, args);
184+ cuda_module.LaunchKernel (0 , " elementwise_mul " , grid, block, args);
185185
186186 CUDA_CALL (cudaMemcpy (host_data3.data (),
187187 reinterpret_cast <void *>(Cd),
@@ -221,7 +221,7 @@ TEST(CodeGenCUDA2, test_of_splitcudakernel) {
221221
222222 CodeGenCUDA_Dev codegen (target);
223223
224- auto func = lang::LowerVec (" elementwise_add " , stages, {A, B, C, D}, {}, {}, nullptr , target);
224+ auto func = lang::LowerVec (" elementwise_mul_and_add " , stages, {A, B, C, D}, {}, {}, nullptr , target);
225225
226226 Module::Builder builder (" module" , target);
227227 for (auto & i : func) {
@@ -251,15 +251,15 @@ typedef char int8_t;
251251
252252
253253__global__
254- void __launch_bounds__(200) elementwise_add (const float* __restrict__ X, const float* __restrict__ Y, float* __restrict__ C)
254+ void __launch_bounds__(200) elementwise_mul_and_add (const float* __restrict__ X, const float* __restrict__ Y, float* __restrict__ C)
255255{
256256 if (((int)blockIdx.x < 100)) {
257257 if (((int)threadIdx.x < 200)) {
258258 C[((200 * (int)blockIdx.x) + (int)threadIdx.x)] = (X[((200 * (int)blockIdx.x) + (int)threadIdx.x)] * Y[((200 * (int)blockIdx.x) + (int)threadIdx.x)]);
259259 };
260260 };
261261}__global__
262- void __launch_bounds__(200) elementwise_add_1 (const float* __restrict__ X, const float* __restrict__ Y, const float* __restrict__ C, float* __restrict__ D)
262+ void __launch_bounds__(200) elementwise_mul_and_add_1 (const float* __restrict__ X, const float* __restrict__ Y, const float* __restrict__ C, float* __restrict__ D)
263263{
264264 if (((int)blockIdx.x < 100)) {
265265 if (((int)threadIdx.x < 200)) {
0 commit comments