@@ -9,7 +9,7 @@ template <typename Dtype, bool has_scale, bool shared>
99__global__ void normalize_kernel_no_across_spatial (const int size_in_channel, const int n,const int channels, \
1010 const Dtype* scale, const Dtype* bottom_data, Dtype* top_data, const float eps, const int p){
1111
12- CUDA_KERNEL_LOOP (index, size_in_channel){
12+ CUDA_KERNEL_LOOP (index, size_in_channel * n ){
1313 float sqr_sum = 0 .f ;
1414 int num_index=index/size_in_channel;
1515 int index_in_channel=index%size_in_channel;
@@ -26,17 +26,16 @@ __global__ void normalize_kernel_no_across_spatial(const int size_in_channel, co
2626 if (p == 1 ) {
2727 norm = 1 .f / (sqr_sum + eps);
2828 } else {
29- norm = 1 .f / ( sqrtf (sqr_sum) + eps);
29+ norm = 1 .f / sqrtf (sqr_sum + eps);
3030 }
31- Dtype has_scale_norm=scale[0 ]*norm;
3231 for (int i = 0 ; i < channels; ++i) {
3332 if (has_scale) {
3433 if (shared) {
3534 top_data[data_index + i * size_in_channel] = \
36- bottom_data[data_index + i * size_in_channel] * scale[0 ] * has_scale_norm ;
35+ bottom_data[data_index + i * size_in_channel] * scale[0 ] * norm ;
3736 } else {
3837 top_data[data_index + i * size_in_channel] = \
39- bottom_data[data_index + i * size_in_channel] * scale[i] * has_scale_norm ;
38+ bottom_data[data_index + i * size_in_channel] * scale[i] * norm ;
4039 }
4140 } else {
4241 top_data[data_index + i * size_in_channel] = \