Skip to content
This repository was archived by the owner on Jan 3, 2023. It is now read-only.

Commit 9a3a031

Browse files
shssfrkimballn1
authored and committed
IntelGPUBackend: BatchNorm 5x1 operation (#1244)
* IntelGPUBackend: BatchNorm 5x1 operation * Update intelgpu_op_batchnorm.cpp * PR1244 comments are addressed
1 parent f3d7946 commit 9a3a031

File tree

4 files changed

+244
-4
lines changed

4 files changed

+244
-4
lines changed

src/ngraph/runtime/intelgpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ set(SRC
1818
intelgpu_backend.cpp
1919
intelgpu_tensor_view.cpp
2020
intelgpu_layout.cpp
21+
intelgpu_op_batchnorm.cpp
2122
)
2223

2324
if (NGRAPH_INTELGPU_ENABLE)

src/ngraph/runtime/intelgpu/intelgpu_backend.cpp

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ void arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
3535
if (op->get_input_size() != input || op->get_output_size() != output)
3636
{
3737
ostringstream os;
38-
os << "Operation \"" << op->description() << "\" input and output sizes mismatch.\n"
39-
<< "Expected input size=" << op->get_input_size() << ", provided=" << input << "\n"
40-
<< "Expected output size=" << op->get_output_size() << ", provided=" << output;
41-
throw std::invalid_argument(os.str());
38+
os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
39+
<< " Expected input size=" << op->get_input_size() << ", provided=" << input
40+
<< ". Expected output size=" << op->get_output_size() << ", provided=" << output;
41+
throw invalid_argument(os.str());
4242
}
4343
}
4444

@@ -140,6 +140,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
140140
{
141141
do_eltwise_operation(topology, op, cldnn::eltwise_mode::prod);
142142
}
143+
else if ("BatchNorm" == op->description())
144+
{
145+
const shared_ptr<op::BatchNorm> batch_norm = static_pointer_cast<op::BatchNorm>(op);
146+
const double eps = batch_norm->get_eps_value();
147+
148+
if (op->get_inputs().size() < 3 || op->get_outputs().empty())
149+
{
150+
arguments_check(op, 3, 1); // throw exception in this case
151+
}
152+
153+
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
154+
const string& gamma_name = op->get_inputs().at(0).get_tensor().get_name();
155+
const string& beta_name = op->get_inputs().at(1).get_tensor().get_name();
156+
const string& input_name = op->get_inputs().at(2).get_tensor().get_name();
157+
const Shape& input_shape = op->get_inputs().at(2).get_shape();
158+
159+
if (op->get_outputs().size() == 1)
160+
{
161+
arguments_check(op, 5, 1);
162+
163+
const string& mean_name = op->get_inputs().at(3).get_tensor().get_name();
164+
const string& variance_name = op->get_inputs().at(4).get_tensor().get_name();
165+
166+
do_batch_norm_operation(topology,
167+
output_name,
168+
eps,
169+
input_name,
170+
input_shape,
171+
gamma_name,
172+
beta_name,
173+
mean_name,
174+
variance_name);
175+
}
176+
else if (op->get_outputs().size() == 3)
177+
{
178+
arguments_check(op, 3, 3);
179+
180+
do_batch_norm_operation(
181+
topology, output_name, eps, input_name, input_shape, gamma_name, beta_name);
182+
}
183+
else
184+
{
185+
arguments_check(op, 5, 1); // throw exception in this case
186+
}
187+
}
143188
else
144189
{
145190
ostringstream os;
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*******************************************************************************
2+
* Copyright 2017-2018 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#include <CPP/batch_norm.hpp>
18+
#include <CPP/concatenation.hpp>
19+
#include <CPP/scale.hpp>
20+
#include <CPP/split.hpp>
21+
22+
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
23+
24+
#include "ngraph/op/batch_norm.hpp"
25+
26+
using namespace std;
27+
using namespace ngraph;
28+
29+
// This function converts Shape dimension id into cldnn::concatenation id
30+
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t tensor_channel)
31+
{
32+
switch (tensor_channel)
33+
{
34+
case 0: return cldnn::concatenation::along_b;
35+
case 1: return cldnn::concatenation::along_f;
36+
case 2: return cldnn::concatenation::along_y;
37+
case 3: return cldnn::concatenation::along_x;
38+
default: throw invalid_argument("intelgpu::get_cldnn_axis() wrong input tensor channel.");
39+
}
40+
}
41+
42+
// Registers a cldnn::split primitive that slices the matrix `name` at the
// given offsets, and returns the id of the split result ("<name>_split").
static string do_matrix_split(cldnn::topology& topology,
                              const string& name,
                              const vector<pair<cldnn::primitive_id, cldnn::tensor>>& offsets)
{
    const string split_name = name + "_split";

    topology.add(cldnn::split(split_name, name, offsets));
    return split_name;
}
52+
53+
// Placeholder for computing the per-channel mean of the input matrix.
// Not implemented yet; always throws. Parameters are left unnamed to avoid
// unused-parameter warnings while keeping the intended signature documented.
static string get_batch_norm_mean(cldnn::topology& /*topology*/, const string& /*input_name*/)
{
    throw invalid_argument(
        "intelgpu::get_batch_norm_mean() Calculation matrix mean is not yet supported.");
}
58+
59+
// Placeholder for computing the per-channel variance of the input matrix
// given its mean. Not implemented yet; always throws. Parameters are left
// unnamed to avoid unused-parameter warnings.
static string get_batch_norm_variance(cldnn::topology& /*topology*/,
                                      const string& /*input_name*/,
                                      const string& /*mean_name*/)
{
    throw invalid_argument(
        "intelgpu::get_batch_norm_variance() Calculation matrix variance is not yet supported.");
}
66+
67+
// Implements nGraph BatchNorm on clDNN by splitting all inputs per channel,
// running cldnn::batch_norm on each channel slice, applying gamma/beta via
// cldnn::scale (batch_norm alone doesn't apply them), and concatenating the
// per-channel results back along the channel axis under `output_name`.
//
// eps            - epsilon added to the variance (forwarded to cldnn::batch_norm)
// input_shape    - shape of `input_name`; must be 2D..4D, channel is axis 1
// mean_name_inp/variance_name_inp - precomputed statistics; when empty they
//   would be computed from the input, which is not yet supported (throws).
void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
                                                const string& output_name,
                                                double eps,
                                                const string& input_name,
                                                const Shape& input_shape,
                                                const string& gamma_name,
                                                const string& beta_name,
                                                const string& mean_name_inp,
                                                const string& variance_name_inp)
{
    vector<pair<cldnn::primitive_id, cldnn::tensor>> split_offsets;
    vector<pair<cldnn::primitive_id, cldnn::tensor>> vec_offsets;
    vector<cldnn::primitive_id> dim_set;

    if (input_shape.size() < 2 || input_shape.size() > 4)
    {
        throw invalid_argument("intelgpu::do_batch_norm_operation() wrong input shape.");
    }

    // According to the documentation, input data channel is always being axis 1
    // Assumed the second dimension from the left. Example {0, 1, 0, 0} or {0, 1}
    // Also, input data must be at least 2D array
    const size_t shape_channel = 1;
    const size_t cldnn_channel = 4 - input_shape.size() + shape_channel;

    const size_t split_arr_count = input_shape.at(shape_channel);
    for (size_t i = 0; i < split_arr_count; ++i)
    {
        const string str_i = to_string(i);
        // Offsets for the 1D per-channel vectors (gamma, beta, mean, variance)
        const cldnn::tensor vec_offset(0, 0, i, 0);
        vec_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, vec_offset));

        vector<cldnn::tensor::value_type> offset({0, 0, 0, 0}); // No action by default
        offset.at(cldnn_channel) = i;

        // Note: cldnn::tensor constructor takes (b, f, x, y), hence 3 before 2
        const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2));
        split_offsets.push_back(pair<cldnn::primitive_id, cldnn::tensor>(str_i, input_offset));
    }

    // Fall back to computing the statistics when they were not provided
    // (both helpers currently throw: not yet supported).
    string mean_name = mean_name_inp;
    if (mean_name_inp.empty())
    {
        mean_name = get_batch_norm_mean(topology, input_name);
    }

    string variance_name = variance_name_inp;
    if (variance_name_inp.empty())
    {
        variance_name = get_batch_norm_variance(topology, input_name, mean_name);
    }

    // Split every participating array into per-channel slices named "<base>:<i>"
    const string input_split_name = do_matrix_split(topology, input_name, split_offsets);
    const string mean_split_name = do_matrix_split(topology, mean_name, vec_offsets);
    const string variance_split_name = do_matrix_split(topology, variance_name, vec_offsets);
    const string gamma_split_name = do_matrix_split(topology, gamma_name, vec_offsets);
    const string beta_split_name = do_matrix_split(topology, beta_name, vec_offsets);

    for (size_t i = 0; i < split_arr_count; ++i)
    {
        const string suf = ':' + to_string(i);
        const string out_bn_name = output_name + "_out_bn";

        // Normalize one channel slice
        const cldnn::batch_norm cldd_batchnorm(out_bn_name + suf,
                                               input_split_name + suf,
                                               mean_split_name + suf,
                                               variance_split_name + suf,
                                               eps);
        topology.add(cldd_batchnorm);

        // Apply gamma (scale) and beta (shift), which cldnn::batch_norm skips
        const cldnn::scale op_scale(
            output_name + suf, out_bn_name + suf, gamma_split_name + suf, beta_split_name + suf);
        topology.add(op_scale);

        dim_set.push_back(output_name + suf);
    }

    // Reassemble the per-channel results into the final output
    const cldnn::concatenation op_concat(output_name, dim_set, get_cldnn_axis(cldnn_channel));
    topology.add(op_concat);
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*******************************************************************************
2+
* Copyright 2017-2018 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#pragma once

#include <CPP/topology.hpp>

#include "ngraph/shape.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            // Implements the nGraph BatchNorm operation on top of clDNN.
            //
            // nGraph handles this operation per channel while clDNN consumes
            // the full input data at once, so the following algorithm is used:
            //   1. Split all input data arrays into per-channel matrices
            //   2. Run cldnn::batch_norm independently on each matrix
            //   3. Scale and shift every cldnn::batch_norm result, because
            //      cldnn::batch_norm doesn't apply gamma and beta itself
            //   4. Concatenate all results into the output matrix along the
            //      channel axis
            void do_batch_norm_operation(cldnn::topology& topology,
                                         const std::string& output_name,
                                         double eps,
                                         const std::string& input_name,
                                         const Shape& input_shape,
                                         const std::string& gamma_name,
                                         const std::string& beta_name,
                                         const std::string& mean_name = std::string(),
                                         const std::string& variance_name = std::string());
        }
    }
}

0 commit comments

Comments
 (0)