Skip to content

Commit 2d34063

Browse files
committed
as backup
1 parent ce5f9dd commit 2d34063

File tree

3 files changed

+62
-28
lines changed

3 files changed

+62
-28
lines changed

include/core/generator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ auto array_generator(const char* images_ubyte, const char* labels_ubyte) {
250250
return {n_class, channel, height, width};
251251
}
252252

253-
NormalGenerator::Dataset next_batch(int batch_size) {\
253+
NormalGenerator::Dataset next_batch(int batch_size) {
254254
int index = curr_iter;
255255
if (curr_iter + batch_size <= n_sample) {
256256
curr_iter += batch_size;

include/core/tensor.h

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -213,18 +213,20 @@ class Tensor {
213213
return len;
214214
}
215215

216-
void set_data(const float *host) const {
216+
void set_data(const float *host, bool sync = true) const {
217217
size_t len = count();
218218
ensure(CUDA_SUCCESS == cuMemcpyHtoDAsync_v2((CUdeviceptr)d_data->get(), host, len * sizeof(float), devices[currentDev].hStream));
219-
ensure(CUDA_SUCCESS == cuStreamSynchronize(devices[currentDev].hStream));
219+
if (sync)
220+
ensure(CUDA_SUCCESS == cuStreamSynchronize(devices[currentDev].hStream));
220221
}
221222

222-
vector<float> get_data() const {
223+
vector<float> get_data(bool sync = true) const {
223224
size_t len = count();
224225
vector<float> host(len);
225226
if (len > 0) {
226227
ensure(CUDA_SUCCESS == cuMemcpyDtoHAsync_v2(host.data(), (CUdeviceptr)d_data->get(), len * sizeof(float), devices[currentDev].hStream));
227-
ensure(CUDA_SUCCESS == cuStreamSynchronize(devices[currentDev].hStream));
228+
if (sync)
229+
ensure(CUDA_SUCCESS == cuStreamSynchronize(devices[currentDev].hStream));
228230
}
229231
return move(host);
230232
}
@@ -238,10 +240,9 @@ class Tensor {
238240
return move(mat);
239241
}
240242

241-
Tensor copy() const {
242-
Tensor ans(this->shape);
243-
ensure(CUDA_SUCCESS == cuMemcpyDtoDAsync_v2((CUdeviceptr)ans.d_data->get(), (CUdeviceptr)this->d_data->get(), ans.count() * sizeof(float), devices[currentDev].hStream));
244-
return move(ans);
243+
void copyTo(const Tensor &dst) const {
244+
die_if(dst.shape != this->shape, "Cannot copy tensor among two tensors with different shapes.");
245+
ensure(CUDA_SUCCESS == cuMemcpyDtoDAsync_v2((CUdeviceptr)dst.d_data->get(), (CUdeviceptr)this->d_data->get(), dst.count() * sizeof(float), devices[currentDev].hStream));
245246
}
246247

247248
Tensor matmul(const Tensor &that, bool transposeThis = false, bool transposeThat = false) const {
@@ -288,9 +289,7 @@ class Tensor {
288289
}
289290

290291
Tensor self_add(const Tensor &that, float ceof = 1.0f) const {
291-
ensure(this->shape == that.shape);
292-
ensure(CUBLAS_STATUS_SUCCESS == cublasSaxpy(cublasHandle, that.count(), &ceof, (float*)that.d_data->get(), 1, (float*)this->d_data->get(), 1));
293-
return *this;
292+
return self_update(that, ceof, 1.0f);
294293
}
295294

296295
Tensor add(const Tensor &that, float ceof = 1.0f) const {

lite-model.cc

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,8 @@ int main(int argc, char **argv) {
5555
int ngpus = 1;
5656
int batch_size = 64, steps = 50000;
5757

58-
// * Mnist_MLP
59-
vector<unique_ptr<NormalGenerator>> gens;
60-
for (int i = 0; i < ngpus; ++i)
61-
gens.push_back(array_generator(CIFAR10_IMAGES, CIFAR10_LABELS)); // gen->save_to_directory("/cifar10");
58+
// cifar10: 350 * 64 * 4 images/ sec;
59+
auto gen = array_generator(CIFAR10_IMAGES, CIFAR10_LABELS);
6260
// auto gen = image_generator("/cifar10", 32, 32, 1 << 11, 8);
6361

6462
/* auto model = make_shared<InputLayer>("image_place_0", gen->channel, gen->height, gen->width)
@@ -72,42 +70,78 @@ int main(int argc, char **argv) {
7270
->compile(); */
7371

7472
// * ImageNet_AlexNet
75-
/* die_if(0 != system("test -e /tmp/CatsAndDogs/.succ || (echo 'Downloading Cats-and-Dogs dataset ..' && curl -L https://github.com/ghostplant/public/releases/download/cats-and-dogs/cats-and-dogs.tar.gz | tar xzvf - -C /tmp >/dev/null && touch /tmp/CatsAndDogs/.succ)"), "Failed to download sample dataset.");
76-
auto gen = image_generator("/tmp/CatsAndDogs/train", 224, 224, 2048 * 8, 8),
73+
// die_if(0 != system("test -e /tmp/CatsAndDogs/.succ || (echo 'Downloading Cats-and-Dogs dataset ..' && curl -L https://github.com/ghostplant/public/releases/download/cats-and-dogs/cats-and-dogs.tar.gz | tar xzvf - -C /tmp >/dev/null && touch /tmp/CatsAndDogs/.succ)"), "Failed to download sample dataset.");
74+
/* auto gen = image_generator("/tmp/CatsAndDogs/train", 224, 224, 2048 * 8, 8),
7775
val_gen = image_generator("/tmp/CatsAndDogs/validate", 224, 224, 2048, 1); */
7876

7977
vector<shared_ptr<Model>> model_replias(ngpus);
8078
vector<shared_ptr<Optimizor>> optimizors(ngpus);
8179

8280
for (int i = 0; i < ngpus; ++i) {
8381
Tensor::activateCurrentDevice(i);
84-
auto img_shape = gens[i]->get_shape();
82+
auto img_shape = gen->get_shape();
8583
model_replias[i] = lite_dnn::apps::cifar10_alexnet::
8684
create_model("image_place_0", "label_place_0", {img_shape[1], img_shape[2], img_shape[3]}, img_shape[0]);
87-
// model_replias[i]->load_weights_from_file("weights.lw");
85+
if (i == 0) {
86+
Tensor::activateCurrentDevice(0);
87+
model_replias[0]->load_weights_from_file("weights.lw");
88+
}
8889

89-
optimizors[i] = make_shared<MomentumOptimizor>(model_replias[i], 0.9f, 0.01f);
90+
optimizors[i] = make_shared<SGDOptimizor>(model_replias[i], 0.01f, 0.001f);
9091
}
9192

9293
unsigned long lastClock = get_microseconds();
9394

94-
for (int k = 0; k < steps; ++k) {
95+
vector<vector<Tensor>> grads(ngpus);
96+
Tensor::activateCurrentDevice(0);
97+
auto ws = model_replias[0]->collect_all_weights();
98+
for (int j = 1; j < ngpus; ++j) {
99+
auto wj = model_replias[j]->collect_all_weights();
100+
for (int i = 0; i < ws.size(); ++i)
101+
ws[i].copyTo(wj[i]);
102+
}
103+
Tensor::synchronizeCurrentDevice();
95104

105+
for (int k = 0; k < steps; ++k) {
96106
for (int i = 0; i < ngpus; ++i) {
97107
Tensor::activateCurrentDevice(i);
98-
99-
auto batch_data = gens[i]->next_batch(batch_size);
100-
unordered_map<string, Tensor> feed_dict = {{"image_place_0", batch_data.images}, {"label_place_0", batch_data.labels}};
108+
auto batch_data = gen->next_batch(batch_size);
109+
auto feed_dict = unordered_map<string, Tensor>({{"image_place_0", batch_data.images}, {"label_place_0", batch_data.labels}});
101110

102111
auto predicts = model_replias[i]->predict(feed_dict);
103-
optimizors[i]->apply_updates(model_replias[i]->collect_all_gradients(feed_dict));
112+
grads[i] = model_replias[i]->collect_all_gradients(feed_dict);
104113
}
105114

115+
/* vector<vector<float>> parameters(grads[0].size());
116+
for (int j = 0; j < parameters.size(); ++j)
117+
parameters[j].resize(grads[0][j].count());
118+
for (int i = 0; i < ngpus; ++i) {
119+
Tensor::activateCurrentDevice(i);
120+
for (int j = 0; j < parameters.size(); ++j) {
121+
auto param = grads[i][j].get_data();
122+
ensure(param.size() == parameters[j].size());
123+
for (int k = 0; k < param.size(); ++k) {
124+
parameters[j][k] = parameters[j][k] * i / (i + 1.0f) + param[k] * 1.0f / (i + 1.0f);
125+
}
126+
}
127+
}
128+
for (int i = 0; i < ngpus; ++i) {
129+
Tensor::activateCurrentDevice(i);
130+
for (int j = 0; j < parameters.size(); ++j)
131+
grads[i][j].set_data(parameters[j].data());
132+
} */
133+
for (int i = 0; i < ngpus; ++i) {
134+
Tensor::activateCurrentDevice(i);
135+
optimizors[i]->apply_updates(grads[i]);
136+
}
137+
138+
Tensor::activateCurrentDevice(0);
139+
106140
unsigned long currClock = get_microseconds();
107141
if (currClock >= lastClock + 1000000) {
108142
int dev = 0;
109143
Tensor::activateCurrentDevice(dev);
110-
auto val_batch_data = gens[dev]->next_batch(batch_size);
144+
auto val_batch_data = gen->next_batch(batch_size);
111145
auto val_predicts = model_replias[dev]->predict({{"image_place_0", val_batch_data.images}});
112146
auto val_lacc = val_predicts.get_loss_and_accuracy_with(val_batch_data.labels);
113147

@@ -116,7 +150,8 @@ int main(int argc, char **argv) {
116150
}
117151
}
118152

119-
// model_replias[0]->save_weights_to_file("weights.lw");
153+
Tensor::activateCurrentDevice(0);
154+
model_replias[0]->save_weights_to_file("weights.lw");
120155
Tensor::quit();
121156
return 0;
122157
}

0 commit comments

Comments (0)