@@ -55,10 +55,8 @@ int main(int argc, char **argv) {
55
55
int ngpus = 1 ;
56
56
int batch_size = 64 , steps = 50000 ;
57
57
58
- // * Mnist_MLP
59
- vector<unique_ptr<NormalGenerator>> gens;
60
- for (int i = 0 ; i < ngpus; ++i)
61
- gens.push_back (array_generator (CIFAR10_IMAGES, CIFAR10_LABELS)); // gen->save_to_directory("/cifar10");
58
+ // cifar10: 350 * 64 * 4 images/ sec;
59
+ auto gen = array_generator (CIFAR10_IMAGES, CIFAR10_LABELS);
62
60
// auto gen = image_generator("/cifar10", 32, 32, 1 << 11, 8);
63
61
64
62
/* auto model = make_shared<InputLayer>("image_place_0", gen->channel, gen->height, gen->width)
@@ -72,42 +70,78 @@ int main(int argc, char **argv) {
72
70
->compile(); */
73
71
74
72
// * ImageNet_AlexNet
75
- /* die_if(0 != system("test -e /tmp/CatsAndDogs/.succ || (echo 'Downloading Cats-and-Dogs dataset ..' && curl -L https://github.com/ghostplant/public/releases/download/cats-and-dogs/cats-and-dogs.tar.gz | tar xzvf - -C /tmp >/dev/null && touch /tmp/CatsAndDogs/.succ)"), "Failed to download sample dataset.");
76
- auto gen = image_generator("/tmp/CatsAndDogs/train", 224, 224, 2048 * 8, 8),
73
+ // die_if(0 != system("test -e /tmp/CatsAndDogs/.succ || (echo 'Downloading Cats-and-Dogs dataset ..' && curl -L https://github.com/ghostplant/public/releases/download/cats-and-dogs/cats-and-dogs.tar.gz | tar xzvf - -C /tmp >/dev/null && touch /tmp/CatsAndDogs/.succ)"), "Failed to download sample dataset.");
74
+ /* auto gen = image_generator("/tmp/CatsAndDogs/train", 224, 224, 2048 * 8, 8),
77
75
val_gen = image_generator("/tmp/CatsAndDogs/validate", 224, 224, 2048, 1); */
78
76
79
77
vector<shared_ptr<Model>> model_replias (ngpus);
80
78
vector<shared_ptr<Optimizor>> optimizors (ngpus);
81
79
82
80
for (int i = 0 ; i < ngpus; ++i) {
83
81
Tensor::activateCurrentDevice (i);
84
- auto img_shape = gens[i] ->get_shape ();
82
+ auto img_shape = gen ->get_shape ();
85
83
model_replias[i] = lite_dnn::apps::cifar10_alexnet::
86
84
create_model (" image_place_0" , " label_place_0" , {img_shape[1 ], img_shape[2 ], img_shape[3 ]}, img_shape[0 ]);
87
- // model_replias[i]->load_weights_from_file("weights.lw");
85
+ if (i == 0 ) {
86
+ Tensor::activateCurrentDevice (0 );
87
+ model_replias[0 ]->load_weights_from_file (" weights.lw" );
88
+ }
88
89
89
- optimizors[i] = make_shared<MomentumOptimizor >(model_replias[i], 0 .9f , 0 .01f );
90
+ optimizors[i] = make_shared<SGDOptimizor >(model_replias[i], 0 .01f , 0 .001f );
90
91
}
91
92
92
93
unsigned long lastClock = get_microseconds ();
93
94
94
- for (int k = 0 ; k < steps; ++k) {
95
+ vector<vector<Tensor>> grads (ngpus);
96
+ Tensor::activateCurrentDevice (0 );
97
+ auto ws = model_replias[0 ]->collect_all_weights ();
98
+ for (int j = 1 ; j < ngpus; ++j) {
99
+ auto wj = model_replias[j]->collect_all_weights ();
100
+ for (int i = 0 ; i < ws.size (); ++i)
101
+ ws[i].copyTo (wj[i]);
102
+ }
103
+ Tensor::synchronizeCurrentDevice ();
95
104
105
+ for (int k = 0 ; k < steps; ++k) {
96
106
for (int i = 0 ; i < ngpus; ++i) {
97
107
Tensor::activateCurrentDevice (i);
98
-
99
- auto batch_data = gens[i]->next_batch (batch_size);
100
- unordered_map<string, Tensor> feed_dict = {{" image_place_0" , batch_data.images }, {" label_place_0" , batch_data.labels }};
108
+ auto batch_data = gen->next_batch (batch_size);
109
+ auto feed_dict = unordered_map<string, Tensor>({{" image_place_0" , batch_data.images }, {" label_place_0" , batch_data.labels }});
101
110
102
111
auto predicts = model_replias[i]->predict (feed_dict);
103
- optimizors [i]-> apply_updates ( model_replias[i]->collect_all_gradients (feed_dict) );
112
+ grads [i] = model_replias[i]->collect_all_gradients (feed_dict);
104
113
}
105
114
115
+ /* vector<vector<float>> parameters(grads[0].size());
116
+ for (int j = 0; j < parameters.size(); ++j)
117
+ parameters[j].resize(grads[0][j].count());
118
+ for (int i = 0; i < ngpus; ++i) {
119
+ Tensor::activateCurrentDevice(i);
120
+ for (int j = 0; j < parameters.size(); ++j) {
121
+ auto param = grads[i][j].get_data();
122
+ ensure(param.size() == parameters[j].size());
123
+ for (int k = 0; k < param.size(); ++k) {
124
+ parameters[j][k] = parameters[j][k] * i / (i + 1.0f) + param[k] * 1.0f / (i + 1.0f);
125
+ }
126
+ }
127
+ }
128
+ for (int i = 0; i < ngpus; ++i) {
129
+ Tensor::activateCurrentDevice(i);
130
+ for (int j = 0; j < parameters.size(); ++j)
131
+ grads[i][j].set_data(parameters[j].data());
132
+ } */
133
+ for (int i = 0 ; i < ngpus; ++i) {
134
+ Tensor::activateCurrentDevice (i);
135
+ optimizors[i]->apply_updates (grads[i]);
136
+ }
137
+
138
+ Tensor::activateCurrentDevice (0 );
139
+
106
140
unsigned long currClock = get_microseconds ();
107
141
if (currClock >= lastClock + 1000000 ) {
108
142
int dev = 0 ;
109
143
Tensor::activateCurrentDevice (dev);
110
- auto val_batch_data = gens[dev] ->next_batch (batch_size);
144
+ auto val_batch_data = gen ->next_batch (batch_size);
111
145
auto val_predicts = model_replias[dev]->predict ({{" image_place_0" , val_batch_data.images }});
112
146
auto val_lacc = val_predicts.get_loss_and_accuracy_with (val_batch_data.labels );
113
147
@@ -116,7 +150,8 @@ int main(int argc, char **argv) {
116
150
}
117
151
}
118
152
119
- // model_replias[0]->save_weights_to_file("weights.lw");
153
+ Tensor::activateCurrentDevice (0 );
154
+ model_replias[0 ]->save_weights_to_file (" weights.lw" );
120
155
Tensor::quit ();
121
156
return 0 ;
122
157
}
0 commit comments