Skip to content

InvalidArgumentError while training #2

@davletov-aa

Description

@davletov-aa

Hi! I'm trying to train the model, but training fails, seemingly at the end of the epoch, with InvalidArgumentError: Incompatible shapes: [832] vs. [64,13]
[[Node: metrics_4/acc_1/Equal = Equal[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](metrics_4/acc_1/Reshape, metrics_4/acc_1/Cast)]].

code from jupyter:

%load_ext autoreload
%autoreload 2
from rnnmorph.train import train

train(['../morphoRuEval/morphoRuEval/gikrya_fixed.txt'],
      'rnnmorph/models/ru/train_config.json', 'rnnmorph/models/ru/build_config.json', 'ru')

environment:

_tflow_select 2.3.0 mkl
absl-py 0.7.0 py36_0
astor 0.7.1 py36_0
blas 1.0 mkl
c-ares 1.15.0 h1de35cc_1
ca-certificates 2018.11.29 ha4d7672_0 conda-forge
certifi 2018.11.29 py36_1000 conda-forge
DAWG-Python 0.7.2
docopt 0.6.2
gast 0.2.2 py36_0
git-lfs 2.7.1 0 conda-forge
grpcio 1.16.1 py36h044775b_1
h5py 2.9.0 py36h3134771_0
hdf5 1.10.4 hfa1e0ec_0
intel-openmp 2019.1 144
jsonpickle 0.9.6 py_1
keras 2.2.4 0
keras-applications 1.0.6 py36_0
keras-base 2.2.4 py36_0
keras-preprocessing 1.0.5 py36_0
libcxx 4.0.1 hcfea43d_1
libcxxabi 4.0.1 hcfea43d_1
libedit 3.1.20181209 hb402a30_0
libffi 3.2.1 h475c297_4
libgfortran 3.0.1 h93005f0_2
libprotobuf 3.6.1 hd9629dc_0
markdown 3.0.1 py36_0
mkl 2019.1 144
mkl_fft 1.0.10 py36h5e564d8_0
mkl_random 1.0.2 py36h27c97d8_0
ncurses 6.1 h0a44026_1
nltk 3.4 py36_1
numpy 1.16.2 py36hacdab7b_0
numpy-base 1.16.2 py36h6575580_0
openssl 1.1.1b h1de35cc_1 conda-forge
pip 19.0.3 py36_0
protobuf 3.6.1 py36h0a44026_0
pymorphy2 0.8
pymorphy2-dicts 2.4.393442.3710985
python 3.6.8 haf84260_0
pyyaml 3.13 py36h1de35cc_0
readline 7.0 h1de35cc_5
russian-tagsets 0.6
scikit-learn 0.20.2 py36h27c97d8_0
scipy 1.2.1 py36h1410ff5_0
setuptools 40.8.0 py36_0
six 1.12.0 py36_0
sqlite 3.26.0 ha441bb4_0
tensorboard 1.12.2 py36haf313ee_0
tensorflow 1.12.0 mkl_py36h2b2bbaf_0
tensorflow-base 1.12.0 mkl_py36h70e0e9a_0
termcolor 1.1.0 py36_1
tk 8.6.8 ha441bb4_0
tqdm 4.31.1 py_0
werkzeug 0.14.1 py36_0
wheel 0.33.1 py36_0
xz 5.2.4 h1de35cc_4
yaml 0.1.7 hc338f04_2
zlib 1.2.11 h1de35cc_3

output:


Layer (type) Output Shape Param # Connected to

grammemes (InputLayer) (None, None, 56) 0


dropout_1 (Dropout) (None, None, 56) 0 grammemes[0][0]


chars (InputLayer) (None, None, 32) 0


dense_1 (Dense) (None, None, 30) 1710 dropout_1[0][0]


time_distributed_1 (TimeDistrib (None, None, 200) 487124 chars[0][0]


LSTM_input (Concatenate) (None, None, 230) 0 dense_1[0][0]
time_distributed_1[0][0]


dense_4 (Dense) (None, None, 200) 46200 LSTM_input[0][0]


LSTM_1_forward (LSTM) (None, None, 128) 168448 dense_4[0][0]


LSTM_1_backward (ReversedLSTM) (None, None, 128) 168448 dense_4[0][0]


BiLSTM_input (Concatenate) (None, None, 256) 0 LSTM_1_forward[0][0]
LSTM_1_backward[0][0]


bidirectional_1 (Bidirectional) (None, None, 256) 394240 BiLSTM_input[0][0]


time_distributed_2 (TimeDistrib (None, None, 128) 32896 bidirectional_1[0][0]


time_distributed_3 (TimeDistrib (None, None, 128) 0 time_distributed_2[0][0]


time_distributed_4 (TimeDistrib (None, None, 128) 512 time_distributed_3[0][0]


time_distributed_5 (TimeDistrib (None, None, 128) 0 time_distributed_4[0][0]


main_pred (Dense) (None, None, 253) 32637 time_distributed_5[0][0]

Total params: 1,332,215
Trainable params: 1,331,959
Non-trainable params: 256


None
gikrya_fixed.txt: 0%| | 0.00/58.9M [00:00<?, ?B/s]
------------Big Epoch 0------------
gikrya_fixed.txt: 9%|▉ | 5.24M/58.9M [00:00<00:10, 5.23MB/s]
Epoch 1/1


InvalidArgumentError Traceback (most recent call last)
in <module>()
1 train(['../morphoRuEval/morphoRuEval/gikrya_fixed.txt'],
----> 2 'rnnmorph/models/ru/train_config.json', 'rnnmorph/models/ru/build_config.json', 'ru')

~/GIT/top_morph/rnnmorph/rnnmorph/train.py in train(file_names, train_config_path, build_config_path, language, embeddings_path)
52 print(embeddings.shape)
53 model.build(build_config, embeddings)
---> 54 model.train(file_names, train_config, build_config)
55
56

~/GIT/top_morph/rnnmorph/rnnmorph/model.py in train(self, file_names, train_config, build_config)
271 char_set=self.char_set)
272 for epoch, (inputs, target) in enumerate(batch_generator):
--> 273 self.train_model.fit(inputs, target, batch_size=train_config.batch_size, epochs=1, verbose=2)
274 if epoch != 0 and epoch % train_config.dump_model_freq == 0:
275 self.save(train_config.train_model_config_path, train_config.train_model_weights_path,

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/keras/engine/training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1380 ret = tf_session.TF_SessionRunCallable(
1381 self._session._session, self._handle, args, status,
-> 1382 run_metadata_ptr)
1383 if run_metadata:
1384 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/envs/py_3.6/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
517 None, None,
518 compat.as_text(c_api.TF_Message(self.status.status)),
--> 519 c_api.TF_GetCode(self.status.status))
520 # Delete the underlying status object from memory otherwise it stays alive
521 # as there is a reference to status from this from the traceback due to

InvalidArgumentError: Incompatible shapes: [3328] vs. [256,13]
[[Node: metrics_5/acc_2/Equal = Equal[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](metrics_5/acc_2/Reshape, metrics_5/acc_2/Cast)]]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions