You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Training or predicting ...
Training
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1375, in _do_call
return fn(*args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1360, in _run_fn
target_list, run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1453, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.NotFoundError: Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[{{node save/RestoreV2}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1304, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 968, in run
run_metadata_ptr)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1191, in _run
feed_dict_tensor, options, run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1369, in _do_run
run_metadata)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1394, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[node save/RestoreV2 (defined at /home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py:1510) ]]
Original stack trace for 'save/RestoreV2':
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 661, in create_session
self._scaffold.finalize()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 244, in finalize
self._saver.build()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 848, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in _build
build_restore=build_restore)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 510, in _build_internal
restore_sequentially, reshape)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 389, in _AddShardedRestoreOps
name="restore_shard"))
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 336, in _AddRestoreOps
restore_sequentially)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 583, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1493, in restore_v2
name=name)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 750, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3565, in _create_op_internal
op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2045, in __init__
self._traceback = tf_stack.extract_stack_for_node(self._c_op)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 70, in get_tensor
self, compat.as_bytes(tensor_str))
RuntimeError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1314, in restore
names_to_keys = object_graph_key_mapping(save_path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1632, in object_graph_key_mapping
object_graph_string = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 74, in get_tensor
error_translator(e)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/py_checkpoint_reader.py", line 35, in error_translator
raise errors_impl.NotFoundError(None, None, error_message)
tensorflow.python.framework.errors_impl.NotFoundError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3110, in train
rendezvous.raise_errors()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/error_handling.py", line 150, in raise_errors
six.reraise(typ, value, traceback)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/six.py", line 703, in reraise
raise value
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 670, in create_session
init_fn=self._scaffold.init_fn)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 321, in prepare_session
config=config)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 251, in _restore_checkpoint
sess, saver, ckpt.model_checkpoint_path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/session_manager.py", line 71, in _restore_checkpoint_and_maybe_run_saved_model_initializers
saver.restore(sess, path)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1320, in restore
err, "a Variable name or other graph key that is missing")
tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
Key bert/embeddings/LayerNorm/beta/accum not found in checkpoint
[[node save/RestoreV2 (defined at /home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py:1510) ]]
Original stack trace for 'save/RestoreV2':
File "../tapas/run_task_main.py", line 908, in <module>
app.run(main)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "../tapas/run_task_main.py", line 893, in main
loop_predict=FLAGS.loop_predict,
File "../tapas/run_task_main.py", line 526, in _train_and_predict
max_steps=tapas_config.num_train_steps,
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3105, in train
saving_listeners=saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 349, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1175, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1208, in _train_model_default
saving_listeners)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1510, in _train_with_estimator_spec
save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 605, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1039, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 750, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 1237, in _create_session
return self._sess_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 903, in create_session
self.tf_sess = self._session_creator.create_session()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 661, in create_session
self._scaffold.finalize()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/monitored_session.py", line 244, in finalize
self._saver.build()
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 848, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in _build
build_restore=build_restore)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 510, in _build_internal
restore_sequentially, reshape)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 389, in _AddShardedRestoreOps
name="restore_shard"))
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 336, in _AddRestoreOps
restore_sequentially)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 583, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1493, in restore_v2
name=name)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 750, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3565, in _create_op_internal
op_def=op_def)
File "/home/fch/miniconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2045, in __init__
self._traceback = tf_stack.extract_stack_for_node(self._c_op)
The TensorFlow package versions on my machine are as follows: tensorflow 2.2.3 tensorflow-addons 0.14.0 tensorflow-datasets 4.4.0 tensorflow-estimator 2.5.0 tensorflow-gpu 2.5.0 tensorflow-hub 0.12.0 tensorflow-metadata 1.2.0 tensorflow-model-optimization 0.7.0 tensorflow-probability 0.12.0
If anybody can help me with this it would be greatly appreciated. Thanks so much for the work and time!
The text was updated successfully, but these errors were encountered:
Due to a lack of GPU memory, I set batch_size to 32 and gradient_accumulation_steps to 32 when fine-tuning on SQA.
The scripts are as follows.
I then ran into the following error:
The TensorFlow package versions on my machine are as follows:
tensorflow 2.2.3 tensorflow-addons 0.14.0 tensorflow-datasets 4.4.0 tensorflow-estimator 2.5.0 tensorflow-gpu 2.5.0 tensorflow-hub 0.12.0 tensorflow-metadata 1.2.0 tensorflow-model-optimization 0.7.0 tensorflow-probability 0.12.0
If anybody can help me with this it would be greatly appreciated. Thanks so much for the work and time!
The text was updated successfully, but these errors were encountered: