I am running tensorflow 2.4 on colab. I tried to save the model using tf.train.Checkpoint()
since my code uses model subclassing, but after restoration I saw that it hadn't restored any of my model's weights.
Here are a few snippets:
### From tensorflow tutorial nmt_with_attention class Encoder(tf.keras.Model): def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz): ... self.gru = tf.keras.layers.GRU(self.enc_units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform') . . . class NMT_Train(tf.keras.Model): def __init__(self, inp_vocab_size, tar_vocab_size, max_length_inp, max_length_tar, emb_dims, units, batch_size, source_tokenizer, target_tokenizer): super(NMT_Train, self).__init__() self.encoder = Encoder(inp_vocab_size, emb_dims, units, batch_size) ... . . . model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer) model.compile(optimizer = tf.keras.optimizers.Adam(), loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True)) model.fit(dataset, epochs=2) checkpoint = tf.train.Checkpoint(model = model) manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1) manager.save() model.encoder.gru.get_weights() ### get the output ##[array([[-0.0627057 , 0.05900152, 0.06614069, ... model.optimizer.get_weights() ### get the output ##[90, array([[ 6.6851695e-05, -4.6736805e-06, -2.3183979e-05, ...
When I later restored it, I didn't get any GRU weights:
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer) model.compile(optimizer = tf.keras.optimizers.Adam(), loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits= True)) checkpoint = tf.train.Checkpoint(model = model) manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1) manager.restore_or_initialize() model.encoder.gru.get_weights() ### empty list ## [] model.optimizer.get_weights() ### empty list ## []
I also tried checkpoint.restore(manager.latest_checkpoint)
but nothing changed.
Is there anything I am doing wrong? Or can you suggest another way to save the model so that I can retrain it for further epochs?
Advertisement
Answer
You are defining a Keras model, so why not use Keras model checkpoints?
From the Keras documentation:
model.compile(loss=..., optimizer=..., metrics=['accuracy']) EPOCHS = 10 checkpoint_filepath = '/tmp/checkpoint' model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_filepath, save_weights_only=True, monitor='val_accuracy', mode='max', save_best_only=True) # Model weights are saved at the end of every epoch, if it's the best seen # so far. model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback]) # The model weights (that are considered the best) are loaded into the model. model.load_weights(checkpoint_filepath)