I have tried to tie weights in TensorFlow 2.0 Keras with the code below, but it raises the error shown. Does anyone know how to write a tied-weights Dense layer?
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

tf.random.set_seed(0)
with tf.device('/cpu:0'):
    # This returns a tensor
    inputs = Input(shape=(784,))

    # a layer instance is callable on a tensor, and returns a tensor
    layer_1 = Dense(64, activation='relu')
    layer_1_output = layer_1(inputs)
    layer_2 = Dense(64, activation='relu')
    layer_2_output = layer_2(layer_1_output)

    weights = tf.transpose(layer_1.weights[0]).numpy()
    print(weights.shape)
    transpose_layer = Dense(784, activation='relu')
    transpose_layer_output = transpose_layer(layer_2_output)
    transpose_layer.set_weights(weights)

    predictions = Dense(10, activation='softmax')(transpose_layer)

    # This creates a model that includes
    # the Input layer and three Dense layers
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # print(model.weights)
    model.summary()
Errors
Traceback (most recent call last):
  File "practice_2.py", line 62, in <module>
    transpose_layer.set_weights(weights)
  File "/Users/cheesiang_leow/.virtualenvs/tensorflow-2.0/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 934, in set_weights
    str(weights)[:50] + '...')
ValueError: You called `set_weights(weights)` on layer "dense_2" with a weight list of length 64, but the layer was expecting 2 weights. Provided weights: [[-0.03499636 0.0214913 0.04076344 ... -0.06531...
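For context: set_weights on a Keras layer expects a list with one array per weight variable, which for a Dense layer is [kernel, bias]; passing the single transposed kernel array is read as a list of 64 rows, hence "weight list of length 64 ... expecting 2 weights". A minimal sketch of what the call would have to look like (keeping the layer's own freshly initialized bias):

transpose_layer.set_weights([
    weights,                           # transposed kernel, shape (64, 784)
    transpose_layer.get_weights()[1],  # keep the existing bias, shape (784,)
])

Note that this only copies the kernel once rather than sharing the variable, which is what motivates the subclassing approach in the answer below.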
Answer
It took me quite a while to figure out, but I think this is a way to implement tied weights by subclassing the Keras Dense layer.
# Imports inferred from the usage below (tf.keras backend ops and layers).
from tensorflow import keras as K
from tensorflow.keras.layers import Dense, InputSpec, LeakyReLU


class TiedLayer(Dense):
    def __init__(self, layer_sizes, l2_normalize=False, dropout=0.0, *args, **kwargs):
        self.layer_sizes = layer_sizes
        self.l2_normalize = l2_normalize
        self.dropout = dropout
        self.kernels = []
        self.biases = []
        self.biases2 = []
        self.uses_learning_phase = True
        self.activation = kwargs['activation']
        if self.activation == "leaky_relu":
            self.activation = kwargs.pop('activation')
            self.activation = LeakyReLU()
            print(self.activation)
        super().__init__(units=1, *args, **kwargs)  # 'units' not used

    def compute_output_shape(self, input_shape):
        return input_shape

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = int(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        # print(input_dim)
        # One kernel (and optional bias) per encoder layer.
        for i in range(len(self.layer_sizes)):
            self.kernels.append(
                self.add_weight(
                    shape=(input_dim, self.layer_sizes[i]),
                    initializer=self.kernel_initializer,
                    name='ae_kernel_{}'.format(i),
                    regularizer=self.kernel_regularizer,
                    constraint=self.kernel_constraint))
            if self.use_bias:
                self.biases.append(
                    self.add_weight(
                        shape=(self.layer_sizes[i],),
                        initializer=self.bias_initializer,
                        name='ae_bias_{}'.format(i),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            input_dim = self.layer_sizes[i]

        # Separate biases for the decoder; its kernels are the encoder kernels transposed.
        if self.use_bias:
            for n, i in enumerate(range(len(self.layer_sizes) - 2, -1, -1)):
                self.biases2.append(
                    self.add_weight(
                        shape=(self.layer_sizes[i],),
                        initializer=self.bias_initializer,
                        name='ae_bias2_{}'.format(n),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            self.biases2.append(
                self.add_weight(
                    shape=(int(input_shape[-1]),),
                    initializer=self.bias_initializer,
                    name='ae_bias2_{}'.format(len(self.layer_sizes)),
                    regularizer=self.bias_regularizer,
                    constraint=self.bias_constraint))

        self.built = True

    def call(self, inputs):
        return self.decode(self.encode(inputs))

    def _apply_dropout(self, inputs):
        dropped = K.backend.dropout(inputs, self.dropout)
        return K.backend.in_train_phase(dropped, inputs)

    def encode(self, inputs):
        latent = inputs
        for i in range(len(self.layer_sizes)):
            if self.dropout > 0:
                latent = self._apply_dropout(latent)
            print(self.kernels[i])
            latent = K.backend.dot(latent, self.kernels[i])
            if self.use_bias:
                print(self.biases[i])
                latent = K.backend.bias_add(latent, self.biases[i])
            if self.activation is not None:
                latent = self.activation(latent)
        if self.l2_normalize:
            latent = latent / K.backend.l2_normalize(latent, axis=-1)
        return latent

    def decode(self, latent):
        recon = latent
        for i in range(len(self.layer_sizes)):
            if self.dropout > 0:
                recon = self._apply_dropout(recon)
            # Reuse the encoder kernels in reverse order, transposed (tied weights).
            print(self.kernels[len(self.layer_sizes) - i - 1])
            recon = K.backend.dot(recon, K.backend.transpose(
                self.kernels[len(self.layer_sizes) - i - 1]))
            if self.use_bias:
                print(self.biases2[i])
                recon = K.backend.bias_add(recon, self.biases2[i])
            if self.activation is not None:
                recon = self.activation(recon)
        return recon

    def get_config(self):
        config = {'layer_sizes': self.layer_sizes}
        base_config = super().get_config()
        base_config.pop('units', None)
        return dict(list(base_config.items()) + list(config.items()))

    @classmethod
    def from_config(cls, config):
        return cls(**config)
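To try it out, here is a minimal usage sketch for an MNIST-sized autoencoder; the layer sizes, activation, and random training data are illustrative choices of mine, not part of the original answer:

import numpy as np
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

inputs = Input(shape=(784,))
# Encoder 784 -> 64 -> 32; the decoder reuses the transposed kernels for 32 -> 64 -> 784.
outputs = TiedLayer(layer_sizes=[64, 32], activation='relu')(inputs)
autoencoder = Model(inputs=inputs, outputs=outputs)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

# Train to reconstruct the input (dummy data here).
x = np.random.rand(16, 784).astype('float32')
autoencoder.fit(x, x, epochs=1, batch_size=8)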
Hope it can help someone else.