I am trying to convert a Caffe model to Keras. I have successfully used both MMdnn and caffe-tensorflow; the output I have is .npy files and .pb files. I have not had much luck with the .pb files, so I stuck with the .npy files, which contain the weights and biases. I have reconstructed the mAlexNet network as follows:
```python
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten

def define_malexnet():
    inputs = keras.Input(shape=(224, 224, 3), name='data')
    x = Conv2D(16, kernel_size=(11, 11), strides=(4, 4), activation='relu', name='conv1')(inputs)
    x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same', name='pool1')(x)
    x = Conv2D(20, kernel_size=(5, 5), strides=(1, 1), activation='relu', name='conv2')(x)
    x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), name='pool2')(x)
    x = Conv2D(30, kernel_size=(3, 3), strides=(1, 1), activation='relu', name='conv3')(x)
    x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)
    x = Flatten()(x)
    x = Dense(48, activation='relu', name='fc4')(x)
    outputs = Dense(2, activation='softmax', name='fc5')(x)
    occupancy_model = keras.Model(inputs, outputs, name='occupancy_malexnet')
    occupancy_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return occupancy_model
```
Then I try to load the weights using this code snippet:
```python
import numpy as np

weights_data = np.load('weights.npy', allow_pickle=True).item()
model = define_malexnet()

for layer in model.layers:
    if layer.name in weights_data:
        layer_weights = weights_data[layer.name]
        layer.set_weights((layer_weights['weights'], layer_weights['bias']))
```
During this process I get an error:
```
ValueError: Layer conv1 weight shape (16,) is not compatible with provided weight shape (1, 1, 1, 16).
```
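The mismatch can be reproduced with plain NumPy, independent of Keras. The snippet below is a sketch with a placeholder array standing in for the conv1 bias from weights.npy: the converter stores it as a 4D tensor, while Keras expects a 1D vector with one entry per filter.

```python
import numpy as np

# Placeholder standing in for weights_data['conv1']['bias'] from weights.npy:
# the converter stores the bias as a 4D tensor of shape (1, 1, 1, 16).
bias = np.zeros((1, 1, 1, 16))

# Keras expects the Conv2D bias as a 1D vector of shape (n_filters,).
print(bias.shape)            # (1, 1, 1, 16)
print(bias.flatten().shape)  # (16,)
```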
Now, as I understand it, this is because of the different backends and how they lay out weights, but I have not found a way to solve this problem. My question is: how do I tweak the weights loaded from the file to fit my Keras model? Link to the weights.npy file: https://drive.google.com/file/d/1QKzY-WxiUnf9VnlhWQS38DE3uF5I_qTl/view?usp=sharing
Answer
The problem is the bias vector: it is stored as a 4D tensor, but Keras expects a 1D tensor. Just flatten the bias vector when setting the weights:
```python
import numpy as np

weights_data = np.load('weights.npy', allow_pickle=True).item()
model = define_malexnet()

for layer in model.layers:
    if layer.name in weights_data:
        layer_weights = weights_data[layer.name]
        layer.set_weights((layer_weights['weights'], layer_weights['bias'].flatten()))
```
As a sanity check, after creating your model and loading the weights, I access the conv1 weights in the model and compare them to the corresponding weights you cached:
```
In [22]: weights1 = model.layers[1].weights[0].numpy()
In [23]: weights2 = weights_data['conv1']['weights']
In [24]: np.allclose(weights1, weights2)
Out[24]: True
```
The same for the biases:
```
In [25]: bias1 = model.layers[1].weights[1].numpy()
In [26]: bias2 = weights_data['conv1']['bias']
In [27]: np.allclose(bias1, bias2)
Out[27]: True
```
Notice that I didn't have to flatten the bias from your cached results before comparing, because np.allclose broadcasts the two arrays, so the singleton dimensions don't affect the result.
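This broadcasting behavior can be checked with a toy example (a sketch with made-up values, not the actual cached weights):

```python
import numpy as np

b1 = np.arange(16, dtype=float)   # 1D bias, shape (16,)
b2 = b1.reshape(1, 1, 1, 16)      # the same values stored as a 4D tensor

# np.allclose broadcasts (16,) against (1, 1, 1, 16), so the
# singleton dimensions do not affect the comparison.
print(np.allclose(b1, b2))  # True
```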