I could not find out why mat1 from the convolutional network is 128x4 and not 4x128. mat1 should be the output of the convolutional network after it is flattened, and mat2 is the linear layer following it. The convolutional network, the model training code, and the error log are shown below. Appreciate any help. Thanks!

mat1 and mat2 shapes cannot be multiplied (128×4 and 128×64)

Could not find out why the mat1 from the convolutional network is 128×4 and not 4×128. The following is the convolutional network used:

# CNN from the question: three conv/ReLU/max-pool stages followed by a
# two-layer fully connected head with 4 outputs.
#
# Shapes below are for the un-batched (2, 12, 12) input printed in the
# error log of this post.
model = torch.nn.Sequential(
    # Stage 1: (2, 12, 12) -> (32, 12, 12) -> (32, 6, 6)
    torch.nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),

    # Stage 2: (32, 6, 6) -> (64, 6, 6) -> (64, 3, 3)
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),

    # Stage 3: (64, 3, 3) -> (128, 3, 3) -> (128, 2, 2)
    torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1),

    # Flatten keeps dim 0 and merges the rest: (128, 2, 2) -> (128, 4).
    torch.nn.Flatten(),

    # NOTE(review): this layer expects 128 features in the last dimension,
    # but the flattened tensor above has 4 -- this is the reported
    # "mat1 and mat2 shapes cannot be multiplied (128x4 and 128x64)" error.
    torch.nn.Linear(in_features=128, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=4),
)

Python
​x
 
# CNN from the question: three conv/ReLU/max-pool stages followed by a
# two-layer fully connected head with 4 outputs.
#
# Shapes below are for the un-batched (2, 12, 12) input printed in the
# error log of this post.
model = torch.nn.Sequential(
    # Stage 1: (2, 12, 12) -> (32, 12, 12) -> (32, 6, 6)
    torch.nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),

    # Stage 2: (32, 6, 6) -> (64, 6, 6) -> (64, 3, 3)
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),

    # Stage 3: (64, 3, 3) -> (128, 3, 3) -> (128, 2, 2)
    torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1),

    # Flatten keeps dim 0 and merges the rest: (128, 2, 2) -> (128, 4).
    torch.nn.Flatten(),

    # NOTE(review): this layer expects 128 features in the last dimension,
    # but the flattened tensor above has 4 -- this is the reported
    # "mat1 and mat2 shapes cannot be multiplied (128x4 and 128x64)" error.
    torch.nn.Linear(in_features=128, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=4),
)
​

The model training code is as follows:

# Training script from the question. It relies on names defined elsewhere
# (Gridworld, model, size, l1, epsilon, gamma, action_set, loss_fn,
# optimizer, clear_output) that are not shown in this post.
epochs = 1000
# Loss value of every optimisation step, appended below via loss.item().
losses = []
# Each iteration creates a fresh game and plays it until the reward is no longer -1.
for i in range(epochs):
    game = Gridworld(size=size, mode='static')
    # Earlier flat (1, l1) version of the initial state, kept for reference:
    # state_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0
    # Rendered board plus uniform noise in [0, 0.1); the (size, size) noise
    # broadcasts over the leading axis of the render.
    state_ = game.board.render_np() + np.random.rand(size,size)/10.0
    state1 = torch.from_numpy(state_).float()
    # The error log in this post shows torch.Size([2, 12, 12]) here.
    # NOTE(review): there is no batch dimension, so nn.Flatten() in the model
    # keeps the 128 channels as dim 0 and yields 128x4 -- the reported error.
    print(state1.shape)
    # status == 1 keeps the inner loop running; it is set to 0 once reward != -1.
    status = 1
    while(status == 1):
        # Forward pass on the current state (presumably one value per action).
        qval = model(state1)
        qval_ = qval.data.numpy()
        # Epsilon-greedy choice: with probability epsilon pick a random action
        # index in 0..3, otherwise the argmax of the (flattened) model output.
        if (random.random() < epsilon):
            action_ = np.random.randint(0,4)
        else:
            action_ = np.argmax(qval_)
       
        # Map the action index to the game's action and apply it.
        action = action_set[action_]
        game.makeMove(action)
        # NOTE(review): the next state is reshaped to (1, l1), unlike state1
        # above which keeps the raw render shape; `state1 = state2` below then
        # feeds this flat tensor to the convolutional model on the next pass.
        state2_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0
        state2 = torch.from_numpy(state2_).float()
        reward = game.reward()
        # Evaluate the next state without tracking gradients.
        with torch.no_grad():
            newQ = model(state2.reshape(1,l1))
        # Largest model output for the next state.
        maxQ = torch.max(newQ)
        # reward == -1 is treated as "game still running": bootstrap the target
        # with the discounted maxQ; any other reward is used as the target as-is.
        if reward == -1:
            Y = reward + (gamma * maxQ)
        else:
            Y = reward
        Y = torch.Tensor([Y]).detach()
        # Model output for the action that was actually taken.
        X = qval.squeeze()[action_]
        loss = loss_fn(X, Y)
        print(i, loss.item())
        # Presumably IPython's clear_output, used to overwrite the previous print -- confirm.
        clear_output(wait=True)
        # One optimisation step on this single transition.
        optimizer.zero_grad()
        loss.backward()
        losses.append(loss.item())
        optimizer.step()
        state1 = state2
        # Any reward other than -1 ends the inner loop.
        if reward != -1:
            status = 0
    # Decrease epsilon by 1/epochs per outer iteration while it is above 0.1.
    if epsilon > 0.1:
        epsilon -= (1/epochs)

Python
 
# Training script from the question. It relies on names defined elsewhere
# (Gridworld, model, size, l1, epsilon, gamma, action_set, loss_fn,
# optimizer, clear_output) that are not shown in this post.
epochs = 1000
# Loss value of every optimisation step, appended below via loss.item().
losses = []
# Each iteration creates a fresh game and plays it until the reward is no longer -1.
for i in range(epochs):
    game = Gridworld(size=size, mode='static')
    # Earlier flat (1, l1) version of the initial state, kept for reference:
    # state_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0
    # Rendered board plus uniform noise in [0, 0.1); the (size, size) noise
    # broadcasts over the leading axis of the render.
    state_ = game.board.render_np() + np.random.rand(size,size)/10.0
    state1 = torch.from_numpy(state_).float()
    # The error log in this post shows torch.Size([2, 12, 12]) here.
    # NOTE(review): there is no batch dimension, so nn.Flatten() in the model
    # keeps the 128 channels as dim 0 and yields 128x4 -- the reported error.
    print(state1.shape)
    # status == 1 keeps the inner loop running; it is set to 0 once reward != -1.
    status = 1
    while(status == 1):
        # Forward pass on the current state (presumably one value per action).
        qval = model(state1)
        qval_ = qval.data.numpy()
        # Epsilon-greedy choice: with probability epsilon pick a random action
        # index in 0..3, otherwise the argmax of the (flattened) model output.
        if (random.random() < epsilon):
            action_ = np.random.randint(0,4)
        else:
            action_ = np.argmax(qval_)
       
        # Map the action index to the game's action and apply it.
        action = action_set[action_]
        game.makeMove(action)
        # NOTE(review): the next state is reshaped to (1, l1), unlike state1
        # above which keeps the raw render shape; `state1 = state2` below then
        # feeds this flat tensor to the convolutional model on the next pass.
        state2_ = game.board.render_np().reshape(1,l1) + np.random.rand(1,l1)/10.0
        state2 = torch.from_numpy(state2_).float()
        reward = game.reward()
        # Evaluate the next state without tracking gradients.
        with torch.no_grad():
            newQ = model(state2.reshape(1,l1))
        # Largest model output for the next state.
        maxQ = torch.max(newQ)
        # reward == -1 is treated as "game still running": bootstrap the target
        # with the discounted maxQ; any other reward is used as the target as-is.
        if reward == -1:
            Y = reward + (gamma * maxQ)
        else:
            Y = reward
        Y = torch.Tensor([Y]).detach()
        # Model output for the action that was actually taken.
        X = qval.squeeze()[action_]
        loss = loss_fn(X, Y)
        print(i, loss.item())
        # Presumably IPython's clear_output, used to overwrite the previous print -- confirm.
        clear_output(wait=True)
        # One optimisation step on this single transition.
        optimizer.zero_grad()
        loss.backward()
        losses.append(loss.item())
        optimizer.step()
        state1 = state2
        # Any reward other than -1 ends the inner loop.
        if reward != -1:
            status = 0
    # Decrease epsilon by 1/epochs per outer iteration while it is above 0.1.
    if epsilon > 0.1:
        epsilon -= (1/epochs)
​

The error log shown is:

torch.Size([2, 12, 12])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-22-d2f43f09fd01> in <module>()
     74     status = 1 #F
     75     while(status == 1): #G
---> 76         qval = model(state1) #H
     77         qval_ = qval.data.numpy()
     78         if (random.random() < epsilon): #I

3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    101 
    102     def forward(self, input: Tensor) -> Tensor:
--> 103         return F.linear(input, self.weight, self.bias)
    104 
    105     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x4 and 128x64)

JavaScript
 
torch.Size([2, 12, 12])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-22-d2f43f09fd01> in <module>()
     74     status = 1 #F
     75     while(status == 1): #G
---> 76         qval = model(state1) #H
     77         qval_ = qval.data.numpy()
     78         if (random.random() < epsilon): #I
​
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    101 
    102     def forward(self, input: Tensor) -> Tensor:
--> 103         return F.linear(input, self.weight, self.bias)
    104 
    105     def extra_repr(self) -> str:
​
RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x4 and 128x64)
​

mat1 should be the output of the convolutional network after it is flattened, and mat2 is the linear network following it. Appreciate any help. Thanks!

Answer

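Here are the output shapes for each layer, given the un-batched 2x12x12 input printed in your error log. Note that Flatten() keeps the first dimension and only merges the remaining ones, so without a batch dimension the 128x2x2 tensor becomes 128x4 rather than a single row of 512 values: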

Conv2d(2,32,kernel_size=3,padding=1)   # 32x12x12
MaxPool2d(2,2)                         # 32x6x6
Conv2d(32,64,kernel_size=3,padding=1)  # 64x6x6
MaxPool2d(2,2)                         # 64x3x3
Conv2d(64,128,kernel_size=3,padding=1) # 128x3x3
MaxPool2d(2,2,padding=1)               # 128x2x2
Flatten()                              # 128x4

JavaScript
 
Conv2d(2,32,kernel_size=3,padding=1)   # 32x12x12
MaxPool2d(2,2)                         # 32x6x6
Conv2d(32,64,kernel_size=3,padding=1)  # 64x6x6
MaxPool2d(2,2)                         # 64x3x3
Conv2d(64,128,kernel_size=3,padding=1) # 128x3x3
MaxPool2d(2,2,padding=1)               # 128x2x2
Flatten()                              # 128x4
​

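You’ll need to change the kernel parameters and padding sizes if you wish to obtain an output of a given shape. For both Conv2d and MaxPool2d (with dilation 1), each spatial dimension becomes floor((in + 2*padding - kernel_size) / stride) + 1; for example, the last MaxPool2d(2,2,padding=1) maps 3 to floor((3 + 2 - 2) / 2) + 1 = 2. This link might help in calculating the output shapes after each layer.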

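Another approach is to transpose the flattened tensor (128x4 becomes 4x128) before passing it into the Linear layers. You’ll need to add the transpose line to your forward function, as shown below. Note that the Linear layers then treat each of the 4 spatial positions as a separate row, so the output is 4x4 rather than a single vector of 4 values.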

import torch
import torch.nn as nn

class NN(nn.Module):
    """Convolutional network whose flattened feature map is transposed before the linear head.

    Three conv/ReLU/max-pool stages produce 128 channels. The two spatial
    dimensions are flattened into one axis of P positions, the result is
    transposed so the 128 channels are the last axis, and the linear head
    (128 -> 64 -> 4) is applied to every spatial position.

    Accepts an un-batched ``(2, H, W)`` tensor, returning ``(P, 4)``, or a
    batched ``(N, 2, H, W)`` tensor, returning ``(N, P, 4)``. For the
    12x12 input from the question, P is 4 (a 2x2 feature map), so the
    un-batched output is ``(4, 4)``.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            torch.nn.Conv2d(2, 32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )

        self.layer2 = nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )

        self.layer3 = nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2, padding=1),
        )

        # Merge only the two trailing spatial dims. The default
        # nn.Flatten() merges everything after dim 0, which gives (128, P)
        # for an un-batched input but (N, 128 * P) for a batched one, and
        # the transpose below would then no longer line up with Linear(128, ...).
        self.flattened_tensor = nn.Flatten(start_dim=-2, end_dim=-1)

        self.linear_layer = nn.Sequential(
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 4),
        )

    def forward(self, inp):
        """Run the network on ``inp`` and return one 4-vector per spatial position.

        Args:
            inp: float tensor of shape ``(2, H, W)`` or ``(N, 2, H, W)``.

        Returns:
            Tensor of shape ``(P, 4)`` or ``(N, P, 4)``, where P is the number
            of spatial positions left after the three pooling stages.
        """
        conv_output = self.layer3(self.layer2(self.layer1(inp)))
        # (..., 128, h, w) -> (..., 128, P)
        flattened_output = self.flattened_tensor(conv_output)

        # Swap the last two axes so the 128 channels become the feature axis
        # that Linear(128, 64) consumes: (..., 128, P) -> (..., P, 128).
        transposed_matrix = torch.transpose(flattened_output, -2, -1)

        linear_output = self.linear_layer(transposed_matrix)
        return linear_output

# Instantiate the network and run one forward pass.
# `arr` is the input tensor; it is not defined in this snippet
# (presumably the (2, 12, 12) state tensor from the question -- confirm).
model = NN()
output = model(arr)

Python
 
import torch
import torch.nn as nn
​
class NN(nn.Module):
    """Convolutional network whose flattened feature map is transposed before the linear head.

    Three conv/ReLU/max-pool stages produce 128 channels. The two spatial
    dimensions are flattened into one axis of P positions, the result is
    transposed so the 128 channels are the last axis, and the linear head
    (128 -> 64 -> 4) is applied to every spatial position.

    Accepts an un-batched ``(2, H, W)`` tensor, returning ``(P, 4)``, or a
    batched ``(N, 2, H, W)`` tensor, returning ``(N, P, 4)``. For the
    12x12 input from the question, P is 4 (a 2x2 feature map), so the
    un-batched output is ``(4, 4)``.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            torch.nn.Conv2d(2, 32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )

        self.layer2 = nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )

        self.layer3 = nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2, padding=1),
        )

        # Merge only the two trailing spatial dims. The default
        # nn.Flatten() merges everything after dim 0, which gives (128, P)
        # for an un-batched input but (N, 128 * P) for a batched one, and
        # the transpose below would then no longer line up with Linear(128, ...).
        self.flattened_tensor = nn.Flatten(start_dim=-2, end_dim=-1)

        self.linear_layer = nn.Sequential(
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 4),
        )

    def forward(self, inp):
        """Run the network on ``inp`` and return one 4-vector per spatial position.

        Args:
            inp: float tensor of shape ``(2, H, W)`` or ``(N, 2, H, W)``.

        Returns:
            Tensor of shape ``(P, 4)`` or ``(N, P, 4)``, where P is the number
            of spatial positions left after the three pooling stages.
        """
        conv_output = self.layer3(self.layer2(self.layer1(inp)))
        # (..., 128, h, w) -> (..., 128, P)
        flattened_output = self.flattened_tensor(conv_output)

        # Swap the last two axes so the 128 channels become the feature axis
        # that Linear(128, 64) consumes: (..., 128, P) -> (..., P, 128).
        transposed_matrix = torch.transpose(flattened_output, -2, -1)

        linear_output = self.linear_layer(transposed_matrix)
        return linear_output
​
# Instantiate the network and run one forward pass.
# `arr` is the input tensor; it is not defined in this snippet
# (presumably the (2, 12, 12) state tensor from the question -- confirm).
model = NN()
output = model(arr)
​

Advertisement

Answer