I’m making a CNN and I’ve got an error saying the matrices don’t align. I understand the error, but I don’t know how to fix it. Here is the code:
import numpy as np
import nnfs
import emnist
import os
import cv2
import pickle
import copy

nnfs.init()


# Dense layer
class Layer_Dense:
    # Layer initialization
    def __init__(self, n_inputs, n_neurons,
                 weight_regularizer_l1=0, weight_regularizer_l2=0,
                 bias_regularizer_l1=0, bias_regularizer_l2=0):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
        # Set regularization strength
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2

    # Forward pass
    def forward(self, inputs, training):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases

    # Backward pass
    def backward(self, dvalues):
        # Gradients on parameters
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradients on regularization
        # L1 on weights
        if self.weight_regularizer_l1 > 0:
            dL1 = np.ones_like(self.weights)
            dL1[self.weights < 0] = -1
            self.dweights += self.weight_regularizer_l1 * dL1
        # L2 on weights
        if self.weight_regularizer_l2 > 0:
            self.dweights += 2 * self.weight_regularizer_l2 * self.weights
        # L1 on biases
        if self.bias_regularizer_l1 > 0:
            dL1 = np.ones_like(self.biases)
            dL1[self.biases < 0] = -1
            self.dbiases += self.bias_regularizer_l1 * dL1
        # L2 on biases
        if self.bias_regularizer_l2 > 0:
            self.dbiases += 2 * self.bias_regularizer_l2 * self.biases

        # Gradient on values
        self.dinputs = np.dot(dvalues, self.weights.T)

    # Retrieve layer parameters
    def get_parameters(self):
        return self.weights, self.biases

    # Set weights and biases in a layer instance
    def set_parameters(self, weights, biases):
        self.weights = weights
        self.biases = biases


# Dropout
class Layer_Dropout:
    # Init
    def __init__(self, rate):
        # Store rate, we invert it as for example for dropout
        # of 0.1 we need success rate of 0.9
        self.rate = 1 - rate

    # Forward pass
    def forward(self, inputs, training):
        # Save input values
        self.inputs = inputs
        # If not in the training mode - return values
        if not training:
            self.output = inputs.copy()
            return
        # Generate and save scaled mask
        self.binary_mask = np.random.binomial(1, self.rate, size=inputs.shape) / self.rate
        # Apply mask to output values
        self.output = inputs * self.binary_mask

    # Backward pass
    def backward(self, dvalues):
        # Gradient on values
        self.dinputs = dvalues * self.binary_mask


# Input "layer"
class Layer_Input:
    # Forward pass
    def forward(self, inputs, training):
        self.output = inputs


# ReLU activation
class Activation_ReLU:
    # Forward pass
    def forward(self, inputs, training):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs
        self.output = np.maximum(0, inputs)

    # Backward pass
    def backward(self, dvalues):
        # Since we need to modify original variable,
        # let's make a copy of values first
        self.dinputs = dvalues.copy()
        # Zero gradient where input values were negative
        self.dinputs[self.inputs <= 0] = 0

    # Calculate predictions for outputs
    def predictions(self, outputs):
        return outputs


# Softmax activation
class Activation_Softmax:
    # Forward pass
    def forward(self, inputs, training):
        # Remember input values
        self.inputs = inputs
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities

    # Backward pass
    def backward(self, dvalues):
        # Create uninitialized array
        self.dinputs = np.empty_like(dvalues)
        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):
            # Flatten output array
            single_output = single_output.reshape(-1, 1)
            # Calculate Jacobian matrix of the output
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)

    # Calculate predictions for outputs
    def predictions(self, outputs):
        return np.argmax(outputs, axis=1)


# Adam optimizer
class Optimizer_Adam:
    # Initialize optimizer - set settings
    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7,
                 beta_1=0.9, beta_2=0.999):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    # Call once before any parameter updates
    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    # Update parameters
    def update_params(self, layer):
        # If layer does not contain cache arrays,
        # create them filled with zeros
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)
        # Update momentum with current gradients
        layer.weight_momentums = self.beta_1 * layer.weight_momentums + (1 - self.beta_1) * layer.dweights
        layer.bias_momentums = self.beta_1 * layer.bias_momentums + (1 - self.beta_1) * layer.dbiases
        # Get corrected momentum
        # self.iterations is 0 at first pass
        # and we need to start with 1 here
        weight_momentums_corrected = layer.weight_momentums / (1 - self.beta_1 ** (self.iterations + 1))
        bias_momentums_corrected = layer.bias_momentums / (1 - self.beta_1 ** (self.iterations + 1))
        # Update cache with squared current gradients
        layer.weight_cache = self.beta_2 * layer.weight_cache + (1 - self.beta_2) * layer.dweights**2
        layer.bias_cache = self.beta_2 * layer.bias_cache + (1 - self.beta_2) * layer.dbiases**2
        # Get corrected cache
        weight_cache_corrected = layer.weight_cache / (1 - self.beta_2 ** (self.iterations + 1))
        bias_cache_corrected = layer.bias_cache / (1 - self.beta_2 ** (self.iterations + 1))
        # Vanilla SGD parameter update + normalization
        # with square rooted cache
        layer.weights += -self.current_learning_rate * weight_momentums_corrected / (np.sqrt(weight_cache_corrected) + self.epsilon)
        layer.biases += -self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected) + self.epsilon)

    # Call once after any parameter updates
    def post_update_params(self):
        self.iterations += 1


# Common loss class
class Loss:
    # Regularization loss calculation
    def regularization_loss(self):
        # 0 by default
        regularization_loss = 0
        # Calculate regularization loss
        # iterate all trainable layers
        for layer in self.trainable_layers:
            # L1 regularization - weights
            # calculate only when factor greater than 0
            if layer.weight_regularizer_l1 > 0:
                regularization_loss += layer.weight_regularizer_l1 * np.sum(np.abs(layer.weights))
            # L2 regularization - weights
            if layer.weight_regularizer_l2 > 0:
                regularization_loss += layer.weight_regularizer_l2 * np.sum(layer.weights * layer.weights)
            # L1 regularization - biases
            # calculate only when factor greater than 0
            if layer.bias_regularizer_l1 > 0:
                regularization_loss += layer.bias_regularizer_l1 * np.sum(np.abs(layer.biases))
            # L2 regularization - biases
            if layer.bias_regularizer_l2 > 0:
                regularization_loss += layer.bias_regularizer_l2 * np.sum(layer.biases * layer.biases)
        return regularization_loss

    # Set/remember trainable layers
    def remember_trainable_layers(self, trainable_layers):
        self.trainable_layers = trainable_layers

    # Calculates the data and regularization losses
    # given model output and ground truth values
    def calculate(self, output, y, *, include_regularization=False):
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Calculate mean loss
        data_loss = np.mean(sample_losses)
        # Add accumulated sum of losses and sample count
        self.accumulated_sum += np.sum(sample_losses)
        self.accumulated_count += len(sample_losses)
        # If just data loss - return it
        if not include_regularization:
            return data_loss
        # Return the data and regularization losses
        return data_loss, self.regularization_loss()

    # Calculates accumulated loss
    def calculate_accumulated(self, *, include_regularization=False):
        # Calculate mean loss
        data_loss = self.accumulated_sum / self.accumulated_count
        # If just data loss - return it
        if not include_regularization:
            return data_loss
        # Return the data and regularization losses
        return data_loss, self.regularization_loss()

    # Reset variables for accumulated loss
    def new_pass(self):
        self.accumulated_sum = 0
        self.accumulated_count = 0


# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    # Forward pass
    def forward(self, y_pred, y_true):
        # Number of samples in a batch
        samples = len(y_pred)
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
        # Probabilities for target values -
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

    # Backward pass
    def backward(self, dvalues, y_true):
        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample
        # We'll use the first sample to count them
        labels = len(dvalues[0])
        # If labels are sparse, turn them into one-hot vector
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]
        # Calculate gradient
        self.dinputs = -y_true / dvalues
        # Normalize gradient
        self.dinputs = self.dinputs / samples


# Softmax classifier - combined Softmax activation
# and cross-entropy loss for faster backward step
class Activation_Softmax_Loss_CategoricalCrossentropy():
    # Backward pass
    def backward(self, dvalues, y_true):
        # Number of samples
        samples = len(dvalues)
        # If labels are one-hot encoded,
        # turn them into discrete values
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
        # Copy so we can safely modify
        self.dinputs = dvalues.copy()
        # Calculate gradient
        self.dinputs[range(samples), y_true] -= 1
        # Normalize gradient
        self.dinputs = self.dinputs / samples


# Common accuracy class
class Accuracy:
    # Calculates an accuracy
    # given predictions and ground truth values
    def calculate(self, predictions, y):
        # Get comparison results
        comparisons = self.compare(predictions, y)
        # Calculate an accuracy
        accuracy = np.mean(comparisons)
        # Add accumulated sum of matching values and sample count
        self.accumulated_sum += np.sum(comparisons)
        self.accumulated_count += len(comparisons)
        # Return accuracy
        return accuracy

    # Calculates accumulated accuracy
    def calculate_accumulated(self):
        # Calculate an accuracy
        accuracy = self.accumulated_sum / self.accumulated_count
        # Return the accuracy
        return accuracy

    # Reset variables for accumulated accuracy
    def new_pass(self):
        self.accumulated_sum = 0
        self.accumulated_count = 0


# Accuracy calculation for classification model
class Accuracy_Categorical(Accuracy):
    def __init__(self, *, binary=False):
        # Binary mode?
        self.binary = binary

    # No initialization is needed
    def init(self, y):
        pass

    # Compares predictions to the ground truth values
    def compare(self, predictions, y):
        if not self.binary and len(y.shape) == 2:
            y = np.argmax(y, axis=1)
        return predictions == y


# Model class
class Model:
    def __init__(self):
        # Create a list of network objects
        self.layers = []
        # Softmax classifier's output object
        self.softmax_classifier_output = None

    # Add objects to the model
    def add(self, layer):
        self.layers.append(layer)

    # Set loss, optimizer and accuracy
    def set(self, *, loss=None, optimizer=None, accuracy=None):
        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if accuracy is not None:
            self.accuracy = accuracy

    # Finalize the model
    def finalize(self):
        # Create and set the input layer
        self.input_layer = Layer_Input()
        # Count all the objects
        layer_count = len(self.layers)
        # Initialize a list containing trainable layers:
        self.trainable_layers = []
        # Iterate the objects
        for i in range(layer_count):
            # If it's the first layer,
            # the previous layer object is the input layer
            if i == 0:
                self.layers[i].prev = self.input_layer
                self.layers[i].next = self.layers[i+1]
            # All layers except for the first and the last
            elif i < layer_count - 1:
                self.layers[i].prev = self.layers[i-1]
                self.layers[i].next = self.layers[i+1]
            # The last layer - the next object is the loss
            # Also let's save aside the reference to the last object
            # whose output is the model's output
            else:
                self.layers[i].prev = self.layers[i-1]
                self.layers[i].next = self.loss
                self.output_layer_activation = self.layers[i]
            # If layer contains an attribute called "weights",
            # it's a trainable layer -
            # add it to the list of trainable layers
            # We don't need to check for biases -
            # checking for weights is enough
            if hasattr(self.layers[i], 'weights'):
                self.trainable_layers.append(self.layers[i])
        # Update loss object with trainable layers
        if self.loss is not None:
            self.loss.remember_trainable_layers(self.trainable_layers)
        # If output activation is Softmax and
        # loss function is Categorical Cross-Entropy
        # create an object of combined activation
        # and loss function containing
        # faster gradient calculation
        if isinstance(self.layers[-1], Activation_Softmax) and isinstance(self.loss, Loss_CategoricalCrossentropy):
            # Create an object of combined activation
            # and loss functions
            self.softmax_classifier_output = Activation_Softmax_Loss_CategoricalCrossentropy()

    # Train the model
    def train(self, X, y, *, epochs=1, batch_size=None, print_every=1, validation_data=None):
        # Initialize accuracy object
        self.accuracy.init(y)
        # Default value if batch size is not being set
        train_steps = 1
        # Calculate number of steps
        if batch_size is not None:
            train_steps = len(X) // batch_size
            # Dividing rounds down. If there are some remaining
            # data but not a full batch, this won't include it
            # Add `1` to include this not full batch
            if train_steps * batch_size < len(X):
                train_steps += 1
        # Main training loop
        for epoch in range(1, epochs+1):
            # Print epoch number
            print(f'epoch: {epoch}')
            # Reset accumulated values in loss and accuracy objects
            self.loss.new_pass()
            self.accuracy.new_pass()
            # Iterate over steps
            for step in range(train_steps):
                # If batch size is not set -
                # train using one step and full dataset
                if batch_size is None:
                    batch_X = X
                    batch_y = y
                # Otherwise slice a batch
                else:
                    batch_X = X[step*batch_size:(step+1)*batch_size]
                    batch_y = y[step*batch_size:(step+1)*batch_size]
                # Perform the forward pass
                output = self.forward(batch_X, training=True)
                # Calculate loss
                data_loss, regularization_loss = self.loss.calculate(output, batch_y, include_regularization=True)
                loss = data_loss + regularization_loss
                # Get predictions and calculate an accuracy
                predictions = self.output_layer_activation.predictions(output)
                accuracy = self.accuracy.calculate(predictions, batch_y)
                # Perform backward pass
                self.backward(output, batch_y)
                # Optimize (update parameters)
                self.optimizer.pre_update_params()
                for layer in self.trainable_layers:
                    self.optimizer.update_params(layer)
                self.optimizer.post_update_params()
                # Print a summary
                if not step % print_every or step == train_steps - 1:
                    print(f'step: {step}, ' +
                          f'acc: {accuracy:.3f}, ' +
                          f'loss: {loss:.3f} (' +
                          f'data_loss: {data_loss:.3f}, ' +
                          f'reg_loss: {regularization_loss:.3f}), ' +
                          f'lr: {self.optimizer.current_learning_rate}')
            # Get and print epoch loss and accuracy
            epoch_data_loss, epoch_regularization_loss = self.loss.calculate_accumulated(include_regularization=True)
            epoch_loss = epoch_data_loss + epoch_regularization_loss
            epoch_accuracy = self.accuracy.calculate_accumulated()
            print(f'training, ' +
                  f'acc: {epoch_accuracy:.3f}, ' +
                  f'loss: {epoch_loss:.3f} (' +
                  f'data_loss: {epoch_data_loss:.3f}, ' +
                  f'reg_loss: {epoch_regularization_loss:.3f}), ' +
                  f'lr: {self.optimizer.current_learning_rate}')
            # If there is the validation data
            if validation_data is not None:
                # Evaluate the model:
                self.evaluate(*validation_data, batch_size=batch_size)

    # Evaluates the model using passed-in dataset
    def evaluate(self, X_val, y_val, *, batch_size=None):
        # Default value if batch size is not being set
        validation_steps = 1
        # Calculate number of steps
        if batch_size is not None:
            validation_steps = len(X_val) // batch_size
            # Dividing rounds down. If there are some remaining
            # data but not a full batch, this won't include it
            # Add `1` to include this not full batch
            if validation_steps * batch_size < len(X_val):
                validation_steps += 1
        # Reset accumulated values in loss
        # and accuracy objects
        self.loss.new_pass()
        self.accuracy.new_pass()
        # Iterate over steps
        for step in range(validation_steps):
            # If batch size is not set -
            # train using one step and full dataset
            if batch_size is None:
                batch_X = X_val
                batch_y = y_val
            # Otherwise slice a batch
            else:
                batch_X = X_val[step*batch_size:(step+1)*batch_size]
                batch_y = y_val[step*batch_size:(step+1)*batch_size]
            # Perform the forward pass
            output = self.forward(batch_X, training=False)
            # Calculate the loss
            self.loss.calculate(output, batch_y)
            # Get predictions and calculate an accuracy
            predictions = self.output_layer_activation.predictions(output)
            self.accuracy.calculate(predictions, batch_y)
        # Get and print validation loss and accuracy
        validation_loss = self.loss.calculate_accumulated()
        validation_accuracy = self.accuracy.calculate_accumulated()
        # Print a summary
        print(f'validation, ' +
              f'acc: {validation_accuracy:.3f}, ' +
              f'loss: {validation_loss:.3f}')

    # Predicts on the samples
    def predict(self, X, *, batch_size=None):
        # Default value if batch size is not being set
        prediction_steps = 1
        # Calculate number of steps
        if batch_size is not None:
            prediction_steps = len(X) // batch_size
            # Dividing rounds down. If there are some remaining
            # data but not a full batch, this won't include it
            # Add `1` to include this not full batch
            if prediction_steps * batch_size < len(X):
                prediction_steps += 1
        # Model outputs
        output = []
        # Iterate over steps
        for step in range(prediction_steps):
            # If batch size is not set -
            # predict using one step and full dataset
            if batch_size is None:
                batch_X = X
            # Otherwise slice a batch
            else:
                batch_X = X[step*batch_size:(step+1)*batch_size]
            # Perform the forward pass
            batch_output = self.forward(batch_X, training=False)
            # Append batch prediction to the list of predictions
            output.append(batch_output)
        # Stack and return results
        return np.vstack(output)

    # Performs forward pass
    def forward(self, X, training):
        # Call forward method on the input layer
        # this will set the output property that
        # the first layer in "prev" object is expecting
        self.input_layer.forward(X, training)
        # Call forward method of every object in a chain
        # Pass output of the previous object as a parameter
        for layer in self.layers:
            layer.forward(layer.prev.output, training)
        # "layer" is now the last object from the list,
        # return its output
        return layer.output

    # Performs backward pass
    def backward(self, output, y):
        # If softmax classifier
        if self.softmax_classifier_output is not None:
            # First call backward method
            # on the combined activation/loss
            # this will set dinputs property
            self.softmax_classifier_output.backward(output, y)
            # Since we'll not call backward method of the last layer
            # which is Softmax activation
            # as we used combined activation/loss
            # object, let's set dinputs in this object
            self.layers[-1].dinputs = self.softmax_classifier_output.dinputs
            # Call backward method going through
            # all the objects but last
            # in reversed order passing dinputs as a parameter
            for layer in reversed(self.layers[:-1]):
                layer.backward(layer.next.dinputs)
            return
        # First call backward method on the loss
        # this will set dinputs property that the last
        # layer will try to access shortly
        self.loss.backward(output, y)
        # Call backward method going through all the objects
        # in reversed order passing dinputs as a parameter
        for layer in reversed(self.layers):
            layer.backward(layer.next.dinputs)

    # Retrieves and returns parameters of trainable layers
    def get_parameters(self):
        # Create a list for parameters
        parameters = []
        # Iterate trainable layers and get their parameters
        for layer in self.trainable_layers:
            parameters.append(layer.get_parameters())
        # Return a list
        return parameters

    # Updates the model with new parameters
    def set_parameters(self, parameters):
        # Iterate over the parameters and layers
        # and update each layer with each set of the parameters
        for parameter_set, layer in zip(parameters, self.trainable_layers):
            layer.set_parameters(*parameter_set)

    # Saves the parameters to a file
    def save_parameters(self, path):
        # Open a file in the binary-write mode
        # and save parameters into it
        with open(path, 'wb') as f:
            pickle.dump(self.get_parameters(), f)

    # Loads the weights and updates a model instance with them
    def load_parameters(self, path):
        # Open file in the binary-read mode,
        # load weights and update trainable layers
        with open(path, 'rb') as f:
            self.set_parameters(pickle.load(f))

    # Saves the model
    def save(self, path):
        # Make a deep copy of current model instance
        model = copy.deepcopy(self)
        # Reset accumulated values in loss and accuracy objects
        model.loss.new_pass()
        model.accuracy.new_pass()
        # Remove data from the input layer
        # and gradients from the loss object
        model.input_layer.__dict__.pop('output', None)
        model.loss.__dict__.pop('dinputs', None)
        # For each layer remove inputs, output and dinputs properties
        for layer in model.layers:
            for property in ['inputs', 'output', 'dinputs', 'dweights', 'dbiases']:
                layer.__dict__.pop(property, None)
        # Open a file in the binary-write mode and save the model
        with open(path, 'wb') as f:
            pickle.dump(model, f)

    # Loads and returns a model
    @staticmethod
    def load(path):
        # Open file in the binary-read mode, load a model
        with open(path, 'rb') as f:
            model = pickle.load(f)
        # Return a model
        return model


# Create dataset
X, y = emnist.extract_training_samples('digits')
X_test, y_test = emnist.extract_test_samples('digits')

# Instantiate the model
model = Model()

# Add layers
model.add(Layer_Dense(2, 512, weight_regularizer_l2=5e-4, bias_regularizer_l2=5e-4))
model.add(Activation_ReLU())
model.add(Layer_Dropout(0.1))
model.add(Layer_Dense(512, 3))
model.add(Activation_Softmax())

# Set loss, optimizer and accuracy objects
model.set(
    loss=Loss_CategoricalCrossentropy(),
    optimizer=Optimizer_Adam(learning_rate=0.05, decay=5e-5),
    accuracy=Accuracy_Categorical()
)

# Finalize the model
model.finalize()

# Train the model
model.train(X, y, validation_data=(X_test, y_test), epochs=10000, print_every=100)
And this is the error I get in Sublime Text:
epoch: 1
Traceback (most recent call last):
  File "/media/luke/New Volume/final project/untitled.py", line 654, in <module>
    model.train(X, y, validation_data=(X_test, y_test),epochs=10000, print_every=100)
  File "/media/luke/New Volume/final project/untitled.py", line 430, in train
    output = self.forward(batch_X, training=True)
  File "/media/luke/New Volume/final project/untitled.py", line 545, in forward
    layer.forward(layer.prev.output, training)
  File "/media/luke/New Volume/final project/untitled.py", line 29, in forward
    self.output = np.dot(inputs, self.weights) + self.biases
  File "/home/luke/.local/lib/python3.8/site-packages/nnfs/core.py", line 22, in dot
    return orig_dot(*[a.astype('float64') for a in args], **kwargs).astype('float32')
  File "<__array_function__ internals>", line 5, in dot
ValueError: shapes (240000,28,28) and (2,512) not aligned: 28 (dim 2) != 2 (dim 0)
As you can see, it reaches epoch 1 and then fails when it tries to compute the NumPy dot product.
I’d appreciate any help
Thanks :)
Answer
Firstly, you should flatten your input so its shape is (240000, 28*28) = (240000, 784). After that, the problem is in this line:
model.add(Layer_Dense(2, 512, weight_regularizer_l2=5e-4, bias_regularizer_l2=5e-4))
You set your input size to 2, when it should be 784, the number of pixels in each 28x28 image (assuming you're using the EMNIST digits set loaded above).
model.add(Layer_Dense(784, 512, weight_regularizer_l2=5e-4, bias_regularizer_l2=5e-4))
This should work correctly once your inputs are flattened.
Edit: To flatten your inputs, I would use np.reshape, as demonstrated here: https://stackoverflow.com/a/18758049/11777402.
X.reshape(240000, 784)
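Putting the two changes together, here is a minimal sketch of how the fix could look, assuming the emnist arrays keep the (N, 28, 28) shape shown in the error and reusing the Model and Layer_Dense classes from the question; reshape(X.shape[0], -1) is just a way to avoid hardcoding the 240000 sample count:

import emnist

# Load EMNIST digits as in the question; the arrays come back as (N, 28, 28)
X, y = emnist.extract_training_samples('digits')
X_test, y_test = emnist.extract_test_samples('digits')

# Flatten each 28x28 image into a single 784-value row
X = X.reshape(X.shape[0], -1)               # (240000, 784)
X_test = X_test.reshape(X_test.shape[0], -1)

# The first Dense layer of the question's model then takes 784 inputs instead of 2
model.add(Layer_Dense(784, 512, weight_regularizer_l2=5e-4, bias_regularizer_l2=5e-4))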