I am trying to build a model that predicts whether a credit card transaction is fraudulent. My dataset is available on Kaggle. Everything works until I fit the model, at which point I get this error:
JavaScript
x
5
1
ValueError: Data cardinality is ambiguous:
2
x sizes: 7433462
3
y sizes: 284807
4
Make sure all arrays contain the same number of samples.
5
Could someone help me figure out what’s wrong?
JavaScript
1
34
34
1
import numpy as np
2
import pandas as pd
3
import tensorflow as tf
4
from tensorflow import keras
5
from tensorflow.keras.models import Sequential
6
from tensorflow.keras.layers import Activation, Dense
7
from tensorflow.keras.optimizers import Adam
8
from tensorflow.keras.metrics import categorical_crossentropy
9
from sklearn.utils import shuffle
10
from sklearn.preprocessing import MinMaxScaler
11
12
# Question's script: load creditcard.csv, build label/feature arrays, scale the
# features, then define and fit a small dense classifier. As posted, it ends in
# the "Data cardinality is ambiguous" ValueError quoted in the question.
data = pd.read_csv("creditcard.csv")
13
# NOTE(review): the 'Class' column is stored in trainSamples here and is
# overwritten two statements later; trainLabels (used below) is never assigned
# in this snippet -- presumably this line was meant to assign trainLabels.
trainSamples = data['Class']
14
# Names of the feature columns to select.
# NOTE(review): 'V11' does not appear in this list -- confirm whether that is
# intentional.
labels = ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']
15
trainSamples = data[labels]
16
17
# Convert both to NumPy arrays.
trainLabels = np.array(trainLabels)
18
trainSamples = np.array(trainSamples)
19
20
# NOTE(review): labels and samples are shuffled in two separate calls with no
# shared random_state, so each gets its own permutation and rows no longer
# line up with their labels.
trainLabels = shuffle(trainLabels)
21
trainSamples = shuffle(trainSamples)
22
23
scaler = MinMaxScaler(feature_range = (0, 1))
24
# reshape(-1, 1) flattens the 2-D feature matrix into a single column, so the
# scaled array has (rows * columns) entries instead of one row per
# transaction; this is why x and y have different sizes in model.fit.
scaledTrainSample = scaler.fit_transform(trainSamples.reshape(-1,1))
25
26
model = Sequential([
27
# input_shape = (1, ) means each sample is a single scalar, matching the
# flattened column produced above rather than one row of features.
Dense(units = 16, input_shape = (1, ), activation = 'relu'),
28
Dense(units = 32, activation = 'relu'),
29
# Two softmax outputs, paired with sparse_categorical_crossentropy below.
Dense(units = 2, activation = 'softmax')
30
])
31
32
model.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
33
# The ValueError is raised here: x (scaledTrainSample) and y (trainLabels)
# contain different numbers of samples.
model.fit(x = scaledTrainSample, y = trainLabels, validation_split = 0.1, batch_size = 10, epochs = 300, verbose = 2)
34
Advertisement
Answer
The main issue with your code is that the model’s input shape should be 30, not 1, because you have 30 features, and the output layer should have 1 unit, not 2, because you have a single binary label (i.e. only two classes, 0 or 1). A few other bugs are also corrected in the code below.
JavaScript
1
45
45
1
import numpy as np
2
import pandas as pd
3
import tensorflow as tf
4
from tensorflow.keras.models import Sequential
5
from tensorflow.keras.layers import Dense
6
from tensorflow.keras.optimizers import Adam
7
from sklearn.utils import shuffle
8
from sklearn.preprocessing import MinMaxScaler
9
# Answer's corrected script: load the data, keep features and labels aligned
# while shuffling, scale the features column-wise, and train a binary
# classifier with a single sigmoid output.

# set TensorFlow's global random seed so repeated runs are more reproducible
tf.random.set_seed(0)
10
11
# import the data
12
df = pd.read_csv('creditcard.csv')
13
14
# extract the features and target:
# X is every column except 'Class', y is the 'Class' column, both as NumPy arrays
15
X = df.drop(labels=['Class'], axis=1).values
16
y = df['Class'].values
17
18
# count the number of classes (prints the distinct label values)
19
print(np.unique(y))
20
# [0 1]
21
22
# shuffle the data; passing X and y in one call applies the same permutation
# to both, so each row stays paired with its label
23
X, y = shuffle(X, y, random_state=42)
24
25
# scale the features to the [0, 1] range; X stays 2-D, so there is still one
# row per sample
# NOTE(review): the scaler is fitted on all rows before validation_split is
# applied below, so the validation rows also influence the scaling -- confirm
# this is acceptable
26
scaler = MinMaxScaler(feature_range=(0, 1))
27
X = scaler.fit_transform(X)
28
29
# build the model: the input width is taken from the number of feature columns
# and the output is a single sigmoid unit for the binary label
30
model = Sequential([
31
Dense(units=16, activation='relu', input_shape=(X.shape[1], )),
32
Dense(units=32, activation='relu'),
33
Dense(units=1, activation='sigmoid')
34
])
35
36
# fit the model: binary_crossentropy matches the single sigmoid output;
# validation_split=0.1 holds out 10% of the rows for validation
37
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
38
model.fit(x=X, y=y, validation_split=0.1, batch_size=256, epochs=3)
39
# Epoch 1/3
40
# 1002/1002 [==============================] - 1s 761us/step - loss: 0.1787 - accuracy: 0.9983 - val_loss: 0.0193 - val_accuracy: 0.9981
41
# Epoch 2/3
42
# 1002/1002 [==============================] - 1s 684us/step - loss: 0.0136 - accuracy: 0.9983 - val_loss: 0.0130 - val_accuracy: 0.9981
43
# Epoch 3/3
44
# 1002/1002 [==============================] - 1s 680us/step - loss: 0.0119 - accuracy: 0.9983 - val_loss: 0.0127 - val_accuracy: 0.9981
45