I have the list `x` defined as:
# Sample series 0..9 together with its smallest and largest value.
x = list(np.arange(10))
# NOTE: `min` and `max` shadow the builtins; the names are kept because the
# later snippets pass them around under these names.
min = np.min(x)
max = np.max(x)
and I can create a windowed dataset using the method below:
def get_windowed_data(series, window_size):
    """Build a dataset of (window, label) pairs from a 1-D series.

    Every run of ``window_size`` consecutive values becomes one example: the
    first ``window_size - 1`` values form the window and the final value is
    the label (kept as a length-1 slice).
    """
    dt = tf.data.Dataset.from_tensor_slices(series)
    # Slide one element at a time; incomplete trailing windows are dropped.
    dt = dt.window(window_size, shift=1, drop_remainder=True)
    # Make each window a batch so it can be sliced as a single tensor.
    dt = dt.flat_map(lambda window: window.batch(window_size))
    # Consider the last element as the label and the rest as the window.
    dt = dt.map(lambda window: (window[:-1], window[-1:]))
    return dt
which gives me the output below. Each row is a tuple whose first element is a list with multiple values and whose second element is a list with a single element.
[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]
Now I wish to normalize (between 0 and 1) only the data in the first element, keeping the labels as before, and have tried the code below:
def get_windowed_data(series, window_size, min, max):
    # Attempted variant: scale the window values inside flat_map.
    dt = tf.data.Dataset.from_tensor_slices(series)
    dt = dt.window(window_size, shift=1, drop_remainder=True)
    # dt = dt.flat_map(lambda window: window.batch(window_size))  # make each window a batch
    # NOTE: with the batching step above disabled, `window` is not a tensor
    # here, so slicing it raises
    # "TypeError: '_VariantDataset' object is not subscriptable".
    dt = dt.flat_map(lambda window: ([(x - min) / max for x in window[:-1].numpy()], window[-1:]))
    return dt
So, for example, the output of the first two rows should be:
[0.0, 0.1111111111111111, 0.2222222222222222, 0.3333333333333333] [4]
[0.1111111111111111, 0.2222222222222222, 0.3333333333333333, 0.4444444444444444] [5]
However, using my code it complains with:
lambda window: ([ (x-min)/max for x in window[:-1].numpy()],window[-1:])) TypeError: '_VariantDataset' object is not subscriptable
Advertisement
Answer
After splitting into two elements, you can use another map
function:
# Min-max scale the window; the label is passed through unchanged.
# Dividing by (max - min) rather than max keeps the result in [0, 1] even
# when min is not 0 (for the example data, where min == 0, it is identical).
ds = ds.map(lambda wx, wy: ((wx - min) / (max - min), wy))
Here `wx` is the window and `wy` is the target. The complete example looks as follows:
import tensorflow as tf
import numpy as np

x = list(np.arange(10))
# NOTE: `min` and `max` shadow the builtins; names kept to match the question.
min = np.min(x)
max = np.max(x)


def get_windowed_data(series, window_size, min_value, max_value):
    """Build (scaled window, label) pairs from a 1-D series.

    Every run of ``window_size`` consecutive values becomes one example: the
    first ``window_size - 1`` values are min-max scaled using ``min_value``
    and ``max_value``; the final value is returned unscaled as the label.
    """
    ds = tf.data.Dataset.from_tensor_slices(series)
    # Slide one element at a time; incomplete trailing windows are dropped.
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    # Batch each window so it can be sliced as a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size))
    # Split into (window, label): the last element is the label.
    ds = ds.map(lambda w: (w[:-1], w[-1:]))
    # Scale only the window. Dividing by the range (not by max_value alone)
    # keeps the result in [0, 1] even when min_value is not 0.
    ds = ds.map(lambda wx, wy: ((wx - min_value) / (max_value - min_value), wy))
    return ds


data_normalized = get_windowed_data(x, 5, min, max)
# Distinct loop names so the source list `x` is not overwritten.
for window, label in data_normalized:
    print(window.numpy(), label.numpy())
This will print:
[0. 0.11111111 0.22222222 0.33333333] [4]
[0.11111111 0.22222222 0.33333333 0.44444444] [5]
[0.22222222 0.33333333 0.44444444 0.55555556] [6]
[0.33333333 0.44444444 0.55555556 0.66666667] [7]
[0.44444444 0.55555556 0.66666667 0.77777778] [8]
[0.55555556 0.66666667 0.77777778 0.88888889] [9]