I’m trying to prepare some image data for my neural network to classify. As part of the image preprocessing step, I’m applying the HOG filter in my dataset class like so:
```python
class GetHogData(Dataset):
    def __init__(self, df, root, transform=None):
        self.df = df
        self.root = root
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = os.path.join(self.root, self.df.iloc[idx, 0])
        # image = Image.open(img_path)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.df.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        hog_, hog_image = hog(
            image,
            orientations=9,
            pixels_per_cell=(14, 14),
            cells_per_block=(2, 2),
            block_norm="L1")

        image = np.transpose(image, (2, 0, 1))

        img_hog_lbl = {
            "image": torch.tensor(image, dtype=torch.float32),
            "label": torch.tensor(label, dtype=torch.long),
            "hog": torch.tensor(hog_, dtype=torch.float32)
        }
        return img_hog_lbl
```
After this, I define my train and validation transformations as follows:
```python
# Image mean and standard dev
img_mean = [0.485, 0.456, 0.406]
img_std = [0.229, 0.224, 0.225]

train_trans = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std)
])

test_trans = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std)
])
```
and finally, I create the datasets as follows:
```python
train_img = GetHogData(df=train_lab, root="/content/train", transform=train_trans)
test_img = GetHogData(df=test_lab, root="/content/test", transform=test_trans)
```
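For context, a minimal sketch of how these datasets would typically be wrapped in loaders (the batch size and shuffle settings here are assumptions, not from my actual code):

```python
from torch.utils.data import DataLoader

# Assumed values for illustration; tune batch_size/num_workers for your setup.
train_loader = DataLoader(train_img, batch_size=32, shuffle=True)
test_loader = DataLoader(test_img, batch_size=32, shuffle=False)
```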
However, when I attempt to preview an image with test_img[1], I get the error:
```
TypeError                                 Traceback (most recent call last)
<ipython-input-132-b9a9394eb1e0> in <module>()
----> 1 test_img[1]

5 frames
/usr/local/lib/python3.7/dist-packages/torchvision/transforms/functional_pil.py in resize(img, size, interpolation)
    207 def resize(img, size, interpolation=Image.BILINEAR):
    208     if not _is_pil_image(img):
--> 209         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    210     if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
    211         raise TypeError('Got inappropriate size arg: {}'.format(size))

TypeError: img should be PIL Image. Got <class 'numpy.ndarray'>
```
I’ve tried to add transforms.ToPILImage() to my transforms by doing:
```python
# Image mean and standard dev
img_mean = [0.485, 0.456, 0.406]
img_std = [0.229, 0.224, 0.225]

train_trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std)
])

test_trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std)
])
```
but I got the error:
```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-135-b9a9394eb1e0> in <module>()
----> 1 test_img[1]

1 frames
<ipython-input-129-8551c2e76038> in __getitem__(self, idx)
     27             pixels_per_cell = (14,14),
     28             cells_per_block = (2,2),
---> 29             block_norm = "L1")
     30
     31         image = np.transpose(image, (2, 0, 1))

/usr/local/lib/python3.7/dist-packages/skimage/feature/_hog.py in hog(image, orientations, pixels_per_cell, cells_per_block, block_norm, visualize, transform_sqrt, feature_vector, multichannel)
    273     n_blocks_col = (n_cells_col - b_col) + 1
    274     normalized_blocks = np.zeros((n_blocks_row, n_blocks_col,
--> 275                                   b_row, b_col, orientations))
    276
    277     for r in range(n_blocks_row):

ValueError: negative dimensions are not allowed
```
Does anybody have any ideas? Thanks in advance!
Edit – New Error:
```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-154-b9a9394eb1e0> in <module>()
----> 1 test_img[1]

<ipython-input-151-8551c2e76038> in __getitem__(self, idx)
     27             pixels_per_cell = (14,14),
     28             cells_per_block = (2,2),
---> 29             block_norm = "L1")
     30
     31         image = np.transpose(image, (2, 0, 1))

ValueError: too many values to unpack (expected 2)
```
Answer
The problem is, as I wrote in the comment, that skimage's hog expects an ndarray, but you are giving it a torch tensor (the output of ToTensor()), hence the error.
Try this
```python
train_trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std),
    lambda x: np.rollaxis(x.numpy(), 0, 3)
])
```
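To see what the extra lambda produces, here is a minimal sanity check (the zero-filled array is just an illustrative stand-in for one of your cv2 images):

```python
import numpy as np

dummy = np.zeros((256, 256, 3), dtype=np.uint8)  # stand-in for a cv2 image (HWC, uint8)
out = train_trans(dummy)
# The pipeline now ends with an HWC float ndarray, which is what hog() expects.
print(type(out), out.shape)  # <class 'numpy.ndarray'> (224, 224, 3)
```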
Edit
This basically transforms the output to an ndarray and moves the channel axis back to the end.
But as you can see, it's not the best way to fix things, since you have to transform the PIL image to a tensor, then the tensor to an ndarray, and then the ndarray back to a tensor again.
The better way is to transform the PIL image directly to an ndarray and normalize it yourself. For example, in __getitem__:
```python
if self.transform:
    image = self.transform(image)

# add these
image = np.array(image, dtype=np.float32) / 255.0  # float in [0, 1], like ToTensor
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
image[..., 0] -= mean[0]
image[..., 1] -= mean[1]
image[..., 2] -= mean[2]
image[..., 0] /= std[0]
image[..., 1] /= std[1]
image[..., 2] /= std[2]

# these are your code
hog_, hog_image = hog(
```
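The per-channel lines above can also be collapsed into one broadcasted step; a minimal sketch, assuming image is already a float32 HWC array in [0, 1]:

```python
import numpy as np

mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# Broadcasting applies the per-channel mean/std to all pixels at once.
image = (image - mean) / std
```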
And in the transform, just use:
```python
train_trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
])
```
Edit2
Refer to this line. You need to either add visualize=True to the hog() call or remove , hog_image from the unpacking. If you don't need hog_image, then the latter is preferred.
```python
hog_, hog_image = hog(
    image,
    visualize=True,
```
```python
hog_ = hog(
```
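To illustrate the difference between the two calls, here is a small self-contained sketch (the random grayscale array is just a placeholder input):

```python
import numpy as np
from skimage.feature import hog

img = np.random.rand(224, 224)  # placeholder grayscale image

# Default: hog returns only the feature vector.
features = hog(img, orientations=9, pixels_per_cell=(14, 14),
               cells_per_block=(2, 2), block_norm="L1")

# With visualize=True it returns (features, visualization image),
# so unpacking into two names succeeds.
features, hog_image = hog(img, orientations=9, pixels_per_cell=(14, 14),
                          cells_per_block=(2, 2), block_norm="L1",
                          visualize=True)
```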