
How to extract, modify, and correctly restore modified bounding boxes

I’m trying to write relatively simple code that extracts the contours of some areas in an image and draws one or more rectangles on them (normally produced by an object detection model); this part works fine. I then need to transform the coordinates of the rectangles drawn on the cropped areas back to the original image (and draw them on it to check that the conversion went well), and that part does not currently work.

The problem is probably in how I compute the transformation matrix for the final cv2.getPerspectiveTransform, but I haven’t found the right way to do it yet. I have tried with the coordinates of the original system (as in the example below) and with the coordinates of the boxes that were drawn, but neither gives the expected result.

The example presented is a simplified case of drawing boxes, since normally the coordinates will come from the AI model. Also, I can’t simply reuse cv2.warpPerspective on the drawn images, since the main goal is to obtain the final coordinates of the drawn boxes.
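In short, the round trip I’m aiming for is: original image → straightened crop via cv2.warpPerspective, then box coordinates found in the crop → original image via the inverse matrix and cv2.perspectiveTransform. Here is a minimal sketch of that idea, with made-up rectangle and box values purely for illustration (not my actual data):

import numpy as np, cv2

img = cv2.imread("input.png")                     # any test image
rect = ((200.0, 150.0), (120.0, 80.0), 30.0)      # hypothetical ( center (x,y), (width, height), angle )
w, h = int(rect[1][0]), int(rect[1][1])

src_pts = cv2.boxPoints(rect).astype("float32")   # 4 corners of the rotated rect in the original image
dst_pts = np.array([[0, h - 1], [0, 0], [w - 1, 0], [w - 1, h - 1]], dtype="float32")

M = cv2.getPerspectiveTransform(src_pts, dst_pts) # original -> crop
crop = cv2.warpPerspective(img, M, (w, h))        # straightened crop

# A hypothetical box found in the crop, given as its 4 corners in (x, y) order:
box_in_crop = np.array([[[10, 10], [w - 10, 10], [w - 10, h - 10], [10, h - 10]]], dtype=np.float32)

# Map the corners back to original-image coordinates with the inverse transform:
M_inv = cv2.getPerspectiveTransform(dst_pts, src_pts) # or np.linalg.inv(M)
box_in_original = cv2.perspectiveTransform(box_in_crop, M_inv)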

Starting image: [image]

Result for the first extracted rectangle (good): [image]

Result for the second extracted rectangle (good): [image]

Result for the starting image with the rectangle drawn (wrong result): [image]

import numpy as np, cv2, os, copy

#-------------------------
# Starting information

PATH = r"C:UsersvincentrmPictures"
folder_final_dataset = os.path.join(PATH, "Test_folder_2")

if not os.path.isdir(folder_final_dataset): os.mkdir(folder_final_dataset)

img_name = os.path.join(PATH, "Test_img_rot_square.png");
mask_name = img_name;

# Used for the images written during the process:
name_img_wo_extension = os.path.split(img_name)[1]
extension = os.path.splitext(name_img_wo_extension)[1]
name_img_wo_extension = name_img_wo_extension[:-len(extension)]

#-------------------------------------------
# Step #0: Read the image

input_img = cv2.imread(img_name)
mask_output = cv2.imread(mask_name)

mask_output = cv2.cvtColor(mask_output, cv2.COLOR_BGR2GRAY)
ret, mask_output = cv2.threshold(mask_output, 127, 255, 0)

#-------------------------------------------
# Step #1: Identify the elements on the image
#----------------------

if cv2.__version__[0] == '3': # ex. 3.4.1 (the version string's first character is compared as a string)
    img2, contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else: # ex. 4.5.3
    contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#end

#-------------------------------------------
# Step #2: Extraction of the contours of the image with rotated box
#----------------------
tempo_img = input_img

#-----------------------------------
input_img_shape = input_img.shape

for (no, c) in enumerate(contours):
    #Method used: Rotated square
    
    # Create mask where white is what we want, black otherwise
    mask_2 = tempo_img
    
    # Content: ( center (x,y), (width, height), angle of rotation ).
    rect = cv2.minAreaRect(c)
    
    # get width and height of the detected rectangle
    width = int(rect[1][0])
    height = int(rect[1][1])
    
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    src_pts = box.astype("float32")
    
    # coordinate of the points in box points after the rectangle has been
    # straightened
    dst_pts = np.array([[0, height-1],
                        [0, 0],
                        [width-1, 0],
                        [width-1, height-1]], dtype="float32")
    
    # the perspective transformation matrix
    #   - src_pts = coordinate of the rect in the original img
    #   - dst_pts = coordinate of this rect in the final img.
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    
    # directly warp the rotated rectangle to get the straightened rectangle
    out = cv2.warpPerspective(mask_2, M, (width, height))
        
    #================================================
    # Part #3: As a demo, we simply calculate the points of the box as a function
    #          of the extracted rotated box, but normally it would be given by a
    #          trained "Object Detection Model" in TensorFlow
    #------------------------
    
    out_shape = out.shape[0:2] # (H,W) <-> (y,x)
    area_box_draw = [0.15]*2
    
    # Format: (y1,x1, y2,x2) - as I normally have with TensorFlow
    boxes = [ int(out_shape[0]*area_box_draw[0]), int(out_shape[1]*area_box_draw[1]), 
              int(out_shape[0]*(1-area_box_draw[0])), int(out_shape[1]*(1-area_box_draw[1])) ]
    
    boxes = np.expand_dims(boxes, axis=0) # Only used to reproduce the TensorFlow format, where there could be multiple boxes.
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    #------------------
    boxes = boxes[:, ::-1] # Invert (y,x) to (x,y)  
    
    for i in range(0, boxes.shape[0]):
        cv2.rectangle(out, tuple(boxes[i, 0:2].tolist()), tuple(boxes[i, 2:4].tolist()), color, 8)
    #end
    boxes = boxes[:, ::-1] # Revert back from (x,y) to (y,x)
    
    #-----------------------------------------------
    
    # Write the obtained image of the extracted section to verify whether it's correct.
    file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_"+str(no)+extension)
    cv2.imwrite(file_name, out)
    
    #=================================================
    # This is the part that doesn't work as intended:
    #--------------------------------------------
    
    img_shape = np.array(list(out.shape[0:2])*2)
    
    tempo_box = copy.copy(boxes)

    #Format of the coordinate at this point: (y1,x1, y2,x2).
    nb_box = tempo_box.shape[0]
    new_box_pos = [None for i in range(0, nb_box)]
    
    #------------------------------------------
    
    #Format here: (y1 - 0 ,x1 - 1, y2 - 2, x2 - 3)
    height = tempo_box[0, 2] - tempo_box[0, 0]
    width = tempo_box[0, 3] - tempo_box[0, 1]
    
    # The rect aligned horizontally: one behind the other.
    # dst_pts = np.array([[0, height-1], [0, 0],  [width-1, 0], [width-1, height-1]], dtype="float32")
    
    # Seemed worse to me: aligned vertically.
    # dst_pts = np.array([[0, 0], [width-1, 0], [width-1, height-1], [0, height-1]], dtype="float32")
    
    M_2 = cv2.getPerspectiveTransform(dst_pts, src_pts) # Similar result: cv2.invert(M) # But not always the case...
    #M_2 = cv2.invert(M)[1]
    
    # Convert from [ [y1,x1, y2,x2] ] to [ [y1,x1], [y2,x2] ].
    tempo_box = tempo_box.reshape(-1,2).astype(np.float32)
    tempo_box = tempo_box[:, ::-1] # (y,x) to (x,y) format.
    
    converted = cv2.perspectiveTransform(np.expand_dims(tempo_box, axis=0), M_2)
    #converted = converted[:, ::-1] # (x,y) to (y,x) format.
    converted = converted.reshape(-1,4) # Return to rect-format
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    converted = np.int0(converted)
    
    #converted = converted[:, ::-1] # (y,x) to (x,y)
    
    for i in range(0, converted.shape[0]):
        cv2.rectangle(input_img, tuple(converted[i, 0:2].tolist()), tuple(converted[i, 2:4].tolist()), color, 8)
    #end
    #converted = converted[:, ::-1] # # (y,x) to (x,y)
    
#end_for_loop_over_all_contour

# Write the final obtained image so it can be inspected.
file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_Final_version"+extension)

cv2.imwrite(file_name, input_img)


Answer

As suggested in the comments to the question, the solution was to just draw a polygon with 4 points instead of continuing to try to draw rectangles with 2 points.
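To make the reason concrete: the box projected back onto the original image is in general a rotated quadrilateral, and cv2.rectangle only takes two opposite corners and always draws an axis-aligned box, so it cannot represent that shape; cv2.polylines with the four transformed corners can. A minimal sketch with made-up corner values (not taken from the images above):

import numpy as np, cv2

canvas = np.zeros((400, 400, 3), dtype=np.uint8)

# Hypothetical corners of a back-projected (rotated) box, in (x, y) order:
corners = np.array([[50, 120], [220, 60], [280, 230], [110, 290]], dtype=np.int32)

# cv2.rectangle from corners[0] to corners[2] would draw an axis-aligned box,
# not this rotated quadrilateral; cv2.polylines draws the actual 4-sided shape.
cv2.polylines(canvas, [corners], True, (0, 0, 255), 8)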

I’m sharing the code for the final solution (along with some code related to the tests I did), in case someone else runs into a similar issue.

Final result (with the expected result): [image]

import numpy as np, cv2, os, copy

#-------------------------
# Starting information

PATH = r"C:UsersvincentrmPictures"
folder_final_dataset = os.path.join(PATH, "Test_folder_2")

if not os.path.isdir(folder_final_dataset): os.mkdir(folder_final_dataset)

img_name = os.path.join(PATH, "Test_img_rot_squarre.png");
mask_name = img_name;

# Used for the images written during the process:
name_img_wo_extension = os.path.split(img_name)[1]
extension = os.path.splitext(name_img_wo_extension)[1]
name_img_wo_extension = name_img_wo_extension[:-len(extension)]

do_create_with_xy_format = False # Original: False - seems to work correctly with both formats.

#-------------------------------------------
# Step #0: Read the image

input_img = cv2.imread(img_name)
mask_output = cv2.imread(mask_name)

mask_output = cv2.cvtColor(mask_output, cv2.COLOR_BGR2GRAY)
ret, mask_output = cv2.threshold(mask_output, 127, 255, 0)

#-------------------------------------------
# Step #1: Identify the elements on the image
#----------------------

if cv2.__version__[0] == '3': # ex. 3.4.1 (the version string's first character is compared as a string)
    img2, contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else: # ex. 4.5.3
    contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#end

#-------------------------------------------
# Step #2: Extraction of the contours of the image with rotated box
#----------------------
tempo_img = input_img

#-----------------------------------
input_img_shape = input_img.shape

for (no, c) in enumerate(contours):
    #Method used: Rotated square
    
    # Create mask where white is what we want, black otherwise
    mask_2 = tempo_img
    
    # Content: ( center (x,y), (width, height), angle of rotation ).
    rect = cv2.minAreaRect(c)
    
    # get width and height of the detected rectangle
    width = int(rect[1][0])
    height = int(rect[1][1])
    
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    src_pts = box.astype("float32")
    
    # coordinate of the points in box points after the rectangle has been
    # straightened
    dst_pts = np.array([[0, height-1],
                        [0, 0],
                        [width-1, 0],
                        [width-1, height-1]], dtype="float32")
    
    # the perspective transformation matrix
    #   - src_pts = coordinate of the rect in the original img
    #   - dst_pts = coordinate of this rect in the final img.
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    
    # directly warp the rotated rectangle to get the straightened rectangle
    out = cv2.warpPerspective(mask_2, M, (width, height))
        
    #================================================
    # Part #3: As a demo, we simply calculate the points of the box as a function
    #          of the extracted rotated box, but normally it would be given by a
    #          trained "Object Detection Model" in TensorFlow
    #------------------------
    
    out_shape = out.shape[0:2] # (H,W) <-> (y,x)
    area_box_draw = [0.15]*2
    
    # Format: (y1,x1, y2,x2) - as I normally have with TensorFlow
    if not do_create_with_xy_format:
        boxes = [ int(out_shape[0]*area_box_draw[0]), int(out_shape[1]*area_box_draw[1]), 
                  int(out_shape[0]*(1-area_box_draw[0])), int(out_shape[1]*(1-area_box_draw[1])) ]
    else:
        # If create it directly with the (x,y) format.
        boxes = [ int(out_shape[1]*area_box_draw[1]), int(out_shape[0]*area_box_draw[0]), 
                  int(out_shape[1]*(1-area_box_draw[1])), int(out_shape[0]*(1-area_box_draw[0])) ]
    #end
    
    boxes = np.expand_dims(boxes, axis=0) # Only used to reproduce the TensorFlow format, where there could be multiple boxes.
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    #------------------
    if not do_create_with_xy_format:
        boxes = boxes[:, ::-1] # Invert (y,x) to (x,y)  
    #end
    
    for i in range(0, boxes.shape[0]):
        cv2.rectangle(out, tuple(boxes[i, 0:2].tolist()), tuple(boxes[i, 2:4].tolist()), color, 8)
    #end
    
    if not do_create_with_xy_format:
        boxes = boxes[:, ::-1] # Revert back from (x,y) to (y,x)
    #end
    #-----------------------------------------------
    
    # Write the obtained image of the extracted section to verify whether it's correct.
    file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_"+str(no)+extension)
    cv2.imwrite(file_name, out)
    
    #=================================================
    # This is the part that previously didn't work as intended:
    #--------------------------------------------
    
    img_shape = np.array(list(out.shape[0:2])*2)
    
    tempo_box = copy.copy(boxes)

    #Format of the coordinate at this point: (y1,x1, y2,x2).
    nb_box = tempo_box.shape[0]
    
    #------------------------------------------
    
    #Format here: (y1 - 0 ,x1 - 1, y2 - 2, x2 - 3)
    if not do_create_with_xy_format:
        height = tempo_box[0, 2] - tempo_box[0, 0]
        width = tempo_box[0, 3] - tempo_box[0, 1]
        
    else:
        #Format: (x1 - 0, y1 - 1, x2 - 2, y2 - 3)
        width = tempo_box[0, 2] - tempo_box[0, 0]
        height = tempo_box[0, 3] - tempo_box[0, 1]
    #end
    
    M_2 = cv2.getPerspectiveTransform(dst_pts, src_pts) # Similar result: cv2.invert(M) # But not always the case...
    #M_2 = cv2.invert(M)[1]
    
    if not do_create_with_xy_format:
        top_left = tempo_box[0, 0:2].tolist();
        top_right = [ top_left[0], top_left[1]+width ];
        bottom_left = [ top_left[0]+height, top_left[1] ];
        bottom_right = tempo_box[0, 2:4].tolist();
        
    else:
        top_left = tempo_box[0, 0:2].tolist();
        top_right = [ top_left[0]+width, top_left[1] ] # (x,y) <-> (w,h)
        bottom_left = [ top_left[0], top_left[1] + height ] # # (x,y) <-> (w,h)
        bottom_right = tempo_box[0, 2:4].tolist()
    #end
    
    tempo_box = np.array( [top_left, top_right, bottom_right, bottom_left ], dtype = np.float32)
    
    if not do_create_with_xy_format:
        tempo_box = tempo_box[:, ::-1] # (y,x) to (x,y) format.
    #end
    
    converted = cv2.perspectiveTransform(np.expand_dims(tempo_box, axis=0), M_2)
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    converted = np.int0(converted)
    
    # The suggestion was to draw a 4-point polygon instead of a 2-point rectangle:
    for i in range(0, converted.shape[0]):
        input_img = cv2.polylines(input_img, [converted[i]], True, color, 8)
    #end
    
#end_for_loop_over_all_contour

# Write the final obtained image so it can be inspected.
file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_Final_version"+extension)

cv2.imwrite(file_name, input_img)