
How to extract, modify, and correctly restore modified bounding boxes

I’m trying to write relatively simple code that extracts the contours of some areas in an image and draws one or more rectangles on them (normally produced by an object detection model); this part works fine. I then need to transform the coordinates of the rectangles drawn on the cropped areas back to the original image (and draw them on it to check that the conversion went well), and that part does not currently work.

The problem is probably in how I compute the transformation matrix for the final cv2.getPerspectiveTransform, but I haven’t found the right way to do it yet. I have tried with the coordinates of the original system (as in the example below) and with the coordinates of the boxes that were drawn, but neither gives the expected result.

The example presented is a simplified case of drawing boxes, since normally the coordinates will come from the AI model. Also, I can’t simply reuse cv2.warpPerspective on the drawn images, since the main goal is to obtain the final coordinates of the drawn boxes.
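In short, the round trip I’m aiming for is: original image → straightened crop via cv2.warpPerspective, then box coordinates found in the crop → original image via the inverse matrix and cv2.perspectiveTransform. Here is a minimal sketch of that idea, with made-up rectangle and box values purely for illustration (not my actual data):

import numpy as np, cv2

img = cv2.imread("input.png")                     # any test image
rect = ((200.0, 150.0), (120.0, 80.0), 30.0)      # hypothetical ( center (x,y), (width, height), angle )
w, h = int(rect[1][0]), int(rect[1][1])

src_pts = cv2.boxPoints(rect).astype("float32")   # 4 corners of the rotated rect in the original image
dst_pts = np.array([[0, h - 1], [0, 0], [w - 1, 0], [w - 1, h - 1]], dtype="float32")

M = cv2.getPerspectiveTransform(src_pts, dst_pts) # original -> crop
crop = cv2.warpPerspective(img, M, (w, h))        # straightened crop

# A hypothetical box found in the crop, given as its 4 corners in (x, y) order:
box_in_crop = np.array([[[10, 10], [w - 10, 10], [w - 10, h - 10], [10, h - 10]]], dtype=np.float32)

# Map the corners back to original-image coordinates with the inverse transform:
M_inv = cv2.getPerspectiveTransform(dst_pts, src_pts) # or np.linalg.inv(M)
box_in_original = cv2.perspectiveTransform(box_in_crop, M_inv)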

Starting image: [image]

Result for the first extracted rectangle (good): [image]

Result for the second extracted rectangle (good): [image]

Result for the starting image with the rectangle drawn (wrong result): [image]

import numpy as np, cv2, os, copy

#-------------------------
# Starting information

PATH = r"C:UsersvincentrmPictures"
folder_final_dataset = os.path.join(PATH, "Test_folder_2")

if not os.path.isdir(folder_final_dataset): os.mkdir(folder_final_dataset)

img_name = os.path.join(PATH, "Test_img_rot_square.png");
mask_name = img_name;

# Used for the images written during the process:
name_img_wo_extension = os.path.split(img_name)[1]
extension = os.path.splitext(name_img_wo_extension)[1]
name_img_wo_extension = name_img_wo_extension[:-len(extension)]

#-------------------------------------------
# Step #0: Read the image

input_img = cv2.imread(img_name)
mask_output = cv2.imread(mask_name)

mask_output = cv2.cvtColor(mask_output, cv2.COLOR_BGR2GRAY)
ret, mask_output = cv2.threshold(mask_output, 127, 255, 0)

#-------------------------------------------
# Step #1: Identify the elements on the image
#----------------------

if cv2.__version__[0] == '3': # ex. 3.4.1 (the version string's first character is compared as a string)
    img2, contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else: # ex. 4.5.3
    contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#end

#-------------------------------------------
# Step #2: Extraction of the contours of the image with rotated box
#----------------------
tempo_img = input_img

#-----------------------------------
input_img_shape = input_img.shape

for (no, c) in enumerate(contours):
    #Method used: Rotated square
    
    # Create mask where white is what we want, black otherwise
    mask_2 = tempo_img
    
    # Content: ( center (x,y), (width, height), angle of rotation ).
    rect = cv2.minAreaRect(c)
    
    # get width and height of the detected rectangle
    width = int(rect[1][0])
    height = int(rect[1][1])
    
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    src_pts = box.astype("float32")
    
    # coordinate of the points in box points after the rectangle has been
    # straightened
    dst_pts = np.array([[0, height-1],
                        [0, 0],
                        [width-1, 0],
                        [width-1, height-1]], dtype="float32")
    
    # the perspective transformation matrix
    #   - src_pts = coordinate of the rect in the original img
    #   - dst_pts = coordinate of this rect in the final img.
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    
    # directly warp the rotated rectangle to get the straightened rectangle
    out = cv2.warpPerspective(mask_2, M, (width, height))
        
    #================================================
    # Part #3: As a demo, we simply calculate the points of the box as a function
    #          of the extracted rotated box, but normally it would be given by a
    #          trained "Object Detection Model" in TensorFlow
    #------------------------
    
    out_shape = out.shape[0:2] # (H,W) <-> (y,x)
    area_box_draw = [0.15]*2
    
    # Format: (y1,x1, y2,x2) - as I normally have with TensorFlow
    boxes = [ int(out_shape[0]*area_box_draw[0]), int(out_shape[1]*area_box_draw[1]), 
              int(out_shape[0]*(1-area_box_draw[0])), int(out_shape[1]*(1-area_box_draw[1])) ]
    
    boxes = np.expand_dims(boxes, axis=0) # Only used to reproduce the TensorFlow format, where there could be multiple boxes.
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    #------------------
    boxes = boxes[:, ::-1] # Invert (y,x) to (x,y)  
    
    for i in range(0, boxes.shape[0]):
        cv2.rectangle(out, tuple(boxes[i, 0:2].tolist()), tuple(boxes[i, 2:4].tolist()), color, 8)
    #end
    boxes = boxes[:, ::-1] # Revert back from (x,y) to (y,x)
    
    #-----------------------------------------------
    
    # Write the obtained image of the extracted section to verify whether it's correct.
    file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_"+str(no)+extension)
    cv2.imwrite(file_name, out)
    
    #=================================================
    # This is the part that doesn't work as intended:
    #--------------------------------------------
    
    img_shape = np.array(list(out.shape[0:2])*2)
    
    tempo_box = copy.copy(boxes)

    #Format of the coordinate at this point: (y1,x1, y2,x2).
    nb_box = tempo_box.shape[0]
    new_box_pos = [None for i in range(0, nb_box)]
    
    #------------------------------------------
    
    #Format here: (y1 - 0 ,x1 - 1, y2 - 2, x2 - 3)
    height = tempo_box[0, 2] - tempo_box[0, 0]
    width = tempo_box[0, 3] - tempo_box[0, 1]
    
    # The rect aligned horizontally: one behind the other.
    # dst_pts = np.array([[0, height-1], [0, 0],  [width-1, 0], [width-1, height-1]], dtype="float32")
    
    # Seemed worse to me: aligned vertically.
    # dst_pts = np.array([[0, 0], [width-1, 0], [width-1, height-1], [0, height-1]], dtype="float32")
    
    M_2 = cv2.getPerspectiveTransform(dst_pts, src_pts) # Similar result: cv2.invert(M) # But not always the case...
    #M_2 = cv2.invert(M)[1]
    
    # Convert from [ [y1,x1, y2,x2] ] to [ [y1,x1], [y2,x2] ].
    tempo_box = tempo_box.reshape(-1,2).astype(np.float32)
    tempo_box = tempo_box[:, ::-1] # (y,x) to (x,y) format.
    
    converted = cv2.perspectiveTransform(np.expand_dims(tempo_box, axis=0), M_2)
    #converted = converted[:, ::-1] # (x,y) to (y,x) format.
    converted = converted.reshape(-1,4) # Return to rect-format
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    converted = np.int0(converted)
    
    #converted = converted[:, ::-1] # (y,x) to (x,y)
    
    for i in range(0, converted.shape[0]):
        cv2.rectangle(input_img, tuple(converted[i, 0:2].tolist()), tuple(converted[i, 2:4].tolist()), color, 8)
    #end
    #converted = converted[:, ::-1] # # (y,x) to (x,y)
    
#end_for_loop_over_all_contour

# Write the final obtained image so it can be inspected.
file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_Final_version"+extension)

cv2.imwrite(file_name, input_img)


Answer

As suggested in the comments to the question, the solution was to just draw a polygon with 4 points instead of continuing to try to draw rectangles with 2 points.
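To make the reason concrete: the box projected back onto the original image is in general a rotated quadrilateral, and cv2.rectangle only takes two opposite corners and always draws an axis-aligned box, so it cannot represent that shape; cv2.polylines with the four transformed corners can. A minimal sketch with made-up corner values (not taken from the images above):

import numpy as np, cv2

canvas = np.zeros((400, 400, 3), dtype=np.uint8)

# Hypothetical corners of a back-projected (rotated) box, in (x, y) order:
corners = np.array([[50, 120], [220, 60], [280, 230], [110, 290]], dtype=np.int32)

# cv2.rectangle from corners[0] to corners[2] would draw an axis-aligned box,
# not this rotated quadrilateral; cv2.polylines draws the actual 4-sided shape.
cv2.polylines(canvas, [corners], True, (0, 0, 255), 8)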

I’m sharing the code for the final solution (along with some code related to the tests I did), in case someone else runs into a similar issue.

Final result (with the expected result): [image]

import numpy as np, cv2, os, copy

#-------------------------
# Starting information

PATH = r"C:UsersvincentrmPictures"
folder_final_dataset = os.path.join(PATH, "Test_folder_2")

if not os.path.isdir(folder_final_dataset): os.mkdir(folder_final_dataset)

img_name = os.path.join(PATH, "Test_img_rot_squarre.png");
mask_name = img_name;

# Used for the images written during the process:
name_img_wo_extension = os.path.split(img_name)[1]
extension = os.path.splitext(name_img_wo_extension)[1]
name_img_wo_extension = name_img_wo_extension[:-len(extension)]

do_create_with_xy_format = False # Original: False - seems to work correctly with both formats.

#-------------------------------------------
# Step #0: Read the image

input_img = cv2.imread(img_name)
mask_output = cv2.imread(mask_name)

mask_output = cv2.cvtColor(mask_output, cv2.COLOR_BGR2GRAY)
ret, mask_output = cv2.threshold(mask_output, 127, 255, 0)

#-------------------------------------------
# Step #1: Identify the elements on the image
#----------------------

if cv2.__version__[0] == '3': # ex. 3.4.1 (the version string's first character is compared as a string)
    img2, contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else: # ex. 4.5.3
    contours, hierarchy = cv2.findContours(mask_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#end

#-------------------------------------------
# Step #2: Extraction of the contours of the image with rotated box
#----------------------
tempo_img = input_img

#-----------------------------------
input_img_shape = input_img.shape

for (no, c) in enumerate(contours):
    #Method used: Rotated square
    
    # Create mask where white is what we want, black otherwise
    mask_2 = tempo_img
    
    # Content: ( center (x,y), (width, height), angle of rotation ).
    rect = cv2.minAreaRect(c)
    
    # get width and height of the detected rectangle
    width = int(rect[1][0])
    height = int(rect[1][1])
    
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    
    src_pts = box.astype("float32")
    
    # coordinate of the points in box points after the rectangle has been
    # straightened
    dst_pts = np.array([[0, height-1],
                        [0, 0],
                        [width-1, 0],
                        [width-1, height-1]], dtype="float32")
    
    # the perspective transformation matrix
    #   - src_pts = coordinate of the rect in the original img
    #   - dst_pts = coordinate of this rect in the final img.
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    
    # directly warp the rotated rectangle to get the straightened rectangle
    out = cv2.warpPerspective(mask_2, M, (width, height))
        
    #================================================
    # Part #3: As a demo, we simply calculate the points of the box as a function
    #          of the extracted rotated box, but normally it would be given by a
    #          trained "Object Detection Model" in TensorFlow
    #------------------------
    
    out_shape = out.shape[0:2] # (H,W) <-> (y,x)
    area_box_draw = [0.15]*2
    
    # Format: (y1,x1, y2,x2) - as I normally have with TensorFlow
    if not do_create_with_xy_format:
        boxes = [ int(out_shape[0]*area_box_draw[0]), int(out_shape[1]*area_box_draw[1]), 
                  int(out_shape[0]*(1-area_box_draw[0])), int(out_shape[1]*(1-area_box_draw[1])) ]
    else:
        # If create it directly with the (x,y) format.
        boxes = [ int(out_shape[1]*area_box_draw[1]), int(out_shape[0]*area_box_draw[0]), 
                  int(out_shape[1]*(1-area_box_draw[1])), int(out_shape[0]*(1-area_box_draw[0])) ]
    #end
    
    boxes = np.expand_dims(boxes, axis=0) # Only used to reproduce the TensorFlow format, where there could be multiple boxes.
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    #------------------
    if not do_create_with_xy_format:
        boxes = boxes[:, ::-1] # Invert (y,x) to (x,y)  
    #end
    
    for i in range(0, boxes.shape[0]):
        cv2.rectangle(out, tuple(boxes[i, 0:2].tolist()), tuple(boxes[i, 2:4].tolist()), color, 8)
    #end
    
    if not do_create_with_xy_format:
        boxes = boxes[:, ::-1] # Revert back from (x,y) to (y,x)
    #end
    #-----------------------------------------------
    
    # Write the obtained image of the extracted section to verify whether it's correct.
    file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_"+str(no)+extension)
    cv2.imwrite(file_name, out)
    
    #=================================================
    # This is the part that previously didn't work as intended:
    #--------------------------------------------
    
    img_shape = np.array(list(out.shape[0:2])*2)
    
    tempo_box = copy.copy(boxes)

    #Format of the coordinate at this point: (y1,x1, y2,x2).
    nb_box = tempo_box.shape[0]
    
    #------------------------------------------
    
    #Format here: (y1 - 0 ,x1 - 1, y2 - 2, x2 - 3)
    if not do_create_with_xy_format:
        height = tempo_box[0, 2] - tempo_box[0, 0]
        width = tempo_box[0, 3] - tempo_box[0, 1]
        
    else:
        #Format: (x1 - 0, y1 - 1, x2 - 2, y2 - 3)
        width = tempo_box[0, 2] - tempo_box[0, 0]
        height = tempo_box[0, 3] - tempo_box[0, 1]
    #end
    
    M_2 = cv2.getPerspectiveTransform(dst_pts, src_pts) # Similar result: cv2.invert(M) # But not always the case...
    #M_2 = cv2.invert(M)[1]
    
    if not do_create_with_xy_format:
        top_left = tempo_box[0, 0:2].tolist();
        top_right = [ top_left[0], top_left[1]+width ];
        bottom_left = [ top_left[0]+height, top_left[1] ];
        bottom_right = tempo_box[0, 2:4].tolist();
        
    else:
        top_left = tempo_box[0, 0:2].tolist();
        top_right = [ top_left[0]+width, top_left[1] ] # (x,y) <-> (w,h)
        bottom_left = [ top_left[0], top_left[1] + height ] # # (x,y) <-> (w,h)
        bottom_right = tempo_box[0, 2:4].tolist()
    #end
    
    tempo_box = np.array( [top_left, top_right, bottom_right, bottom_left ], dtype = np.float32)
    
    if not do_create_with_xy_format:
        tempo_box = tempo_box[:, ::-1] # (y,x) to (x,y) format.
    #end
    
    converted = cv2.perspectiveTransform(np.expand_dims(tempo_box, axis=0), M_2)
    
    color = [(255,0,0), (0,0,255)][no%2] # ["blue", "red"][no%2]
    
    converted = np.int0(converted)
    
    # The suggestion was to draw a 4-point polygon instead of a 2-point rectangle:
    for i in range(0, converted.shape[0]):
        input_img = cv2.polylines(input_img, [converted[i]], True, color, 8)
    #end
    
#end_for_loop_over_all_contour

# Write the final obtained image so it can be inspected.
file_name = os.path.join(folder_final_dataset, name_img_wo_extension+"_Final_version"+extension)

cv2.imwrite(file_name, input_img)