I just set up my environment for object detection. This is what I have built:
- OpenCV from source (OpenCV tutorial)
- Build darknet (Build Darknet)
The specifications of my laptop are:
- Windows 10 Pro
- RTX 3060Ti (CUDA 11.1, CUDNN 8.6)
- Python 3.7.9
After compiling, I tested darknet following the tutorial, and it detects my GPU, as shown in the picture below: Success, using darknet.
However, because darknet needs many arguments to perform detection, I have built another Python program that lets me run the detection easily.
So here’s the problem: when I run “from_video.py”, the detection is performed on the CPU. The files are arranged as follows: New folder. Let’s take one Python program, “from_video.py”, for testing; “main.py” loads the model and checks the GPU. The code for the Python files is as follows.
from_video.py
"""Run the darknet detector on every frame of a video and display the result.

Each frame is passed to ``darknet_main.performDetect``; every returned
caption/bounding box pair is drawn onto the frame, and the frame is shown in
an OpenCV window named "result".
"""
import cv2
from main import darknet_main

videoPath = "./data/test_sampah1.mp4"

darknetmain = darknet_main()
darknetmain.setGPU(is_GPU=True)

video = cv2.VideoCapture(videoPath)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (width, height)
# Optional: uncomment (together with the two `result` lines below) to also
# save the annotated frames to a file.
# result = cv2.VideoWriter('sampah1_output.mpeg', cv2.VideoWriter_fourcc(*'MJPG'), 20, size)

if video.isOpened():
    try:
        while True:
            res, cv_img = video.read()
            if not res:
                # End of the stream, or the frame could not be read.
                break

            imcaptions, boundingBoxs = darknetmain.performDetect(cv_img)
            if not imcaptions:
                print("no result")

            for caption, box in zip(imcaptions, boundingBoxs):
                print(caption[:5] + " is found")
                # box[0] and box[2] are opposite corners of the rectangle.
                cv_img = cv2.rectangle(cv_img, box[0], box[2], (0, 255, 0), 2)
                cv_img = cv2.putText(cv_img, caption, box[0],
                                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

            # Show every frame, including those without detections, so the
            # window does not keep displaying a stale frame.
            cv2.imshow("result", cv_img)
            # result.write(cv_img)
            cv2.waitKey(1)
    finally:
        # Release the capture and close the window even if detection raised.
        # result.release()
        video.release()
        cv2.destroyAllWindows()
else:
    print("Cannot read the video file")
main.py
"""ctypes wrapper around the darknet YOLO DLL for detection on OpenCV images."""
from ctypes import *
import math
import random
import os
import re

import numpy as np
import cv2


# NOTE(review): the structures below are passed to / returned from the DLL by
# value or pointer, so their field order and types presumably have to match
# the struct definitions the DLL was compiled with -- confirm against the
# darknet headers used for the build.
class BOX(Structure):
    """Bounding box with fields x, y, w, h (all C floats)."""
    _fields_ = [("x", c_float),
                ("y", c_float),
                ("w", c_float),
                ("h", c_float)]


class DETECTION(Structure):
    """One detection record as returned by ``get_network_boxes``."""
    _fields_ = [("bbox", BOX),
                ("classes", c_int),
                ("prob", POINTER(c_float)),
                ("mask", POINTER(c_float)),
                ("objectness", c_float),
                ("sort_class", c_int),
                ("uc", POINTER(c_float)),
                ("points", c_int),
                ("embeddings", POINTER(c_float)),
                ("embedding_size", c_int),
                ("sim", c_float),
                ("track_id", c_int)]


class IMAGE(Structure):
    """Image header (width, height, channels) plus a pointer to float data."""
    _fields_ = [("w", c_int),
                ("h", c_int),
                ("c", c_int),
                ("data", POINTER(c_float))]


class METADATA(Structure):
    """Class count and class-name array as returned by ``get_metadata``."""
    _fields_ = [("classes", c_int),
                ("names", POINTER(c_char_p))]


class darknet_main():
    """Loads a darknet network through ctypes and runs detection on images.

    Call ``setGPU`` once after construction; it loads the DLL, the network
    and the class names. ``performDetect`` can then be called per image.
    """

    def __init__(self):
        self.netMain = None
        self.metaMain = None
        self.altNames = None
        self.namesList = None
        self.lib = None
        self.hasGPU = False
        self.thresh = 0.5
        self.configPath = './model/yolov3_custom_fyp_testing.cfg'
        self.weightPath = './model/yolov3_custom_fyp_last.weights'
        self.metaPath = "./model/obj_fyp.data"
        self.frame = None

    @staticmethod
    def _read_names_list(meta_path):
        """Return the raw lines of the names file referenced by *meta_path*.

        The metadata file is searched for a ``names = <path>`` entry. Returns
        ``None`` when the metadata file cannot be read, has no such entry, or
        the referenced file does not exist or cannot be read. This lookup is
        best-effort; callers fall back to the names provided by the DLL.
        """
        try:
            with open(meta_path) as meta_fh:
                meta_contents = meta_fh.read()
        except (OSError, UnicodeError):
            return None

        match = re.search("names *= *(.*)$", meta_contents,
                          re.IGNORECASE | re.MULTILINE)
        if not match:
            return None

        # Trailing whitespace after the path would make the existence check
        # fail, so it is stripped here.
        names_path = match.group(1).strip()
        if not os.path.exists(names_path):
            return None

        try:
            with open(names_path) as names_fh:
                # One class name per line.
                return names_fh.read().strip().split("\n")
        except (OSError, UnicodeError):
            return None

    def setGPU(self, is_GPU):
        """Load the darknet DLL, declare its signatures and load the model.

        Args:
            is_GPU: when true, "yolo_cpp_dll.dll" is loaded; otherwise
                "yolo_cpp_dll_nogpu.dll" is loaded.

        After this call ``self.netMain`` and ``self.metaMain`` are set, and
        ``self.altNames`` holds the class names read from the names file
        referenced by ``self.metaPath`` (or stays ``None`` if unavailable).
        """
        self.hasGPU = is_GPU
        if self.hasGPU:
            self.lib = CDLL("yolo_cpp_dll.dll", RTLD_GLOBAL)
        else:
            self.lib = CDLL("yolo_cpp_dll_nogpu.dll", RTLD_GLOBAL)

        self.lib.network_width.argtypes = [c_void_p]
        self.lib.network_width.restype = c_int
        self.lib.network_height.argtypes = [c_void_p]
        self.lib.network_height.restype = c_int

        self.predict = self.lib.network_predict
        self.predict.argtypes = [c_void_p, POINTER(c_float)]
        self.predict.restype = POINTER(c_float)

        if self.hasGPU:
            # Only bound here; this class never calls it.
            self.set_gpu = self.lib.cuda_set_device
            self.set_gpu.argtypes = [c_int]

        self.make_image = self.lib.make_image
        self.make_image.argtypes = [c_int, c_int, c_int]
        self.make_image.restype = IMAGE

        self.get_network_boxes = self.lib.get_network_boxes
        self.get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float,
                                           c_float, POINTER(c_int), c_int,
                                           POINTER(c_int), c_int]
        self.get_network_boxes.restype = POINTER(DETECTION)

        self.make_network_boxes = self.lib.make_network_boxes
        self.make_network_boxes.argtypes = [c_void_p]
        self.make_network_boxes.restype = POINTER(DETECTION)

        self.free_detections = self.lib.free_detections
        self.free_detections.argtypes = [POINTER(DETECTION), c_int]

        self.free_ptrs = self.lib.free_ptrs
        self.free_ptrs.argtypes = [POINTER(c_void_p), c_int]

        self.network_predict = self.lib.network_predict
        self.network_predict.argtypes = [c_void_p, POINTER(c_float)]

        self.reset_rnn = self.lib.reset_rnn
        self.reset_rnn.argtypes = [c_void_p]

        self.load_net = self.lib.load_network
        self.load_net.argtypes = [c_char_p, c_char_p, c_int]
        self.load_net.restype = c_void_p

        self.load_net_custom = self.lib.load_network_custom
        self.load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int]
        self.load_net_custom.restype = c_void_p

        self.do_nms_obj = self.lib.do_nms_obj
        self.do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

        self.do_nms_sort = self.lib.do_nms_sort
        self.do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

        self.free_image = self.lib.free_image
        self.free_image.argtypes = [IMAGE]

        self.letterbox_image = self.lib.letterbox_image
        self.letterbox_image.argtypes = [IMAGE, c_int, c_int]
        self.letterbox_image.restype = IMAGE

        self.load_meta = self.lib.get_metadata
        self.lib.get_metadata.argtypes = [c_char_p]
        self.lib.get_metadata.restype = METADATA

        self.load_image = self.lib.load_image_color
        self.load_image.argtypes = [c_char_p, c_int, c_int]
        self.load_image.restype = IMAGE

        self.rgbgr_image = self.lib.rgbgr_image
        self.rgbgr_image.argtypes = [IMAGE]

        self.predict_image = self.lib.network_predict_image
        self.predict_image.argtypes = [c_void_p, IMAGE]
        self.predict_image.restype = POINTER(c_float)

        self.netMain = self.load_net_custom(self.configPath.encode("ascii"),
                                            self.weightPath.encode("ascii"),
                                            0, 1)  # batch size = 1
        self.metaMain = self.load_meta(self.metaPath.encode("ascii"))

        # Best-effort: prefer the names read from the names file; when they
        # are unavailable altNames stays None and meta.names is used instead.
        names = self._read_names_list(self.metaPath)
        if names is not None:
            self.namesList = names
            self.altNames = [x.strip() for x in names]

    def sample(self, probs):
        """Return an index drawn at random with weights proportional to *probs*."""
        s = sum(probs)
        probs = [a / s for a in probs]
        r = random.uniform(0, 1)
        for i, p in enumerate(probs):
            r = r - p
            if r <= 0:
                return i
        return len(probs) - 1

    def c_array(self, ctype, values):
        """Return a ctypes array of *ctype* filled with *values*."""
        arr = (ctype * len(values))()
        arr[:] = values
        return arr

    def array_to_image(self, arr):
        """Convert an (h, w, c) array into an ``IMAGE`` with values / 255.

        Returns:
            ``(im, arr)``: the IMAGE and the float32 buffer its data pointer
            refers to. The caller must keep ``arr`` referenced for as long as
            ``im`` is in use, otherwise Python may free the memory.
        """
        arr = arr.transpose(2, 0, 1)
        c = arr.shape[0]
        h = arr.shape[1]
        w = arr.shape[2]
        arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0
        data = arr.ctypes.data_as(POINTER(c_float))
        im = IMAGE(w, h, c, data)
        return im, arr

    def classify(self, net, meta, im):
        """Run ``predict_image`` and return ``(name, score)`` pairs, best first."""
        out = self.predict_image(net, im)
        res = []
        for i in range(meta.classes):
            if self.altNames is None:
                nameTag = meta.names[i]
            else:
                nameTag = self.altNames[i]
            res.append((nameTag, out[i]))
        res = sorted(res, key=lambda x: -x[1])
        return res

    def detect(self, net, meta, cv_im, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
        """Run detection on a BGR OpenCV image.

        Args:
            net: network handle returned by ``load_net_custom``.
            meta: ``METADATA`` returned by ``load_meta``.
            cv_im: image array in BGR channel order (converted to RGB here).
            thresh: accepted for signature compatibility; the threshold
                actually passed to the DLL is ``self.thresh``.
            hier_thresh: forwarded to ``get_network_boxes``.
            nms: when truthy, ``do_nms_sort`` is applied with this value.
            debug: print progress messages.

        Returns:
            A list of ``(name, prob, (x, y, w, h))`` tuples sorted by
            descending probability. ``get_network_boxes`` is called with the
            width and height of the original image rather than the resized
            network input -- presumably so the boxes come back in the
            original image's coordinates.
        """
        custom_image = cv2.cvtColor(cv_im, cv2.COLOR_BGR2RGB)
        h, w, c_ = custom_image.shape
        custom_image = cv2.resize(
            custom_image,
            (self.lib.network_width(net), self.lib.network_height(net)),
            interpolation=cv2.INTER_LINEAR)
        # `arr` backs im.data and must stay referenced until prediction is
        # done. `im` must not be passed to free_image: Python owns the memory.
        im, arr = self.array_to_image(custom_image)
        if debug: print("Loaded image")
        num = c_int(0)
        if debug: print("Assigned num")
        pnum = pointer(num)
        if debug: print("Assigned pnum")
        self.predict_image(net, im)
        if debug: print("did prediction")
        dets = self.get_network_boxes(net, w, h, self.thresh, hier_thresh,
                                      None, 0, pnum, 0)
        if debug: print("Got dets")
        num = pnum[0]
        if debug: print("got zeroth index of pnum")
        try:
            if nms:
                self.do_nms_sort(dets, num, meta.classes, nms)
            if debug: print("did sort")
            res = []
            if debug: print("about to range")
            for j in range(num):
                if debug: print("Ranging on "+str(j)+" of "+str(num))
                if debug: print("Classes: "+str(meta), meta.classes, meta.names)
                for i in range(meta.classes):
                    if debug: print("Class-ranging on "+str(i)+" of "+str(meta.classes)+"= "+str(dets[j].prob[i]))
                    if dets[j].prob[i] > 0:
                        b = dets[j].bbox
                        if self.altNames is None:
                            # Use the `meta` argument; the instance has no
                            # `meta` attribute.
                            nameTag = meta.names[i]
                        else:
                            nameTag = self.altNames[i]
                        if debug:
                            print("Got bbox", b)
                            print(nameTag)
                            print(dets[j].prob[i])
                            print((b.x, b.y, b.w, b.h))
                        res.append((nameTag, dets[j].prob[i], (b.x, b.y, b.w, b.h)))
            if debug: print("did range")
            res = sorted(res, key=lambda x: -x[1])
            if debug: print("did sort")
        finally:
            # Free the native detections even if the loop above raised.
            self.free_detections(dets, num)
        if debug: print("freed detections")
        return res

    def performDetect(self, cv_img):
        """Detect objects in *cv_img* and return captions and box corners.

        Returns:
            ``(imcaptions, boundingBoxs)``: ``imcaptions`` is a list of
            "<label>: <percent>%" strings; ``boundingBoxs`` holds, for each
            detection, four integer ``(x, y)`` corners in the order
            top-left, bottom-left, bottom-right, top-right.
        """
        detections = self.detect(self.netMain, self.metaMain, cv_img, self.thresh)
        imcaptions = []
        boundingBoxs = []
        for label, confidence, bounds in detections:
            if isinstance(label, bytes):
                # Names taken from meta.names are bytes (c_char_p); decode so
                # the caption concatenation below does not raise TypeError.
                label = label.decode("utf-8", errors="replace")
            pstring = label + ": " + str(np.rint(100 * confidence)) + "%"
            imcaptions.append(pstring)
            print(pstring)

            center_x, center_y, box_w, box_h = bounds
            xExtent = int(box_w)
            yExtent = int(box_h)
            # Coordinates are around the center
            xCoord = int(center_x - box_w / 2)
            yCoord = int(center_y - box_h / 2)
            boundingBox = [
                (xCoord, yCoord),
                (xCoord, yCoord + yExtent),
                (xCoord + xExtent, yCoord + yExtent),
                (xCoord + xExtent, yCoord),
            ]
            boundingBoxs.append(boundingBox)
        return imcaptions, boundingBoxs


if __name__ == "__main__":
    darknetmain = darknet_main()
    darknetmain.setGPU(is_GPU=True)
    imagePath = "./data/bottle_8.png"
    cv_img = cv2.imread(imagePath)
    darknetmain.performDetect(cv_img)
Advertisement
Answer
It seems a little bit complicated. When you build OpenCV from source, make sure that your CUDA arch bin setting supports your GPU's version or higher; in my example, 8.0 and 8.6. Then, when you compile darknet from source, make sure that your compute_ and sm_ settings cover both 8.0 and 8.6.