Ep#36: Comparing HOG Descriptor and Haar Cascade for Object Detection: A Stability Analysis

Two well-known object detection methods are compared in this episode, and the result was as expected: the HOG descriptor method was more stable at detecting humans (our test case) than the Haar cascade. The Haar cascade predicts a lot of false-positive objects and is not accurate, whereas the HOG descriptor works more stably in static scenes but still struggles in dynamic scenes. Below you can test the code on a video camera stream with both methods:

1- HOG Descriptor & Haar Cascade Python code:

 auto_camera = "http://192.168.1.x:8000/stream.mjpg"
            flag_camera = "http://192.168.1.x:8000/stream.mjpg"
            video_capture = cv2.VideoCapture(auto_camera)
            video_captureL = cv2.VideoCapture(auto_camera)
            video_capture_dslr = cv2.VideoCapture(flag_camera)
            video_captureR = cv2.VideoCapture(flag_camera)
            output_width = 640  # Output width in pixels
            output_height = 480  # Output height in pixels

            # Choose a video codec (e.g., XVID) and create a VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_filename = 'compressed_output.avi'
            output_filename_dslr = 'compressed_output_dslr.avi'
            output_fps = 20  # Output frames per second
            output_video = cv2.VideoWriter(output_filename, fourcc, output_fps, (output_width, output_height))
            output_video_dslr = cv2.VideoWriter(output_filename_dslr, fourcc, output_fps, (output_width, output_height))
            video_capture.set(cv2.CAP_PROP_AUTOFOCUS, 1)
            video_capture_dslr.set(cv2.CAP_PROP_AUTOFOCUS, 1)

            if not video_capture.isOpened():
                sys.exit('Video source not found...')
            if not video_capture_dslr.isOpened():
                sys.exit('Video source not found...')
            ret, frame1 = video_capture.read()
            retL, frameL = video_captureL.read()
            retdslr, framedslr = video_capture_dslr.read()
            retR, frameR = video_captureR.read()
            frame = cv2.resize(frame1, (output_width, output_height))
            frame_dslr = cv2.resize(framedslr, (output_width, output_height))
            output_video.write(frame)
            output_video_dslr.write(frame_dslr)



            # Trying HOG with SVM classifier: initialize a single HOG descriptor with
            # the pre-trained person detector and reuse it for Camera 1 (frameL) and
            # Camera 2 (frameR) instead of rebuilding an identical detector per camera.
            hog = cv2.HOGDescriptor()
            hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
            for hog_frame in (frameL, frameR):
                # rects yields one (x, y, w, h) box per detection; weights is not
                # used here. After the loop both names hold the Camera 2 results.
                (rects, weights) = hog.detectMultiScale(hog_frame, winStride=(6, 6), padding=(6, 6), scale=1.02)
                # Draw each detection directly onto the frame it came from
                for (x, y, w, h) in rects:
                    cv2.rectangle(hog_frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
            # Upper-body detection using the Haar cascade file located under
            # cv2.data.haarcascades
            cascade_path = cv2.data.haarcascades + 'haarcascade_upperbody.xml'
            upper_body_cascade = cv2.CascadeClassifier(cascade_path)

            # The cascade is run on grayscale copies of the two resized frames
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray_dslr = cv2.cvtColor(frame_dslr, cv2.COLOR_BGR2GRAY)

            # Detection settings shared by both cameras; only maxSize differs.
            haar_params = dict(scaleFactor=1.1, minNeighbors=2, flags=1, minSize=(40, 40))
            humans = upper_body_cascade.detectMultiScale(gray, maxSize=(150, 250), **haar_params)
            humans_dslr = upper_body_cascade.detectMultiScale(gray_dslr, maxSize=(200, 200), **haar_params)