3D Augmented Reality using OpenCV and Python


It is time. For. 3D Augmented Reality.

In a previous post, Augmented Reality using OpenCV and Python, I was able to augment my webcam stream with a cube:

(image: augmentedreality_policeman)

In my last two posts, Glyph recognition using OpenCV and Python and Glyph recognition using OpenCV and Python (Mark II), I was able to draw devils on glyphs:

Top-notch!

So why not bring this all together, and stick a cube on top of each devil:

Perfect. So. What. Next?

Well, now that we know how to project a 3D space around our glyphs, let's render something prettier than a grey cube. Stay tuned.

P.S.

Here’s all the code, which the previous posts explain:

The main program

import cv2
from glyphfunctions import *
from glyphdatabase import *
from webcam import Webcam

webcam = Webcam()
webcam.start()

QUADRILATERAL_POINTS = 4
BLACK_THRESHOLD = 100
WHITE_THRESHOLD = 155

while True:

    # Stage 1: Read an image from our webcam
    image = webcam.get_current_frame()

    # Stage 2: Detect edges in image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5,5), 0)
    edges = cv2.Canny(gray, 100, 200)

    # Stage 3: Find contours
    # note: OpenCV 2.x and 4.x return (contours, hierarchy); OpenCV 3.x returns three values
    contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

    for contour in contours:
  
        # Stage 4: Shape check
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.01*perimeter, True)

        if len(approx) == QUADRILATERAL_POINTS:

            # Stage 5: Perspective warping
            topdown_quad = get_topdown_quad(gray, approx.reshape(4, 2))

            # Stage 6: Border check (the pixel 5% in from the corner must be black;
            # cast the indices to int, since float indices fail in Python 3)
            if topdown_quad[int((topdown_quad.shape[0]/100.0)*5),
                            int((topdown_quad.shape[1]/100.0)*5)] > BLACK_THRESHOLD: continue

            # Stage 7: Glyph pattern
            glyph_pattern = get_glyph_pattern(topdown_quad, BLACK_THRESHOLD, WHITE_THRESHOLD)
            glyph_found, glyph_rotation, glyph_substitute = match_glyph_pattern(glyph_pattern)

            if glyph_found:

                # Stage 8: Substitute glyph
                substitute_image = cv2.imread('glyphs/images/{}.jpg'.format(glyph_substitute))
                
                for _ in range(glyph_rotation):
                    substitute_image = rotate_image(substitute_image, 90)
                
                image = add_substitute_quad(image, substitute_image, approx.reshape(4, 2))

                # Stage 9: Add effects
                image = add_effects(image, approx.reshape(4, 2))

    # Stage 10: Show augmented reality
    cv2.imshow('3D Augmented Reality using Glyphs', image)
    cv2.waitKey(10)
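
A small optional tweak (my addition, not in the original listing): have Stage 10 exit the loop on a key press instead of running forever:

    # Stage 10 variant: show the frame, quit when 'q' is pressed
    cv2.imshow('3D Augmented Reality using Glyphs', image)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break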

glyphfunctions.py

import cv2
import numpy as np
from effects import Effects

effects = Effects()

def order_points(points):

    s = points.sum(axis=1)
    diff = np.diff(points, axis=1)
    
    ordered_points = np.zeros((4,2), dtype="float32")

    ordered_points[0] = points[np.argmin(s)]
    ordered_points[2] = points[np.argmax(s)]
    ordered_points[1] = points[np.argmin(diff)]
    ordered_points[3] = points[np.argmax(diff)]

    return ordered_points
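
To see what order_points does, here's a quick worked example (the coordinates are made up):

points = np.array([[320, 60], [80, 80], [300, 300], [60, 280]], dtype="float32")
print(order_points(points))
# [[ 80.  80.]   top-left     (smallest x + y)
#  [320.  60.]   top-right    (smallest y - x)
#  [300. 300.]   bottom-right (largest x + y)
#  [ 60. 280.]]  bottom-left  (largest y - x)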

def max_width_height(points):

    (tl, tr, br, bl) = points

    top_width = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    bottom_width = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    max_width = max(int(top_width), int(bottom_width))

    left_height = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    right_height = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    max_height = max(int(left_height), int(right_height))

    return (max_width,max_height)

def topdown_points(max_width, max_height):
    return np.array([
        [0, 0],
        [max_width-1, 0],
        [max_width-1, max_height-1],
        [0, max_height-1]], dtype="float32")

def get_topdown_quad(image, src):

    # src and dst points
    src = order_points(src)

    (max_width,max_height) = max_width_height(src)
    dst = topdown_points(max_width, max_height)
 
    # warp perspective
    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (max_width, max_height))

    # return top-down quad
    return warped

def add_substitute_quad(image, substitute_quad, dst):

    # dst (zeroed) and src points
    dst = order_points(dst)

    (tl, tr, br, bl) = dst
    min_x = min(int(tl[0]), int(bl[0]))
    min_y = min(int(tl[1]), int(tr[1]))

    for point in dst:
        point[0] = point[0] - min_x
        point[1] = point[1] - min_y

    (max_width,max_height) = max_width_height(dst)
    src = topdown_points(max_width, max_height)

    # warp perspective (with white border)
    substitute_quad = cv2.resize(substitute_quad, (max_width,max_height))

    warped = np.zeros((max_height,max_width,3), np.uint8)
    warped[:,:,:] = 255

    matrix = cv2.getPerspectiveTransform(src, dst)
    cv2.warpPerspective(substitute_quad, matrix, (max_width,max_height), warped, borderMode=cv2.BORDER_TRANSPARENT)

    # add substitute quad
    image[min_y:min_y + max_height, min_x:min_x + max_width] = warped

    return image

def add_effects(image, points):
    
    # order points
    points = order_points(points)

    # add cube effect
    image = effects.render_cube(image, points)

    return image

def get_glyph_pattern(image, black_threshold, white_threshold):

    # collect pixel from each cell (left to right, top to bottom)
    cells = []
    
    cell_half_width = int(round(image.shape[1] / 10.0))
    cell_half_height = int(round(image.shape[0] / 10.0))

    row1 = cell_half_height*3
    row2 = cell_half_height*5
    row3 = cell_half_height*7
    col1 = cell_half_width*3
    col2 = cell_half_width*5
    col3 = cell_half_width*7

    cells.append(image[row1, col1])
    cells.append(image[row1, col2])
    cells.append(image[row1, col3])
    cells.append(image[row2, col1])
    cells.append(image[row2, col2])
    cells.append(image[row2, col3])
    cells.append(image[row3, col1])
    cells.append(image[row3, col2])
    cells.append(image[row3, col3])

    # threshold pixels to either black or white
    for idx, val in enumerate(cells):
        if val < black_threshold:
            cells[idx] = 0
        elif val > white_threshold:
            cells[idx] = 1
        else:
            return None

    return cells
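
The glyph is treated as a 5x5 grid: the rows and columns above land on the centres of the inner 3x3 cells, while the outer ring of cells forms the black border. A hypothetical example:

# a clean, correctly oriented "devil" glyph sampled top-down
pattern = get_glyph_pattern(topdown_quad, BLACK_THRESHOLD, WHITE_THRESHOLD)
# -> [0, 1, 0, 1, 0, 0, 0, 1, 1]  (the first rotation of "devil" in the glyph table)
# -> None whenever a sampled pixel is neither clearly black nor clearly white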

def rotate_image(image, angle):
    # rotate about the centre, keeping the original canvas size
    # (nothing is clipped here, since the substitute glyph images are square)
    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, rotation_matrix, (w, h))

glyphdatabase.py

# Glyph table: each record holds the four 90-degree rotations of a
# glyph's 3x3 pattern, plus the name of its substitute image
GLYPH_TABLE = [
    [[[0, 1, 0, 1, 0, 0, 0, 1, 1],
      [0, 0, 1, 1, 0, 1, 0, 1, 0],
      [1, 1, 0, 0, 0, 1, 0, 1, 0],
      [0, 1, 0, 1, 0, 1, 1, 0, 0]], "devil"],
    [[[1, 0, 0, 0, 1, 0, 1, 0, 1],
      [0, 0, 1, 0, 1, 0, 1, 0, 1],
      [1, 0, 1, 0, 1, 0, 0, 0, 1],
      [1, 0, 1, 0, 1, 0, 1, 0, 0]], "devil_red"]]

# Match glyph pattern to database record
def match_glyph_pattern(glyph_pattern):
    glyph_found = False
    glyph_rotation = None
    glyph_substitute = None
    
    for glyph_record in GLYPH_TABLE:
        for idx, val in enumerate(glyph_record[0]):    
            if glyph_pattern == val: 
                glyph_found = True
                glyph_rotation = idx
                glyph_substitute = glyph_record[1]
                break
        if glyph_found: break

    return (glyph_found, glyph_rotation, glyph_substitute)
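
Since each record stores all four 90-degree rotations, the rotation index comes back for free. A quick example against the table above:

found, rotation, substitute = match_glyph_pattern([0, 0, 1, 1, 0, 1, 0, 1, 0])
# -> (True, 1, 'devil'): the pattern matches the second rotation of "devil",
#    so the main program rotates the substitute image by 90 degrees once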

effects.py

import cv2
import numpy as np
  
class Effects:
    
    def render_cube(self, image, points):
 
        # load calibration data (reloaded every frame for simplicity; caching would be faster)
        with np.load('webcam_calibration_ouput.npz') as X:
            mtx, dist, _, _ = [X[i] for i in ('mtx','dist','rvecs','tvecs')]
  
        # set up criteria, image, points and axis
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        imgp = np.array(points, dtype="float32")

        # the four glyph corners map to a unit square on the z = 0 plane
        objp = np.array([[0.,0.,0.],[1.,0.,0.],
                         [1.,1.,0.],[0.,1.,0.]], dtype="float32")  

        # cube vertices: base square on the glyph, top square at z = -1 (towards the camera)
        axis = np.float32([[0,0,0], [0,1,0], [1,1,0], [1,0,0],
                           [0,0,-1],[0,1,-1],[1,1,-1],[1,0,-1] ])

        # project 3D points to image plane
        cv2.cornerSubPix(gray,imgp,(11,11),(-1,-1),criteria)
        # OpenCV 3 and later return a success flag first; OpenCV 2.4 returned three values
        _, rvecs, tvecs, _ = cv2.solvePnPRansac(objp, imgp, mtx, dist)
        imgpts, _ = cv2.projectPoints(axis, rvecs, tvecs, mtx, dist)
  
        # draw cube
        self._draw_cube(image, imgpts)
        
        return image

    def _draw_cube(self, image, imgpts):
        imgpts = np.int32(imgpts).reshape(-1,2)

        # draw pillars
        for i,j in zip(range(4),range(4,8)):
            cv2.line(image,tuple(imgpts[i]),tuple(imgpts[j]),(100,100,100),4)
  
        # draw roof
        cv2.drawContours(image,[imgpts[4:]],-1,(100,100,100),4)
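
render_cube expects a calibration file that already exists on disk. If you don't have one from the earlier post, here's a minimal sketch along the lines of the standard OpenCV chessboard calibration; the 9x6 inner-corner board and the calibration_images folder are my assumptions, so adjust to your setup. It saves under the exact keys and filename that render_cube loads:

import cv2
import glob
import numpy as np

# one object point per inner corner of a 9x6 chessboard, all on the z = 0 plane
objp = np.zeros((9*6, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

objpoints, imgpoints = [], []

# hypothetical folder of chessboard photos taken with the same webcam
for fname in glob.glob('calibration_images/*.jpg'):
    gray = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2GRAY)
    found, corners = cv2.findChessboardCorners(gray, (9, 6), None)
    if found:
        objpoints.append(objp)
        imgpoints.append(corners)

_, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
    objpoints, imgpoints, gray.shape[::-1], None, None)

# keys and filename must match what render_cube loads
np.savez('webcam_calibration_ouput.npz', mtx=mtx, dist=dist, rvecs=rvecs, tvecs=tvecs)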

webcam.py

import cv2
from threading import Thread
 
class Webcam:
 
    def __init__(self):
        self.video_capture = cv2.VideoCapture(0)
        self.current_frame = self.video_capture.read()[1]
         
    # create thread for capturing images
    def start(self):
        Thread(target=self._update_frame, args=()).start()
 
    def _update_frame(self):
        while True:
            self.current_frame = self.video_capture.read()[1]
                 
    # get the current frame
    def get_current_frame(self):
        return self.current_frame
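
The capture thread above runs forever. A variant with a stop flag (my own sketch, not part of the original class) would let it shut down cleanly:

class StoppableWebcam(Webcam):

    def __init__(self):
        super().__init__()
        self.running = True

    # capture frames until stop() is called, then release the camera
    def _update_frame(self):
        while self.running:
            self.current_frame = self.video_capture.read()[1]
        self.video_capture.release()

    # ask the capture loop to finish
    def stop(self):
        self.running = False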