import numpy as np
import cv2
import skimage
from skimage import color
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter, freqz
Load the video file
Extract its frames, and convert them to double-precision in the range [0, 1]
Convert each of the frames to the YIQ color space
# Load every frame of the face video into a (frames, H, W, 3) float64 array.
# Frames are converted from OpenCV's BGR order to RGB; pixel values stay in
# 0-255 here (later cells divide by 255 for display).
cap = cv2.VideoCapture('./data/face.mp4')
frame_n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
images = np.zeros((frame_n, height, width, 3), dtype='double')
i = 0
while cap.isOpened():
    ret, frame = cap.read()  # frame arrives in BGR order
    if not ret:  # no more frames (or a read failure) -> stop
        break
    images[i] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    i += 1
cap.release()  # free the capture handle (previously leaked)
images.shape
# Load every frame of the baby2 video, overwriting `images` from the previous
# cell — only one of the two loader cells should be run per experiment.
# Same conventions as above: BGR->RGB, float64, values in 0-255.
cap = cv2.VideoCapture('./data/baby2.mp4')
frame_n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
images = np.zeros((frame_n, height, width, 3), dtype='double')
i = 0
while cap.isOpened():
    ret, frame = cap.read()  # frame arrives in BGR order
    if not ret:  # no more frames (or a read failure) -> stop
        break
    images[i] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    i += 1
cap.release()  # free the capture handle (previously leaked)
images.shape
# Preview the first frame (values are 0-255, so scale into [0, 1] for imshow).
plt.imshow(images[0]/255)
plt.imshow(images[0]/255)
# Convert every frame from RGB to YIQ in place.
# NOTE(review): skimage.color.rgb2yiq expects floats in [0, 1] but receives
# 0-255 values here; the transform is linear, so the result is a 255-scaled
# YIQ image — confirm this is intended (downstream code divides by 255).
for i in range(images.shape[0]):
    images[i] = color.rgb2yiq(images[i])
plt.imshow(images[0]/255)
plt.imshow(images[0]/255)
Construct a Laplacian pyramid for every single frame in the video sequence
두개의 이미지가 최소 사이즈가 되는 10 level들의 이미지 피라미드를 만듬
먼저 가우시안 피라미드를 만든 다음에 라플라시안 피라미드를 생성
오리지널 이미지에서 가우시안 필터 + 이미지 사이즈 감소를 반복하여 가우시안 피라미드를 만들고
한 레벨 높은 가우시안 이미지에서 본 레벨 이미지를 빼서 라플라시안 피라미드를 생성
라플라시안 피라미드는 level 9
# Build Gaussian and Laplacian pyramids for every frame.
# Each pyramid level k is a 4-D array holding ALL frames at that resolution.
g_pyramid = []  # 10 Gaussian levels
l_pyramid = []  # Laplacian levels (indices 0-8 are filled below)
probe = images[0]
for _ in range(10):
    # Allocate storage shaped (n_frames, h, w, channels) for this level.
    shape = (images.shape[0], probe.shape[0], probe.shape[1], probe.shape[2])
    g_pyramid.append(np.zeros(shape))
    l_pyramid.append(np.zeros(shape))
    probe = cv2.pyrDown(probe)

# Fill the Gaussian pyramid: blur + downsample repeatedly from each frame.
for idx in range(images.shape[0]):
    current = images[idx]
    for lvl in range(10):
        g_pyramid[lvl][idx] = current
        current = cv2.pyrDown(current)

# Laplacian level = Gaussian level minus the upsampled next-coarser level.
for idx in range(images.shape[0]):
    for lvl in range(9):
        coarser_up = cv2.pyrUp(g_pyramid[lvl + 1][idx])
        coarser_up = cv2.resize(coarser_up, dsize=(g_pyramid[lvl][idx].shape[1], g_pyramid[lvl][idx].shape[0]))
        l_pyramid[lvl][idx] = cv2.subtract(g_pyramid[lvl][idx], coarser_up)

levels = 3
Consider the time series corresponding to the value of a pixel on all spatial levels of the Laplacian pyramid
Convert this time series to the frequency domain using the Fast Fourier Transform
apply a band pass filter to this signal
face : magnification value = 100, spatial frequency cutoff = 1000, cutoff frequencies = 0.83 and 1, frame rate = 30
baby2 : magnification value = 150, spatial frequency cutoff = 600, cutoff frequencies = 2.33 and 2.67, frame rate = 30
과제에서 주어진 논문을 통해서 필터와 파라미터를 선정
필터의 경우 논문에 주어진 대로 ideal filter를 선택함
def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Ideal (brick-wall) temporal band-pass filter in the frequency domain.

    Parameters
    ----------
    data : ndarray
        FFT of a signal taken along axis 0 (``data.shape[0]`` frequency bins).
    lowcut, highcut : float
        Pass-band edges in Hz; bins with ``lowcut < f < highcut`` are kept.
    fs : float
        Sampling (frame) rate in Hz.
    order : int, optional
        Unused — an ideal filter has no order; kept for signature
        compatibility with scipy-style Butterworth helpers.

    Returns
    -------
    ndarray
        A new array with every bin outside the pass band zeroed.  The caller's
        array is no longer mutated in place (previous version clobbered it).

    Note: the strict ``f > lowcut`` test keeps only *positive* frequencies
    (the mirrored negative band is zeroed too), so the inverse FFT of the
    result is complex — downstream code takes ``np.abs`` of it.
    """
    frequencies = np.fft.fftfreq(data.shape[0], d=1.0 / fs)
    mask = np.logical_and(frequencies > lowcut, frequencies < highcut)
    filtered = data.copy()  # work on a copy: don't mutate the input
    filtered[~mask] = 0
    return filtered
# --- face video: pass band 0.83-1 Hz (heart-rate range) at 30 fps ---
fftx = np.fft.fft(g_pyramid[levels], axis=0)  # FFT along the time axis
ffty = bandpass_filter(fftx.copy(), 0.83, 1, 30, order=1)
filtered_x = np.fft.ifft(ffty, axis=0)  # complex: only positive band kept
# --- baby2 video: pass band 2.33-2.67 Hz at 30 fps ---
# NOTE(review): this cell overwrites the face result above; run only the
# cell matching the loaded video.
fftx = np.fft.fft(g_pyramid[levels], axis=0)
ffty = bandpass_filter(fftx.copy(), 2.33, 2.67, 30, order=1)
filtered_x = np.fft.ifft(ffty, axis=0)
Try to observe a variety of video data in frequency domain and identify the frequency band of interest
Analyzing the histogram of frequency responses can be useful to understand the context of your video
주파수의 히스토그램 대신에 임의의 값을 넣었음 face 영상의 알파값이 200, baby2 영상의 알파값이 400
# Amplify the band-passed signal (alpha = 200 for face, 400 for baby2).
# NOTE(review): these are two notebook cells flattened together — run only
# the one matching the loaded video; running both multiplies by 80000.
filtered_x *= 200
filtered_x *= 400
After amplifying the signals, all that is left is to collapse the Laplacian pyramids into a single image per frame.
생성된 라플라시안 피라미드를 통해서 원본 사이즈의 이미지를 재구축
필터링된 이미지를 가우시안 필터 + 사이즈업 한후 한단계 높은 라플라시안 이미지를 더함
# Collapse the pyramid per frame: start from the filtered level-`levels`
# signal, then repeatedly upsample and add the Laplacian detail of each
# finer level until full resolution is reached.
final = np.zeros(images.shape)
for i in range(final.shape[0]): # frames
    tmp = filtered_x[i]  # filtered Gaussian at level `levels` (complex from ifft)
    for j in range(levels-1, -1, -1):
        # np.abs makes the complex signal real on the first pass.
        # NOTE(review): abs is applied at EVERY level, which also clamps
        # negative intermediate sums — confirm this is intended rather
        # than a single magnitude extraction before the loop.
        tmp = cv2.pyrUp(np.abs(tmp), dstsize = (l_pyramid[j][i].shape[1], l_pyramid[j][i].shape[0])) + l_pyramid[j][i]
    final[i] = tmp
plt.imshow(final[1]/255)
# Add the amplified motion back onto the original YIQ frames, convert each
# frame to RGB, preview one, then clamp into the displayable 0-255 range.
final = final + images
for frame_idx in range(final.shape[0]):
    final[frame_idx] = skimage.color.yiq2rgb(final[frame_idx])
plt.imshow(final[1]/255)
np.clip(final, 0, 255, out=final)  # in-place clamp to [0, 255]
# Encode the reconstructed frames to 'face_result.avi' at 30 fps (DIVX).
frame_size = (final[0].shape[1], final[0].shape[0])  # (width, height)
out = cv2.VideoWriter('face_result.avi', cv2.VideoWriter_fourcc(*'DIVX'), 30, frame_size)
for result_frame in final:
    # OpenCV expects BGR; this conversion swaps the R and B channels back.
    out.write(cv2.cvtColor(np.uint8(result_frame), cv2.COLOR_BGR2RGB))
out.release()
# Encode the reconstructed frames to 'baby2_result.avi' at 30 fps (DIVX).
frame_size = (final[0].shape[1], final[0].shape[0])  # (width, height)
out = cv2.VideoWriter('baby2_result.avi', cv2.VideoWriter_fourcc(*'DIVX'), 30, frame_size)
for result_frame in final:
    # OpenCV expects BGR; this conversion swaps the R and B channels back.
    out.write(cv2.cvtColor(np.uint8(result_frame), cv2.COLOR_BGR2RGB))
out.release()