first
BIN
Database/new.npy
Normal file
BIN
Database/student.npy
Normal file
132
accuracy.py
Normal file
@@ -0,0 +1,132 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    # imdecode handles paths with non-ASCII characters that cv2.imread cannot read
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    # normalize pixel values from [0, 255] to [-1, 1]
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    # raw string keeps the Windows path's backslashes from being read as escapes
    test_path = r"D:\Download\out\cfp_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(os.path.join(test_path, name), img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("cfp.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    # "failed" counts probes rejected as unknown
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    # "error" above also counts the unknowns, so subtract them to get misidentifications
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
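A side note on the threshold=1.20 used by findmindistance above: both the probe and every database entry pass through l2_normalize, and for unit vectors squared Euclidean distance and cosine similarity are tied by ||a - b||^2 = 2 - 2*cos(a, b), so the 1.20 distance cutoff is equivalent to roughly cos > 0.28 (a derived figure, not one stated in the scripts). A minimal NumPy sketch with random vectors:

import numpy as np

a = np.random.randn(512)
b = np.random.randn(512)
a /= np.linalg.norm(a)  # same effect as l2_normalize() above
b /= np.linalg.norm(b)
dist = np.linalg.norm(a - b)
cos = float(np.dot(a, b))
assert np.isclose(dist ** 2, 2 - 2 * cos)  # identity for unit vectors
print(1 - 1.20 ** 2 / 2)  # 0.28: the cosine cutoff implied by dist < 1.20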
134
accuracy_GPU.py
Normal file
@@ -0,0 +1,134 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        # move the prediction back to the CPU before converting to numpy
        pred = pred.cpu().numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        imglist = imglist.to(torch.device("cuda"))
        pred = model(imglist)
        pred = pred.cpu().numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth"))
    model.to(torch.device("cuda"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    test_path = "./retinaface_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(os.path.join(test_path, name), img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("retinaface_lfw_myalign.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
150
anti.py
Normal file
@@ -0,0 +1,150 @@
import os
import cv2
import numpy as np
import argparse
import warnings
import time
import torch
import torch.nn.functional as F

from src.generate_patches import CropImage
from src.model_lib.MiniFASNet import MiniFASNetV1, MiniFASNetV2, MiniFASNetV1SE, MiniFASNetV2SE
from src.data_io import transform as trans
from src.utility import get_kernel, parse_model_name
warnings.filterwarnings('ignore')

MODEL_MAPPING = {
    'MiniFASNetV1': MiniFASNetV1,
    'MiniFASNetV2': MiniFASNetV2,
    'MiniFASNetV1SE': MiniFASNetV1SE,
    'MiniFASNetV2SE': MiniFASNetV2SE
}


class AntiSpoofPredict():
    def __init__(self, cpu_or_cuda):
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")

    def predict(self, img, model):
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        with torch.no_grad():
            result = model.forward(img)
            result = F.softmax(result, dim=1).cpu().numpy()
        return result


def load_anti_model(model_dir, cpu_or_cuda):
    model_list = []
    for model_path in os.listdir(model_dir):
        model_list.append(_load_model(os.path.join(model_dir, model_path), cpu_or_cuda))
    return model_list


def _load_model(model_path, cpu_or_cuda):
    # define model
    device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")
    model_name = os.path.basename(model_path)
    h_input, w_input, model_type, _ = parse_model_name(model_name)
    kernel_size = get_kernel(h_input, w_input)
    model = MODEL_MAPPING[model_type](conv6_kernel=kernel_size).to(device)

    # load model weight
    state_dict = torch.load(model_path, map_location=device)
    first_layer_name = next(iter(state_dict))
    # strip the "module." prefix left over from DataParallel training
    if first_layer_name.find('module.') >= 0:
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, value in state_dict.items():
            name_key = key[7:]
            new_state_dict[name_key] = value
        model.load_state_dict(new_state_dict)
    else:
        model.load_state_dict(state_dict)
    model.eval()
    return model


# the Android APK delivers its video stream at a 3:4 aspect ratio, so the
# width/height ratio is restricted to 3:4 for consistency
def check_image(image):
    height, width, channel = image.shape
    if width / height != 3 / 4:
        print("Image is not appropriate!!!\nHeight/Width should be 4/3.")
        return False
    else:
        return True


# face liveness (anti-spoofing) detection
def anti_spoofing(image_name, model_dir, cpu_or_cuda, bbox, model_list):
    model_test = AntiSpoofPredict(cpu_or_cuda)
    image_cropper = CropImage()
    image = cv2.imdecode(np.fromfile(image_name, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = image.shape[:2]
    factor = h / w
    if (w > 1000):
        image = cv2.resize(image, (600, int(600 * factor)))
    # result = check_image(image)
    # if result is False:
    #     return
    # image_bbox = model_test.get_bbox(image)
    image_bbox = bbox
    prediction = np.zeros((1, 3))
    test_speed = 0
    # sum the prediction from single model's result
    for index, model_name in enumerate(os.listdir(model_dir)):
        h_input, w_input, model_type, scale = parse_model_name(model_name)
        param = {
            "org_img": image,
            "bbox": image_bbox,
            "scale": scale,
            "out_w": w_input,
            "out_h": h_input,
            "crop": True,
        }
        if scale is None:
            param["crop"] = False
        img = image_cropper.crop(**param)

        start = time.time()
        prediction += model_test.predict(img, model_list[index])
        test_speed += time.time() - start

    label = np.argmax(prediction)
    # print(prediction)
    # cv2.rectangle(
    #     image,
    #     (image_bbox[0], image_bbox[1]),
    #     (image_bbox[0] + image_bbox[2], image_bbox[1] + image_bbox[3]),
    #     (225,0,0), 2)
    # cv2.imshow("out",image)
    # cv2.waitKey(0)
    # the ensembled scores were summed, so halve for an average "real" score
    value = prediction[0][1] / 2
    if value > 0.915:
        return "real face", '{:.10f}'.format(value)
    else:
        return "fake face", '{:.10f}'.format(value)


if __name__ == "__main__":
    desc = "test"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="which gpu id, [0/1/2/3]")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="./resources/anti_spoof_models",
        help="model_lib used to test")
    parser.add_argument(
        "--image_name",
        type=str,
        default="000_0.bmp",
        help="image used to test")
    args = parser.parse_args()
    # anti_spoofing(args.image_name, args.model_dir, args.device_id)
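A minimal usage sketch for the two entry points above, assuming the MiniFASNet weights are present under ./resources/anti_spoof_models and that bbox is given as [x, y, w, h] (app.py below converts RetinaFace's corner boxes into this form before calling anti_spoofing); the image filename here is hypothetical:

from anti import load_anti_model, anti_spoofing

model_dir = "./resources/anti_spoof_models"  # assumed weight location
models = load_anti_model(model_dir, "cpu")
bbox = [50, 40, 120, 160]  # hypothetical face box: x, y, w, h
label, score = anti_spoofing("000_0.bmp", model_dir, "cpu", bbox, models)
print(label, score)  # "real face" or "fake face" plus the score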
449
app.py
Normal file
@@ -0,0 +1,449 @@
import time

import faiss
from flask import Flask, render_template, request, jsonify, send_from_directory
from markupsafe import escape, escape_silent
from werkzeug.utils import secure_filename

from anti import anti_spoofing, load_anti_model
from face_api import load_arcface_model, load_npy, findOne, load_image, face_verification, findAll, add_one_to_database, \
    get_claster_tmp_file_embedding, cluster, detect_video
from gender_age import set_gender_conf, gender_age, load_gender_model
from retinaface_detect import load_retinaface_model, detect_one, set_retinaface_conf
from werkzeug.exceptions import RequestEntityTooLarge
import zipfile
import os
import shutil
import re
import numpy as np
import torch

ALLOWED_IMG = set(['png', 'jpg', 'jpeg', 'bmp', 'PNG', 'JPG', 'JPEG'])
# uploaded images are limited to 10M
ALLOWED_IMG_SIZE = 10 * 1024 * 1024
ALLOWED_FILE = set(['zip'])
ALLOWED_VIDEO = set(['mp4'])
app = Flask(__name__)

# uploaded files are limited to 100M
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
# keep jsonify from escaping Chinese characters into garbled ASCII
app.config['JSON_AS_ASCII'] = False

# choose CPU or GPU (pass "cuda")
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# load the face recognition model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# load the face detection model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
# load the gender/age recognition model
gender_args = set_gender_conf()
gender_model = load_gender_model(gender_args, 'fc1')
anti_spoofing_model_path = "model/anti_spoof_models"
anti_model = load_anti_model(anti_spoofing_model_path, cpu_or_cuda)


# the face database itself is loaded in __main__ below


@app.route('/')
def index():
    return "model"


@app.route('/hello')
@app.route('/hello/<name>')
def hello(name=None):
    return render_template('hello.html', name=name)


@app.route('/user', methods=['GET'])
def show_user_name():
    return request.args.get('username', '')


# build the returned json data
# function arguments are tested against None; the data/result dicts defined
# inside the function are tested for truthiness
def create_response(status, name=None, distance=None, verification=None, gender=None, age=None, num=None, anti=None,
                    score=None, box_and_point=None, addfile_names=None, fail_names=None, database_name=None, msg=None,
                    delete_names=None, not_exist_names=None):
    # res is the top-level json structure
    res = {}
    res['status'] = status

    data = {}
    try:
        # box_and_point is an ndarray when present
        data["box_and_point"] = box_and_point.tolist()
    except AttributeError:
        pass
    if anti is not None and score is not None:
        liveness = {}
        liveness["spoofing"] = anti
        liveness['score'] = score
        data['liveness'] = liveness
    if distance is not None:
        data['distance'] = float(distance)
    if verification is not None:
        data['verification'] = verification
    if num is not None:
        data['number'] = num
    if gender is not None:
        data['gender'] = gender
    if age is not None:
        data['age'] = age
    if name is not None:
        data['name'] = name
    if data:
        res['data'] = data

    # data returned by the database add/delete endpoints
    result = {}
    if msg is not None:
        res['msg'] = msg
    if database_name is not None:
        result['database_name'] = database_name
    # added faces
    if addfile_names is not None or fail_names is not None:
        result['success_names'] = addfile_names
        result['fail_names'] = fail_names
    # deleted faces
    if delete_names is not None or not_exist_names is not None:
        result['delete_names'] = delete_names
        result['not_exist_names'] = not_exist_names
    if result:
        res['result'] = result

    return jsonify(res)


# build the json data returned by the cluster endpoint
def create_cluster_response(status, all_cluster):
    res = {}
    data = {}
    for index, cluster in enumerate(all_cluster):
        data['cluster' + str(index)] = cluster
    res['data'] = data
    res['status'] = status
    return res


# check the format of an uploaded file
def check_file_format(file_name, format):
    if '.' in file_name:
        # split once from the right so names with several dots keep their real extension
        file_format = file_name.rsplit('.', 1)[1]
        if file_format in format:
            return True
    return False


# check the image size; raise an exception above 10M
def check_img_size(img_path):
    fsize = os.path.getsize(img_path)
    if fsize > ALLOWED_IMG_SIZE:
        raise RequestEntityTooLarge


# extract a zip file to the given path
def unzip(zip_src, dst_dir):
    f = zipfile.is_zipfile(zip_src)
    if f:
        fz = zipfile.ZipFile(zip_src, 'r')
        for file in fz.namelist():
            fz.extract(file, dst_dir)
        return True
    else:
        return False


# extract a zip file
def un_zip(file_path, output_path):
    zip_file = zipfile.ZipFile(file_path)
    if os.path.isdir(output_path):
        pass
    else:
        os.mkdir(output_path)
    zip_file.extractall(output_path)
    # for names in zip_file.namelist():
    #     zip_file.extract(names,output_path)
    zip_file.close()


# face recognition plus gender/age recognition
@app.route('/recognition', methods=['POST'])
def recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_IMG):
            img_path = './img/recognition/' + secure_filename(f.filename)
            f.save(img_path)
            check_img_size(img_path)
            # img3 = load_image('./file/'+secure_filename(f.filename))
            # img3 = torch.from_numpy(img3)
            tic = time.time()
            img3, box_and_point = detect_one(img_path, retinaface_model, retinaface_args)
            print('detect time: {:.4f}'.format(time.time() - tic))
            if len(img3) == 0:
                return create_response('no face')
            elif len(img3) > 1:
                namelist = findAll(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', namelist, gender=gender_list, age=age_list,
                                      box_and_point=box_and_point)
            else:
                # convert the [x1, y1, x2, y2] corners into [x, y, w, h]
                b = box_and_point[0]
                w = b[2] - b[0]
                h = b[3] - b[1]
                b[2] = w
                b[3] = h
                label, value = anti_spoofing(img_path, anti_spoofing_model_path, cpu_or_cuda, np.array(b[:4], int),
                                             anti_model)
                # print(index,database_name_list)
                name, distance = findOne(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', name, gender=gender_list, age=age_list, distance=distance,
                                      anti=label, score=value, box_and_point=box_and_point)
            return res
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# compare two images
@app.route('/compare', methods=['POST'])
def compare_file():
    try:
        file1 = request.files['file1_name']
        file2 = request.files['file2_name']
        if file1 and check_file_format(file1.filename, ALLOWED_IMG) and file2 and check_file_format(file2.filename,
                                                                                                    ALLOWED_IMG):
            img1_path = './img/compare/' + secure_filename(file1.filename)
            img2_path = './img/compare/' + secure_filename(file2.filename)
            file1.save(img1_path)
            file2.save(img2_path)
            check_img_size(img1_path)
            check_img_size(img2_path)
            img1, box_and_point1 = detect_one(img1_path, retinaface_model,
                                              retinaface_args)
            img2, box_and_point2 = detect_one(img2_path, retinaface_model, retinaface_args)
            if len(img1) == 1 and len(img2) == 1:
                result, distance = face_verification(img1, img2, arcface_model, cpu_or_cuda)
                print(result, distance)
                return create_response('success', verification=result, distance=distance)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# add faces to a database: one or many faces, into an existing or a new database
# create and update
@app.route('/databaseAdd', methods=['POST'])
def DB_add_face():
    try:
        # upload face images (>= 1)
        # all parts share the key file_list; multiple values give batch upload
        upload_files = request.files.getlist("file_list")
        # '', [], {} and 0 are all falsy
        if not upload_files:
            msg = "uploaded file list is empty"
            return create_response(0, msg=msg)
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "database does not exist"
            return create_response(0, msg=msg)
        # names already stored in the database
        names = load_npy(database_path).keys()
        # print(names)

        # temporary server-side folder for uploaded images, recreated before each
        # upload and removed afterwards
        # could be switched to periodic cleanup later
        file_temp_path = './img/uploadNew/'
        if not os.path.exists(file_temp_path):
            os.makedirs(file_temp_path)

        # regular expression extracting the Chinese characters of a filename,
        # used as the keys of the .npy database
        r = re.compile('[\u4e00-\u9fa5]+')
        # collect names that were added successfully or failed
        success_names = []
        fail_names = {}
        # two failure cases: wrong format or already existing
        format_wrong = []
        alreadyExist = []
        # handle each image: check the format first, then whether the name exists
        for file in upload_files:
            filename = file.filename
            matches = r.findall(filename)
            # fall back to the raw filename if it contains no Chinese characters
            name = matches[0] if matches else filename
            if file and check_file_format(filename, ALLOWED_IMG):
                if name in names:
                    alreadyExist.append(name)
                    continue
                save_path = file_temp_path + filename
                file.save(save_path)
                check_img_size(save_path)
                img_file, box_and_point = detect_one(save_path, retinaface_model, retinaface_args)
                add_one_to_database(img=img_file, model=arcface_model, name=name, database_path=database_path,
                                    cpu_or_cuda=cpu_or_cuda)
                success_names.append(name)
            else:
                format_wrong.append(name)
                continue
        shutil.rmtree(file_temp_path)
        # any failure downgrades the status
        if format_wrong or alreadyExist:
            status = 0
        else:
            status = 1
        fail_names['formatWrong'] = format_wrong
        fail_names['alreadyExist'] = alreadyExist

        return create_response(status=status, addfile_names=success_names, fail_names=fail_names,
                               database_name=database_name, msg="face addition finished")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# delete one or many faces from an existing database
@app.route('/databaseDelete', methods=['POST'])
def DB_delete_face():
    try:
        delete_names = request.form.getlist("delete_names")
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "database does not exist"
            return create_response(0, msg=msg)
        if not delete_names:
            msg = "delete_names parameter is empty"
            return create_response(0, msg=msg)
        k_v = load_npy(database_path)
        print(k_v.keys())
        success_list = []
        fail_list = []
        for name in delete_names:
            if name in k_v.keys():
                del k_v[name]
                success_list.append(name)
            else:
                fail_list.append(name)
                continue
        np.save(database_path, k_v)
        status = 1
        if fail_list:
            status = 0
        return create_response(status=status, delete_names=success_list, not_exist_names=fail_list,
                               database_name=database_name, msg="face deletion finished")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# image-search-image endpoints:
# upload a zip archive of images to build the search library
@app.route('/uploadZip', methods=['POST'])
def upload_Zip():
    try:
        zip = request.files['zip_name']
        dst_dir = './img/search/'
        if unzip(zip, dst_dir):
            return create_response('upload zip success')
        else:
            return create_response('upload zip file please')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# search the library by image
@app.route('/imgSearchImg', methods=['POST'])
def img_search_img():
    searchfile = './img/search/face'
    try:
        file = request.files['img_name']
        if file and check_file_format(file.filename, ALLOWED_IMG):
            img_path = './img/search/' + secure_filename(file.filename)
            file.save(img_path)
            check_img_size(img_path)
            img, box_and_point = detect_one(img_path, retinaface_model,
                                            retinaface_args)
            if len(img) == 1:
                Onename = []
                num = 0
                for filenames in os.listdir(searchfile):
                    imgpath = os.path.join(searchfile, filenames)
                    imgdata, box_and_point = detect_one(imgpath, retinaface_model, retinaface_args)
                    result = face_verification(img, imgdata, arcface_model, cpu_or_cuda)
                    isOne, distance = result.split(' ', -1)[0], result.split(' ', -1)[1]
                    if isOne == 'same':
                        Onename.append(filenames)
                        num += 1
                return create_response('success', name=Onename, num=num)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# face clustering endpoint
@app.route('/cluster', methods=['POST'])
def zip_cluster():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_FILE):
            zip_name = secure_filename(f.filename)
            f.save('./img/cluster_tmp_file/' + zip_name)
            un_zip('./img/cluster_tmp_file/' + zip_name, './img/cluster_tmp_file/')
            emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/" + zip_name.rsplit('.')[0],
                                                                 retinaface_model,
                                                                 retinaface_args, arcface_model, cpu_or_cuda)
            return create_cluster_response("success", cluster(emb_list, name_list))
        else:
            return create_response('zip are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


# video recognition endpoint
@app.route('/videorecognition', methods=['POST'])
def video_recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_VIDEO):
            video_name = secure_filename(f.filename)
            f.save('./video/' + video_name)
            detect_video('./video/' + video_name, './videoout/' + video_name, retinaface_model, arcface_model, k_v,
                         retinaface_args)
            return create_response("success")
        else:
            return create_response('mp4 are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


@app.route('/download/<string:filename>', methods=['GET'])
def download(filename):
    if os.path.isfile(os.path.join('./videoout/', filename)):
        return send_from_directory('./videoout/', filename, as_attachment=True)
    else:
        return create_response("Download failed")


if __name__ == '__main__':
    k_v = load_npy("./Database/student.npy")
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    # print(database_name_list)
    nlist = 50
    quantizer = faiss.IndexFlatL2(512)  # the other index
    # rebinding the module-level name "index" here is what the routes above read;
    # Flask already registered the index() view, so routing is unaffected
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)
    # index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 50
    app.run(host="0.0.0.0", port=5000)
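For reference, a client-side sketch against the endpoints defined above, using the multipart field names from the code (file_name, file1_name, file2_name) and assuming the server is reachable on localhost:5000; the image filenames are hypothetical and the response layout follows create_response:

import requests

base = "http://localhost:5000"

# single-image recognition
with open("test.jpg", "rb") as f:
    r = requests.post(base + "/recognition", files={"file_name": f})
print(r.json())  # roughly {"status": "success", "data": {"name": ..., "distance": ..., "liveness": ...}}

# 1:1 verification of two images
with open("a.jpg", "rb") as f1, open("b.jpg", "rb") as f2:
    r = requests.post(base + "/compare", files={"file1_name": f1, "file2_name": f2})
print(r.json())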
1
backbones/__init__.py
Normal file
@@ -0,0 +1 @@
from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
BIN
backbones/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
backbones/__pycache__/iresnet.cpython-38.pyc
Normal file
187
backbones/iresnet.py
Normal file
@@ -0,0 +1,187 @@
import torch
from torch import nn

__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class IBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1):
        super(IBasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
        self.conv1 = conv3x3(inplanes, planes)
        self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.prelu = nn.PReLU(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.prelu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return out


class IResNet(nn.Module):
    fc_scale = 7 * 7

    def __init__(self,
                 block, layers, dropout=0, num_features=512, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
        super(IResNet, self).__init__()
        self.fp16 = fp16
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
        self.prelu = nn.PReLU(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
        self.dropout = nn.Dropout(p=dropout, inplace=True)
        self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
        self.features = nn.BatchNorm1d(num_features, eps=1e-05)
        nn.init.constant_(self.features.weight, 1.0)
        self.features.weight.requires_grad = False

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, IBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
            )
        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(self.inplanes,
                      planes,
                      groups=self.groups,
                      base_width=self.base_width,
                      dilation=self.dilation))

        return nn.Sequential(*layers)

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.prelu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
            x = self.bn2(x)
            x = torch.flatten(x, 1)
            x = self.dropout(x)
        x = self.fc(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
    model = IResNet(block, layers, **kwargs)
    if pretrained:
        raise ValueError()
    return model


def iresnet18(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
                    progress, **kwargs)


def iresnet34(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
                    progress, **kwargs)


def iresnet50(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
                    progress, **kwargs)


def iresnet100(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
                    progress, **kwargs)


def iresnet200(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
                    progress, **kwargs)
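A quick sanity check for the backbone: with the 112x112 crops used throughout this repo, the four stride-2 stages reduce the feature map to 7x7 (matching fc_scale = 7 * 7) and the network emits a 512-d embedding. A minimal sketch:

import torch
from backbones import iresnet100

model = iresnet100()
model.eval()
with torch.no_grad():
    emb = model(torch.randn(1, 3, 112, 112))  # dummy input, normalized like load_image()
print(emb.shape)  # torch.Size([1, 512])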
135
centerface.py
Normal file
@@ -0,0 +1,135 @@
import time

import numpy as np
import cv2
import datetime


class CenterFace(object):
    def __init__(self, landmarks=True):
        self.landmarks = landmarks
        if self.landmarks:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/centerface.onnx')
        else:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/cface.1k.onnx')
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
        self.net.setInput(blob)
        begin = datetime.datetime.now()
        start_time = time.time()

        lms = None  # only produced by the landmark model
        if self.landmarks:
            heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540'])
        else:
            heatmap, scale, offset = self.net.forward(["535", "536", "537"])
        end = datetime.datetime.now()
        end_time = time.time()
        # print("cpuOne time: " + str(end_time - start_time))
        # print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        # round the input size up to a multiple of 32, as the network requires
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
        if len(dets) > 0:
            dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
            if self.landmarks:
                lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)
        if self.landmarks:
            return dets, lms
        else:
            return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        c0, c1 = np.where(heatmap > threshold)
        if self.landmarks:
            boxes, lms = [], []
        else:
            boxes = []
        if len(c0) > 0:
            for i in range(len(c0)):
                s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
                o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
                s = heatmap[c0[i], c1[i]]
                x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
                x1, y1 = min(x1, size[1]), min(y1, size[0])
                boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
                if self.landmarks:
                    lm = []
                    for j in range(5):
                        lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
                        lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
                    lms.append(lm)
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]
        if self.landmarks:
            return boxes, lms
        else:
            return boxes

    def nms(self, boxes, scores, nms_thresh):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]
        num_detections = boxes.shape[0]
        # np.bool was removed in recent numpy; the builtin bool dtype is equivalent
        suppressed = np.zeros((num_detections,), dtype=bool)

        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)

            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]

            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue

                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0, xx2 - xx1 + 1)
                h = max(0, yy2 - yy1 + 1)

                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True

        return keep
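A usage sketch for CenterFace, assuming the ONNX weights exist at ./model/onnx/centerface.onnx and using a hypothetical test image; __call__ takes a frame plus its height and width and returns detections as [x1, y1, x2, y2, score] rows together with 5-point landmarks:

import cv2
from centerface import CenterFace

frame = cv2.imread("test.jpg")  # any BGR image
h, w = frame.shape[:2]
centerface = CenterFace(landmarks=True)
dets, lms = centerface(frame, h, w, threshold=0.5)
for x1, y1, x2, y2, score in dets:
    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)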
67
config.py
Normal file
@@ -0,0 +1,67 @@
from easydict import EasyDict as edict

config = edict()
config.dataset = "ms1m-retinaface-t2"
config.embedding_size = 512
config.sample_rate = 1
config.fp16 = False
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 64
config.lr = 0.1  # batch size is 512
config.output = "ms1mv3_arcface_r50"

if config.dataset == "emore":
    config.rec = "/train_tmp/faces_emore"
    config.num_classes = 85742
    config.num_image = 5822653
    config.num_epoch = 16
    config.warmup_epoch = -1
    config.val_targets = ["lfw", ]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [8, 14] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "ms1m-retinaface-t2":
    config.rec = "/train_tmp/ms1m-retinaface-t2"
    config.num_classes = 91180
    config.num_epoch = 25
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [11, 17, 22] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "glint360k":
    # make training faster
    # our RAM is 256G
    # mount -t tmpfs -o size=140G tmpfs /train_tmp
    config.rec = "/train_tmp/glint360k"
    config.num_classes = 360232
    config.num_image = 17091657
    config.num_epoch = 20
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [8, 12, 15, 18] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "webface":
    config.rec = "/train_tmp/faces_webface_112x112"
    config.num_classes = 10572
    config.num_image = "forget"
    config.num_epoch = 34
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [20, 28, 32] if m - 1 <= epoch])
    config.lr_func = lr_step_func
168
create_database.py
Normal file
@@ -0,0 +1,168 @@
import os
import time
import re
import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def findCosineDistance(source_representation, test_representation):
    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def create_database(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        # only the first image of each person is enrolled
        for img_name in img_path[:1]:
            img = load_image(os.path.join(path, name, img_name))
            img = torch.from_numpy(img)
            with torch.no_grad():
                pred = model(img)
            pred = pred.numpy()
            k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def create_database_batch(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    order_name = []
    order_path = []
    emb_list = []
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        for img_name in img_path[:1]:
            order_name.append(name)
            order_path.append(os.path.join(path, name, img_name))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))

    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb.numpy())
    np.save(database_path, k_v)


def add_one(img, model, name, database_path):
    img = torch.from_numpy(img)
    with torch.no_grad():
        pred = model(img)
    pred = pred.numpy()
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):

    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    # img = load_image(r"D:\Download\out\facedatabase\man.jpg")
    # img = load_image(r"D:\Download\out\facedatabase\man6.jpg")
    # img = load_image(r"D:\Download\out\alig_students\student.jpg")
    # print(img.shape)
    #
    # k_v = load_npy("./Database/student.npy")
    # start_time = time.time()
    # img = torch.from_numpy(img)
    # name = findOne(img,model,k_v)
    # mo = r'[\u4e00-\u9fa5]*'
    # name = re.match(mo,name)
    # print(name.group(0))
    # end_time = time.time()
    # print("findOne time: " + str(end_time - start_time))

    # create_database_batch(r"D:\Download\out\alig_students",model,"./Database/student.npy")
    create_database_batch(r"D:\Download\out\cfp_database", model, "cfp.npy")
    # add_one(img,model,"Arminio_Fraga","centerface_lfw.npy")
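The .npy databases written above are plain pickled dicts mapping a person's name to an L2-normalized embedding ((1, 512) from create_database/add_one, (512,) from create_database_batch), so they can be inspected directly; a small sketch assuming cfp.npy was produced by the batch helper:

import numpy as np

k_v = np.load("cfp.npy", allow_pickle=True).item()  # dict: name -> embedding
for name, emb in list(k_v.items())[:3]:
    print(name, emb.shape, np.linalg.norm(emb))  # norms should be ~1.0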
2845
data/FDDB/img_list.txt
Normal file
3
data/__init__.py
Normal file
@@ -0,0 +1,3 @@
from .wider_face import WiderFaceDetection, detection_collate
from .data_augment import *
from .config import *
BIN
data/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
data/__pycache__/config.cpython-38.pyc
Normal file
BIN
data/__pycache__/data_augment.cpython-38.pyc
Normal file
BIN
data/__pycache__/wider_face.cpython-38.pyc
Normal file
42
data/config.py
Normal file
@ -0,0 +1,42 @@
# config.py

cfg_mnet = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,
    'ngpu': 1,
    'epoch': 250,
    'decay1': 190,
    'decay2': 220,
    'image_size': 640,
    'pretrain': True,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64
}

cfg_re50 = {
    'name': 'Resnet50',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 24,
    'ngpu': 4,
    'epoch': 100,
    'decay1': 70,
    'decay2': 90,
    'image_size': 840,
    'pretrain': True,
    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
    'in_channel': 256,
    'out_channel': 256
}
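
Elsewhere in this repo these dicts are selected by a --network flag; a minimal sketch of that pattern (the args object is hypothetical here), mirroring what detect_rtsp does below:

# sketch: pick a detector config by network name
cfg = cfg_mnet if args.network == "mobile0.25" else cfg_re50
print(cfg['name'], cfg['image_size'])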
237
data/data_augment.py
Normal file
@ -0,0 +1,237 @@
import cv2
import numpy as np
import random
from utils.box_utils import matrix_iof


def _crop(image, boxes, labels, landm, img_dim):
    height, width, _ = image.shape
    pad_image_flag = True

    for _ in range(250):
        """
        if random.uniform(0, 1) <= 0.2:
            scale = 1.0
        else:
            scale = random.uniform(0.3, 1.0)
        """
        PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
        scale = random.choice(PRE_SCALES)
        short_side = min(width, height)
        w = int(scale * short_side)
        h = w

        if width == w:
            l = 0
        else:
            l = random.randrange(width - w)
        if height == h:
            t = 0
        else:
            t = random.randrange(height - h)
        roi = np.array((l, t, l + w, t + h))

        value = matrix_iof(boxes, roi[np.newaxis])
        flag = (value >= 1)
        if not flag.any():
            continue

        centers = (boxes[:, :2] + boxes[:, 2:]) / 2
        mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[mask_a].copy()
        labels_t = labels[mask_a].copy()
        landms_t = landm[mask_a].copy()
        landms_t = landms_t.reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # landm
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # make sure that the cropped image contains at least one face > 16 pixel at training image scale
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        pad_image_flag = False

        return image_t, boxes_t, labels_t, landms_t, pad_image_flag
    return image, boxes, labels, landm, pad_image_flag


def _distort(image):

    def _convert(image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    image = image.copy()

    if random.randrange(2):

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

    else:

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

    return image


def _expand(image, boxes, fill, p):
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape

    scale = random.uniform(1, p)
    w = int(scale * width)
    h = int(scale * height)

    left = random.randint(0, w - width)
    top = random.randint(0, h - height)

    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)
    expand_image = np.empty(
        (h, w, depth),
        dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image
    image = expand_image

    return image, boxes_t


def _mirror(image, boxes, landms):
    _, width, _ = image.shape
    if random.randrange(2):
        image = image[:, ::-1]
        boxes = boxes.copy()
        boxes[:, 0::2] = width - boxes[:, 2::-2]  # the reversed slice swaps x1/x2 so the mirrored box stays valid

        # landm
        landms = landms.copy()
        landms = landms.reshape([-1, 5, 2])
        landms[:, :, 0] = width - landms[:, :, 0]
        tmp = landms[:, 1, :].copy()  # swap left/right eyes and left/right mouth corners after mirroring
        landms[:, 1, :] = landms[:, 0, :]
        landms[:, 0, :] = tmp
        tmp1 = landms[:, 4, :].copy()
        landms[:, 4, :] = landms[:, 3, :]
        landms[:, 3, :] = tmp1
        landms = landms.reshape([-1, 10])

    return image, boxes, landms


def _pad_to_square(image, rgb_mean, pad_image_flag):
    if not pad_image_flag:
        return image
    height, width, _ = image.shape
    long_side = max(width, height)
    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
    image_t[:, :] = rgb_mean
    image_t[0:0 + height, 0:0 + width] = image
    return image_t


def _resize_subtract_mean(image, insize, rgb_mean):
    interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    interp_method = interp_methods[random.randrange(5)]
    image = cv2.resize(image, (insize, insize), interpolation=interp_method)
    image = image.astype(np.float32)
    image -= rgb_mean
    return image.transpose(2, 0, 1)


class preproc(object):

    def __init__(self, img_dim, rgb_means):
        self.img_dim = img_dim
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        assert targets.shape[0] > 0, "this image does not have gt"

        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
        image_t = _distort(image_t)
        image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, labels_t))

        return image_t, targets_t
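
A quick sketch of feeding preproc one image with a single annotated face; the box and landmark values below are made up, and each targets row packs [x1, y1, x2, y2, ten landmark coords, label] as __call__ expects:

# sketch: run the augmentation pipeline on a dummy sample
import numpy as np

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
targets = np.array([[100, 100, 200, 200,            # box
                     120, 130, 180, 130, 150, 160,  # five landmarks (x, y)
                     130, 180, 170, 180,
                     1]], dtype=np.float32)          # label
p = preproc(img_dim=640, rgb_means=(104, 117, 123))
img_t, targets_t = p(image, targets)
print(img_t.shape, targets_t.shape)                  # (3, 640, 640), (N, 15)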
258
data/realtime_detect.py
Normal file
@ -0,0 +1,258 @@
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss

ppi = 1280
ppi2 = 640
step = 3

def detect_rtsp(rtsp, out_rtsp, net, arcface_model, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(rtsp)
    ret, frame = cap.read()
    h, w = frame.shape[:2]

    factor = 0
    if (w > ppi):
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]
    arf = 1
    detect_h, detect_w = frame.shape[:2]
    frame_detect = frame
    factor2 = 0
    if (w > ppi2):
        factor2 = h / w
        frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
        detect_h, detect_w = frame_detect.shape[:2]
        arf = w/detect_w
    print(w,h)
    print(detect_w,detect_h)

    #fps = cap.get(cv2.CAP_PROP_FPS)
    #print(fps)
    size = (w, h)
    sizeStr = str(size[0]) + 'x' + str(size[1])
    if(out_rtsp.startswith("rtsp")):
        command = ['ffmpeg',
                   '-y', '-an',
                   '-f', 'rawvideo',
                   '-vcodec', 'rawvideo',
                   '-pix_fmt', 'bgr24',
                   '-s', sizeStr,
                   '-r', "25",
                   '-i', '-',
                   '-c:v', 'libx265',
                   '-b:v', '3000k',
                   '-pix_fmt', 'yuv420p',
                   '-preset', 'ultrafast',
                   '-f', 'rtsp',
                   out_rtsp]
        pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
    number = step
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
    scale = scale.to(device)
    scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h])
    scale1 = scale1.to(device)

    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            tic = time.time()
            img = np.float32(frame_detect)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K faster NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time_findall = time.time()
            for i, det in enumerate(dets[:1]):
                if det[4] < args.vis_thres:
                    continue
                #boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                dst = dst * arf

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                print("warpALL time: " + str(time.time() - start_time_findall ))
                #start_time = time.time()
                name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
                #print(name_list)

            #print("findOneframe time: " + str(time.time() - start_time_findall))
            #start_time = time.time()
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         boxes = boxes * arf
            #         name = name_list[i]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
            #         cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
            start_time = time.time()
            if(len(dets) != 0):
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                for i, det in enumerate(dets[:1]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[i]
                    if not isinstance(name, str):  # np.unicode was removed in NumPy 1.20+; str is equivalent here
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            pipe.stdin.write(frame.tobytes())  # ndarray.tostring() is deprecated; tobytes() returns the same raw buffer
            print("drawOneframe time: " + str(time.time() - start_time))
            #start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            number = step
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if (ret != 0 and factor2 != 0):
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            #print("readframe time: " + str(time.time() - start_time))
        else:
            number += 1
            if (len(dets) != 0):
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # if (len(dets) != 0):
            #     img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            #     draw = ImageDraw.Draw(img_PIL)
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         name = name_list[i]
            #         if not isinstance(name, np.unicode):
            #             name = name.decode('utf8')
            #         draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            #         draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
            #                        width=3)
            #     frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            start_time = time.time()
            pipe.stdin.write(frame.tobytes())  # see note above on tostring() vs tobytes()
            print("writeframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    pipe.terminate()
    print('total time: {:.4f}'.format(time.time() - tic_total))

if __name__ == "__main__":
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # load the face recognition model
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
    # load the face detection model
    retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    retinaface_model = load_retinaface_model(retinaface_args)
    k_v = load_npy("./Database/student.npy")
    #print(list(k_v.keys()))
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    index = faiss.IndexFlatL2(512)
    index.add(vector_list)

    #detect_rtsp("software.mp4", 'rtsp://localhost/test2', retinaface_model, arcface_model, index ,database_name_list, retinaface_args)
    detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
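
The __main__ block builds a faiss index that the commented-out call would consume; a minimal sketch of how a lookup against it maps back to names (the query below is a placeholder 512-d embedding):

# sketch: nearest-neighbour lookup in the faiss index built above
query = np.random.rand(1, 512).astype(np.float32)   # stand-in for an l2-normalized ArcFace embedding
distances, ids = index.search(query, 1)             # squared-L2 distance to the closest database vector
print(database_name_list[ids[0][0]], distances[0][0])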
101
data/wider_face.py
Normal file
@ -0,0 +1,101 @@
import os
import os.path
import sys
import torch
import torch.utils.data as data
import cv2
import numpy as np

class WiderFaceDetection(data.Dataset):
    def __init__(self, txt_path, preproc=None):
        self.preproc = preproc
        self.imgs_path = []
        self.words = []
        f = open(txt_path,'r')
        lines = f.readlines()
        isFirst = True
        labels = []
        for line in lines:
            line = line.rstrip()
            if line.startswith('#'):
                if isFirst is True:
                    isFirst = False
                else:
                    labels_copy = labels.copy()
                    self.words.append(labels_copy)
                    labels.clear()
                path = line[2:]
                path = txt_path.replace('label.txt','images/') + path
                self.imgs_path.append(path)
            else:
                line = line.split(' ')
                label = [float(x) for x in line]
                labels.append(label)

        self.words.append(labels)

    def __len__(self):
        return len(self.imgs_path)

    def __getitem__(self, index):
        img = cv2.imread(self.imgs_path[index])
        height, width, _ = img.shape

        labels = self.words[index]
        annotations = np.zeros((0, 15))
        if len(labels) == 0:
            return annotations
        for idx, label in enumerate(labels):
            annotation = np.zeros((1, 15))
            # bbox
            annotation[0, 0] = label[0]  # x1
            annotation[0, 1] = label[1]  # y1
            annotation[0, 2] = label[0] + label[2]  # x2
            annotation[0, 3] = label[1] + label[3]  # y2

            # landmarks
            annotation[0, 4] = label[4]    # l0_x
            annotation[0, 5] = label[5]    # l0_y
            annotation[0, 6] = label[7]    # l1_x
            annotation[0, 7] = label[8]    # l1_y
            annotation[0, 8] = label[10]   # l2_x
            annotation[0, 9] = label[11]   # l2_y
            annotation[0, 10] = label[13]  # l3_x
            annotation[0, 11] = label[14]  # l3_y
            annotation[0, 12] = label[16]  # l4_x
            annotation[0, 13] = label[17]  # l4_y
            if (annotation[0, 4]<0):
                annotation[0, 14] = -1
            else:
                annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)
        target = np.array(annotations)
        if self.preproc is not None:
            img, target = self.preproc(img, target)

        return torch.from_numpy(img), target

def detection_collate(batch):
    """Custom collate fn for dealing with batches of images that have a different
    number of associated object annotations (bounding boxes).

    Arguments:
        batch: (tuple) A tuple of tensor images and lists of annotations

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (list of tensors) annotations for a given image are stacked on 0 dim
    """
    targets = []
    imgs = []
    for _, sample in enumerate(batch):
        for _, tup in enumerate(sample):
            if torch.is_tensor(tup):
                imgs.append(tup)
            elif isinstance(tup, type(np.empty(0))):
                annos = torch.from_numpy(tup).float()
                targets.append(annos)

    return (torch.stack(imgs, 0), targets)
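
A sketch of how this dataset is typically wired to a loader; the label path is a placeholder, and preproc/detection_collate come from the modules above:

# sketch: build a WIDER FACE training loader (label.txt path is hypothetical)
from torch.utils.data import DataLoader

dataset = WiderFaceDetection("./data/widerface/train/label.txt",
                             preproc(img_dim=640, rgb_means=(104, 117, 123)))
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    collate_fn=detection_collate)
images, targets = next(iter(loader))
print(images.shape, len(targets))   # (8, 3, 640, 640) and 8 per-image target tensors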
107
dataset.py
Normal file
@ -0,0 +1,107 @@
import numbers
import os
import queue as Queue
import threading

import mxnet as mx
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BackgroundGenerator(threading.Thread):
    def __init__(self, generator, local_rank, max_prefetch=6):
        super(BackgroundGenerator, self).__init__()
        self.queue = Queue.Queue(max_prefetch)
        self.generator = generator
        self.local_rank = local_rank
        self.daemon = True
        self.start()

    def run(self):
        torch.cuda.set_device(self.local_rank)
        for item in self.generator:
            self.queue.put(item)
        self.queue.put(None)

    def next(self):
        next_item = self.queue.get()
        if next_item is None:
            raise StopIteration
        return next_item

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self


class DataLoaderX(DataLoader):
    def __init__(self, local_rank, **kwargs):
        super(DataLoaderX, self).__init__(**kwargs)
        self.stream = torch.cuda.Stream(local_rank)
        self.local_rank = local_rank

    def __iter__(self):
        self.iter = super(DataLoaderX, self).__iter__()
        self.iter = BackgroundGenerator(self.iter, self.local_rank)
        self.preload()
        return self

    def preload(self):
        self.batch = next(self.iter, None)
        if self.batch is None:
            return None
        with torch.cuda.stream(self.stream):
            for k in range(len(self.batch)):
                self.batch[k] = self.batch[k].to(device=self.local_rank,
                                                 non_blocking=True)

    def __next__(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.batch
        if batch is None:
            raise StopIteration
        self.preload()
        return batch


class MXFaceDataset(Dataset):
    def __init__(self, root_dir, local_rank):
        super(MXFaceDataset, self).__init__()
        self.transform = transforms.Compose(
            [transforms.ToPILImage(),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
             ])
        self.root_dir = root_dir
        self.local_rank = local_rank
        path_imgrec = os.path.join(root_dir, 'train.rec')
        path_imgidx = os.path.join(root_dir, 'train.idx')
        self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
        s = self.imgrec.read_idx(0)
        header, _ = mx.recordio.unpack(s)
        if header.flag > 0:
            self.header0 = (int(header.label[0]), int(header.label[1]))
            self.imgidx = np.array(range(1, int(header.label[0])))
        else:
            self.imgidx = np.array(list(self.imgrec.keys))

    def __getitem__(self, index):
        idx = self.imgidx[index]
        s = self.imgrec.read_idx(idx)
        header, img = mx.recordio.unpack(s)
        label = header.label
        if not isinstance(label, numbers.Number):
            label = label[0]
        label = torch.tensor(label, dtype=torch.long)
        sample = mx.image.imdecode(img).asnumpy()
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label

    def __len__(self):
        return len(self.imgidx)
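
A sketch of consuming the record file through the prefetching loader; it assumes a CUDA device 0 and an MXNet train.rec/train.idx pair under the (hypothetical) root directory:

# sketch: iterate the MXNet record dataset via DataLoaderX (requires a GPU)
dataset = MXFaceDataset(root_dir="./faces_emore", local_rank=0)
loader = DataLoaderX(local_rank=0, dataset=dataset, batch_size=64, shuffle=True)
for imgs, labels in loader:
    print(imgs.shape, labels.shape)   # tensors already moved to cuda:0 by preload()
    break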
0
eval/__init__.py
Normal file
409
eval/verification.py
Normal file
@ -0,0 +1,409 @@
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""

# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import datetime
import os
import pickle

import mxnet as mx
import numpy as np
import sklearn
import torch
from mxnet import ndarray as nd
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold


class LFold:
    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        else:
            return [(indices, indices)]


def calculate_roc(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  nrof_folds=10,
                  pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(
                threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
                threshold, dist[test_set],
                actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best_threshold_index], dist[test_set],
            actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(
        np.logical_and(np.logical_not(predict_issame),
                       np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
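
A tiny worked example of calculate_accuracy with made-up distances, to make the TPR/FPR/accuracy bookkeeping concrete:

# sketch: 4 pairs at threshold 1.0 -> tp=1, fp=1, tn=1, fn=1
dist = np.array([0.5, 1.5, 0.8, 2.0])
actual_issame = np.array([True, True, False, False])
tpr, fpr, acc = calculate_accuracy(1.0, dist, actual_issame)
print(tpr, fpr, acc)   # 0.5 0.5 0.5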

def calculate_val(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  far_target,
                  nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(
                threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(
            threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(
        np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    # print(true_accept, false_accept)
    # print(n_same, n_diff)
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds,
                                       embeddings1,
                                       embeddings2,
                                       np.asarray(actual_issame),
                                       nrof_folds=nrof_folds,
                                       pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds,
                                      embeddings1,
                                      embeddings2,
                                      np.asarray(actual_issame),
                                      1e-3,
                                      nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


@torch.no_grad()
def load_bin(path, image_size):
    try:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f)  # py2
    except UnicodeDecodeError as e:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f, encoding='bytes')  # py3
    data_list = []
    for flip in [0, 1]:
        data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for idx in range(len(issame_list) * 2):
        _bin = bins[idx]
        img = mx.image.imdecode(_bin)
        if img.shape[1] != image_size[0]:
            img = mx.image.resize_short(img, image_size[0])
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
        if idx % 1000 == 0:
            print('loading bin', idx)
    print(data_list[0].shape)
    return data_list, issame_list


@torch.no_grad()
def test(data_set, backbone, batch_size, nfolds=10):
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = data[bb - batch_size: bb]
            time0 = datetime.datetime.now()
            img = ((_data / 255) - 0.5) / 0.5
            net_out: torch.Tensor = backbone(img)
            _embeddings = net_out.detach().cpu().numpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    acc1 = 0.0
    std1 = 0.0
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
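
A sketch of running the pair-verification test end to end; the .bin path is a placeholder, and the backbone import mirrors the one used elsewhere in this repo:

# sketch: evaluate a backbone on an LFW-style .bin pair file
from backbones import iresnet100

model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
data_set = load_bin("./eval/lfw.bin", image_size=[112, 112])
acc1, std1, acc2, std2, xnorm, _ = test(data_set, model, batch_size=64, nfolds=10)
print('Accuracy-Flip: %1.5f+-%1.5f' % (acc2, std2))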

def dumpR(data_set,
          backbone,
          batch_size,
          name='',
          data_extra=None,
          label_shape=None):
    print('dump verification embedding..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba

            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            time0 = datetime.datetime.now()
            # NOTE: `_label`, `_data_extra` and `model` below are undefined in this file;
            # this function is unported MXNet legacy code and raises NameError if called.
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data,), label=(_label,))
            else:
                db = mx.io.DataBatch(data=(_data, _data_extra),
                                     label=(_label,))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    actual_issame = np.asarray(issame_list)
    outname = os.path.join('temp.bin')
    with open(outname, 'wb') as f:
        pickle.dump((embeddings, issame_list),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)


# if __name__ == '__main__':
#
#     parser = argparse.ArgumentParser(description='do verification')
#     # general
#     parser.add_argument('--data-dir', default='', help='')
#     parser.add_argument('--model',
#                         default='../model/softmax,50',
#                         help='path to load model.')
#     parser.add_argument('--target',
#                         default='lfw,cfp_ff,cfp_fp,agedb_30',
#                         help='test targets.')
#     parser.add_argument('--gpu', default=0, type=int, help='gpu id')
#     parser.add_argument('--batch-size', default=32, type=int, help='')
#     parser.add_argument('--max', default='', type=str, help='')
#     parser.add_argument('--mode', default=0, type=int, help='')
#     parser.add_argument('--nfolds', default=10, type=int, help='')
#     args = parser.parse_args()
#     image_size = [112, 112]
#     print('image_size', image_size)
#     ctx = mx.gpu(args.gpu)
#     nets = []
#     vec = args.model.split(',')
#     prefix = args.model.split(',')[0]
#     epochs = []
#     if len(vec) == 1:
#         pdir = os.path.dirname(prefix)
#         for fname in os.listdir(pdir):
#             if not fname.endswith('.params'):
#                 continue
#             _file = os.path.join(pdir, fname)
#             if _file.startswith(prefix):
#                 epoch = int(fname.split('.')[0].split('-')[1])
#                 epochs.append(epoch)
#         epochs = sorted(epochs, reverse=True)
#         if len(args.max) > 0:
#             _max = [int(x) for x in args.max.split(',')]
#             assert len(_max) == 2
#             if len(epochs) > _max[1]:
#                 epochs = epochs[_max[0]:_max[1]]
#
#     else:
#         epochs = [int(x) for x in vec[1].split('|')]
#     print('model number', len(epochs))
#     time0 = datetime.datetime.now()
#     for epoch in epochs:
#         print('loading', prefix, epoch)
#         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
#         # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
#         all_layers = sym.get_internals()
#         sym = all_layers['fc1_output']
#         model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
#         # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
#         model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
#                                           image_size[1]))])
#         model.set_params(arg_params, aux_params)
#         nets.append(model)
#     time_now = datetime.datetime.now()
#     diff = time_now - time0
#     print('model loading time', diff.total_seconds())
#
#     ver_list = []
#     ver_name_list = []
#     for name in args.target.split(','):
#         path = os.path.join(args.data_dir, name + ".bin")
#         if os.path.exists(path):
#             print('loading.. ', name)
#             data_set = load_bin(path, image_size)
#             ver_list.append(data_set)
#             ver_name_list.append(name)
#
#     if args.mode == 0:
#         for i in range(len(ver_list)):
#             results = []
#             for model in nets:
#                 acc1, std1, acc2, std2, xnorm, embeddings_list = test(
#                     ver_list[i], model, args.batch_size, args.nfolds)
#                 print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
#                 print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
#                 print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
#                 results.append(acc2)
#             print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
#     elif args.mode == 1:
#         raise ValueError
#     else:
#         model = nets[0]
#         dumpR(ver_list[0], model, args.batch_size, args.target)
483
eval_ijbc.py
Normal file
@ -0,0 +1,483 @@
# coding: utf-8

import os
import pickle

import matplotlib
import pandas as pd

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import timeit
import sklearn
import argparse
from sklearn.metrics import roc_curve, auc

from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap
from prettytable import PrettyTable
from pathlib import Path
import sys
import warnings

sys.path.insert(0, "../")
warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description='do ijb test')
# general
parser.add_argument('--model-prefix', default='', help='path to load model.')
parser.add_argument('--image-path', default='', type=str, help='')
parser.add_argument('--result-dir', default='.', type=str, help='')
parser.add_argument('--batch-size', default=128, type=int, help='')
parser.add_argument('--network', default='iresnet50', type=str, help='')
parser.add_argument('--job', default='insightface', type=str, help='job name')
parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB')
args = parser.parse_args()

target = args.target
model_path = args.model_prefix
image_path = args.image_path
result_dir = args.result_dir
gpu_id = None
use_norm_score = True  # if True, TestMode(N1)
use_detector_score = True  # if True, TestMode(D1)
use_flip_test = True  # if True, TestMode(F1)
job = args.job
batch_size = args.batch_size

import cv2
import numpy as np
import torch
from skimage import transform as trans
import backbones


class Embedding(object):
    def __init__(self, prefix, data_shape, batch_size=1):
        image_size = (112, 112)
        self.image_size = image_size
        weight = torch.load(prefix)
        resnet = eval("backbones.{}".format(args.network))(False).cuda()
        resnet.load_state_dict(weight)
        model = torch.nn.DataParallel(resnet)
        self.model = model
        self.model.eval()
        src = np.array([
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041]], dtype=np.float32)
        src[:, 0] += 8.0
        self.src = src
        self.batch_size = batch_size
        self.data_shape = data_shape

    def get(self, rimg, landmark):

        assert landmark.shape[0] == 68 or landmark.shape[0] == 5
        assert landmark.shape[1] == 2
        if landmark.shape[0] == 68:
            landmark5 = np.zeros((5, 2), dtype=np.float32)
            landmark5[0] = (landmark[36] + landmark[39]) / 2
            landmark5[1] = (landmark[42] + landmark[45]) / 2
            landmark5[2] = landmark[30]
            landmark5[3] = landmark[48]
            landmark5[4] = landmark[54]
        else:
            landmark5 = landmark
        tform = trans.SimilarityTransform()
        tform.estimate(landmark5, self.src)
        M = tform.params[0:2, :]
        img = cv2.warpAffine(rimg,
                             M, (self.image_size[1], self.image_size[0]),
                             borderValue=0.0)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_flip = np.fliplr(img)
        img = np.transpose(img, (2, 0, 1))  # 3*112*112, RGB
        img_flip = np.transpose(img_flip, (2, 0, 1))
        input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img
        input_blob[1] = img_flip
        return input_blob

    @torch.no_grad()
    def forward_db(self, batch_data):
        imgs = torch.Tensor(batch_data).cuda()
        imgs.div_(255).sub_(0.5).div_(0.5)
        feat = self.model(imgs)
        feat = feat.reshape([self.batch_size, 2 * feat.shape[1]])
        return feat.cpu().numpy()


# Split a list into n parts as evenly as possible; len(result) == n, and if n
# exceeds the number of elements the surplus parts are empty lists.
def divideIntoNstrand(listTemp, n):
    twoList = [[] for i in range(n)]
    for i, e in enumerate(listTemp):
        twoList[i % n].append(e)
    return twoList


def read_template_media_list(path):
    # ijb_meta = np.loadtxt(path, dtype=str)
    ijb_meta = pd.read_csv(path, sep=' ', header=None).values
    templates = ijb_meta[:, 1].astype(int)  # np.int was removed in NumPy 1.24+; plain int is equivalent
    medias = ijb_meta[:, 2].astype(int)
    return templates, medias


# In[ ]:


def read_template_pair_list(path):
    # pairs = np.loadtxt(path, dtype=str)
    pairs = pd.read_csv(path, sep=' ', header=None).values
    # print(pairs.shape)
    # print(pairs[:, 0].astype(np.int))
    t1 = pairs[:, 0].astype(int)
    t2 = pairs[:, 1].astype(int)
    label = pairs[:, 2].astype(int)
    return t1, t2, label


# In[ ]:


def read_image_feature(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# In[ ]:


def get_image_feature(img_path, files_list, model_path, epoch, gpu_id):
    batch_size = args.batch_size
    data_shape = (3, 112, 112)

    files = files_list
    print('files:', len(files))
    rare_size = len(files) % batch_size
    faceness_scores = []
    batch = 0
    img_feats = np.empty((len(files), 1024), dtype=np.float32)

    batch_data = np.empty((2 * batch_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, batch_size)
    for img_index, each_line in enumerate(files[:len(files) - rare_size]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)

        batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0]
        batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1]
        if (img_index + 1) % batch_size == 0:
            print('batch', batch)
            img_feats[batch * batch_size:batch * batch_size +
                      batch_size][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])

    batch_data = np.empty((2 * rare_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, rare_size)
    for img_index, each_line in enumerate(files[len(files) - rare_size:]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)
        batch_data[2 * img_index][:] = input_blob[0]
        batch_data[2 * img_index + 1][:] = input_blob[1]
        if (img_index + 1) % rare_size == 0:
            print('batch', batch)
            img_feats[len(files) -
                      rare_size:][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])
    faceness_scores = np.array(faceness_scores).astype(np.float32)
    # img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01
    # faceness_scores = np.ones( (len(files), ), dtype=np.float32 )
    return img_feats, faceness_scores
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def image2template_feature(img_feats=None, templates=None, medias=None):
|
||||||
|
# ==========================================================
|
||||||
|
# 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]
|
||||||
|
# 2. compute media feature.
|
||||||
|
# 3. compute template feature.
|
||||||
|
# ==========================================================
|
||||||
|
unique_templates = np.unique(templates)
|
||||||
|
template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))
|
||||||
|
|
||||||
|
for count_template, uqt in enumerate(unique_templates):
|
||||||
|
|
||||||
|
(ind_t,) = np.where(templates == uqt)
|
||||||
|
face_norm_feats = img_feats[ind_t]
|
||||||
|
face_medias = medias[ind_t]
|
||||||
|
unique_medias, unique_media_counts = np.unique(face_medias,
|
||||||
|
return_counts=True)
|
||||||
|
media_norm_feats = []
|
||||||
|
for u, ct in zip(unique_medias, unique_media_counts):
|
||||||
|
(ind_m,) = np.where(face_medias == u)
|
||||||
|
if ct == 1:
|
||||||
|
media_norm_feats += [face_norm_feats[ind_m]]
|
||||||
|
else: # image features from the same video will be aggregated into one feature
|
||||||
|
media_norm_feats += [
|
||||||
|
np.mean(face_norm_feats[ind_m], axis=0, keepdims=True)
|
||||||
|
]
|
||||||
|
media_norm_feats = np.array(media_norm_feats)
|
||||||
|
# media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))
|
||||||
|
template_feats[count_template] = np.sum(media_norm_feats, axis=0)
|
||||||
|
if count_template % 2000 == 0:
|
||||||
|
print('Finish Calculating {} template features.'.format(
|
||||||
|
count_template))
|
||||||
|
# template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))
|
||||||
|
template_norm_feats = sklearn.preprocessing.normalize(template_feats)
|
||||||
|
# print(template_norm_feats.shape)
|
||||||
|
return template_norm_feats, unique_templates
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def verification(template_norm_feats=None,
                 unique_templates=None,
                 p1=None,
                 p2=None):
    # ==========================================================
    # Compute set-to-set Similarity Score.
    # ==========================================================
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template

    score = np.zeros((len(p1),))  # save cosine distance between pairs

    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score
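
# Illustrative only: verification() on two hand-made, already-normalized
# templates and a single pair (the template ids 11 and 42 are arbitrary).
# tf = np.array([[1.0, 0.0], [0.0, 1.0]])
# uq = np.array([11, 42])
# print(verification(tf, uq, np.array([11]), np.array([42])))  # [0.] - orthogonal templates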

# In[ ]:

def verification2(template_norm_feats=None,
                  unique_templates=None,
                  p1=None,
                  p2=None):
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template
    score = np.zeros((len(p1),))  # save cosine distance between pairs
    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score

def read_score(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# # Step1: Load Meta Data

# In[ ]:

assert target == 'IJBC' or target == 'IJBB'

# =============================================================
# load image and template relationships for template feature embedding
# tid --> template id, mid --> media id
# format:
#   image_name tid mid
# =============================================================
start = timeit.default_timer()
templates, medias = read_template_media_list(
    os.path.join('%s/meta' % image_path,
                 '%s_face_tid_mid.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# In[ ]:

# =============================================================
# load template pairs for template-to-template verification
# tid : template id, label : 1/0
# format:
#   tid_1 tid_2 label
# =============================================================
start = timeit.default_timer()
p1, p2, label = read_template_pair_list(
    os.path.join('%s/meta' % image_path,
                 '%s_template_pair_label.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# # Step 2: Get Image Features

# In[ ]:

# =============================================================
# load image features
# format:
#   img_feats: [image_num x feats_dim] (227630, 512)
# =============================================================
start = timeit.default_timer()
img_path = '%s/loose_crop' % image_path
img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower())
img_list = open(img_list_path)
files = img_list.readlines()
# files_list = divideIntoNstrand(files, rank_size)
files_list = files

# img_feats
# for i in range(rank_size):
img_feats, faceness_scores = get_image_feature(img_path, files_list,
                                               model_path, 0, gpu_id)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0],
                                          img_feats.shape[1]))


# # Step3: Get Template Features

# In[ ]:

# =============================================================
# compute template features from image features.
# =============================================================
start = timeit.default_timer()
# ==========================================================
# Norm feature before aggregation into template feature?
# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).
# ==========================================================
# 1. FaceScore (Feature Norm)
# 2. FaceScore (Detector)

if use_flip_test:
    # concat --- F1
    # img_input_feats = img_feats
    # add --- F2
    img_input_feats = img_feats[:, 0:img_feats.shape[1] //
                                2] + img_feats[:, img_feats.shape[1] // 2:]
else:
    img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2]

if use_norm_score:
    img_input_feats = img_input_feats
else:
    # normalise features to remove norm information
    img_input_feats = img_input_feats / np.sqrt(
        np.sum(img_input_feats ** 2, -1, keepdims=True))

if use_detector_score:
    print(img_input_feats.shape, faceness_scores.shape)
    img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]
else:
    img_input_feats = img_input_feats
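
# Illustrative only: the flip-test "add" fusion above assumes each row stores
# the original embedding concatenated with the flipped-image embedding, so
# summing the two halves fuses them (toy values):
# row = np.array([[1., 2., 3., 4.]])   # [orig | flipped], feats_dim = 2
# fused = row[:, :2] + row[:, 2:]      # -> [[4., 6.]]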

template_norm_feats, unique_templates = image2template_feature(
    img_input_feats, templates, medias)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# # Step 4: Get Template Similarity Scores

# In[ ]:

# =============================================================
# compute verification scores between template pairs.
# =============================================================
start = timeit.default_timer()
score = verification(template_norm_feats, unique_templates, p1, p2)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# In[ ]:

save_path = os.path.join(result_dir, args.job)
# save_path = result_dir + '/%s_result' % target

if not os.path.exists(save_path):
    os.makedirs(save_path)

score_save_file = os.path.join(save_path, "%s.npy" % target.lower())
np.save(score_save_file, score)


# # Step 5: Get ROC Curves and TPR@FPR Table

# In[ ]:

files = [score_save_file]
methods = []
scores = []
for file in files:
    methods.append(Path(file).stem)
    scores.append(np.load(file))

methods = np.array(methods)
scores = dict(zip(methods, scores))
colours = dict(
    zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))
x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1]
tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels])
fig = plt.figure()
for method in methods:
    fpr, tpr, _ = roc_curve(label, scores[method])
    roc_auc = auc(fpr, tpr)
    fpr = np.flipud(fpr)
    tpr = np.flipud(tpr)  # select largest tpr at same fpr
    plt.plot(fpr,
             tpr,
             color=colours[method],
             lw=1,
             label=('[%s (AUC = %0.4f %%)]' %
                    (method.split('-')[-1], roc_auc * 100)))
    tpr_fpr_row = []
    tpr_fpr_row.append("%s-%s" % (method, target))
    for fpr_iter in np.arange(len(x_labels)):
        _, min_index = min(
            list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr)))))
        tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100))
    tpr_fpr_table.add_row(tpr_fpr_row)
plt.xlim([10 ** -6, 0.1])
plt.ylim([0.3, 1.0])
plt.grid(linestyle='--', linewidth=1)
plt.xticks(x_labels)
plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True))
plt.xscale('log')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC on IJB')
plt.legend(loc="lower right")
fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower()))
print(tpr_fpr_table)
377
face_api.py
Normal file
@ -0,0 +1,377 @@
import os
import time
import re
import torch
import cv2
import numpy as np

from anti import anti_spoofing, load_anti_model
from backbones import iresnet50, iresnet18, iresnet100
from retinaface_detect import load_retinaface_model, detect_one, detect_video, set_retinaface_conf
from torch2trt import torch2trt, TRTModule

threshold = 0.7


# Read a local 112x112 image, move channels first, and normalize to [-1, 1]
def load_image(img_path):
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


# Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


# Cosine distance between two feature vectors
def findCosineDistance(source_representation, test_representation):
    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


# l2-normalize a feature vector (used before Euclidean matching)
def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


# Cosine similarity of two vectors
def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
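
# Quick sanity note (illustrative, not called anywhere): on l2-normalized
# embeddings the two metrics are monotonically related,
# ||a - b||^2 = 2 - 2 * cos(a, b), so the Euclidean threshold of 0.7 above
# corresponds to a cosine similarity of about 1 - 0.7**2 / 2 = 0.755.
# a = l2_normalize(np.array([1.0, 2.0, 3.0]))
# b = l2_normalize(np.array([1.5, 1.8, 3.2]))
# assert abs(findEuclideanDistance(a, b) ** 2 - (2 - 2 * cosin_metric(a, b))) < 1e-6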

# Load the face database of saved names and face feature vectors
def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


# Generate face feature vectors in batches and save them to the face database
def create_database_batch(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    order_name = []
    order_path = []
    emb_list = []
    for name in name_list[:]:
        img_path = os.path.join(path, name)
        # for img_name in img_path[:1]:
        order_name.append(name[:-4])
        order_path.append(img_path)
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))

    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb)
    np.save(database_path, k_v)

def create_database_from_img(order_name, order_img, model, database_path, cpu_or_cuda):
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    emb_list = []

    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))
    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb)
    np.save(database_path, k_v)


# Add one person's name and face feature vector to the database; create the database if it does not exist
def add_one_to_database(img, model, name, database_path, cpu_or_cuda):
    img = torch.from_numpy(img)
    img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        pred = model(img)
        pred = pred.cpu().numpy()
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


# Find which face feature vector in the database is closest to this one
def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like, distance
    else:
        return -1, distance

def faiss_find_face(pred, index, database_name_list):
    name_list = []
    start_time = time.time()
    D, I = index.search(pred, 1)
    end_time = time.time()
    # print("faiss cost %fs" % (end_time - start_time))
    # print(D, I)
    if len(pred) == 1:
        if D[0][0] < threshold:
            # print(database_name_list[I[0][0]])
            return database_name_list[I[0][0]], D[0][0]
        else:
            return "unknown", D[0][0]
    else:
        for i, row in enumerate(I):  # loop variable renamed from `index` to avoid shadowing the faiss index argument
            if D[i][0] < threshold:
                # print(database_name_list[I[0][0]])
                name_list.append(database_name_list[row[0]] + str(D[i][0]))
            else:
                name_list.append("unknown" + str(D[i][0]))
        return name_list
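
# A minimal sketch (assumption: faiss is installed; this is not part of the
# file's original code) of building the `index` searched above from the saved
# database. Note that faiss.IndexFlatL2 returns squared L2 distances, so
# `threshold` is effectively compared against d**2 in faiss_find_face.
# import faiss
# k_v = load_npy("./Database/student.npy")
# database_name_list = list(k_v.keys())
# vectors = np.array(list(k_v.values()), dtype=np.float32).reshape(len(k_v), -1)
# index = faiss.IndexFlatL2(vectors.shape[1])  # exact (brute-force) L2 search
# index.add(vectors)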

# Find a single face in the face database
def findOne(img, model, index, database_name_list, cpu_or_cuda):
    img = torch.from_numpy(img)
    img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        print("predOne time: " + str(end_time - start_time))
        pred = pred.cpu().numpy()
        # start_time = time.time()
        # name, distance = findmindistance(l2_normalize(pred), threshold=threshold, k_v=k_v)
        # end_time = time.time()
        # print("baoli time: " + str(end_time - start_time))
        name, distance = faiss_find_face(l2_normalize(pred), index, database_name_list)
        print(pred.shape)
        if name != -1:
            mo = r'[\u4e00-\u9fa5_a-zA-Z0-9]*'
            name = re.match(mo, name)
            return name.group(0), distance
        else:
            return "unknown", distance

# Find every face from the input face list in the face database
def findAll(imglist, model, index, database_name_list, cpu_or_cuda):
    imglist = torch.from_numpy(imglist)
    imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        name_list = []
        start_time = time.time()
        pred = model(imglist)
        end_time = time.time()
        print("predOne time: " + str(end_time - start_time))
        pred = pred.cpu().numpy()
        start_time = time.time()
        # name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
        for pr in pred:
            pr = np.expand_dims(l2_normalize(pr), 0)
            # print(pr.shape)
            name, distance = faiss_find_face(l2_normalize(pr), index, database_name_list)
            # name_list.append(name + " " + str(distance))
            name_list.append(name)
        # for pr in pred:
        #     name, distance = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
        #     if name != -1:
        #         mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
        #         name = re.match(mo, name)
        #         name_list.append(name.group(0) + str(distance))
        #     else:
        #         name_list.append("unknown" + str(distance))
        end_time = time.time()
        print("searchALL time: " + str(end_time - start_time))
        return name_list

# Extract 512-dim feature vectors
def embedding(order_img, model, cpu_or_cuda):
    number = len(order_img)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    batch = 64
    emb_list = []
    now = 0
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(l2_normalize(em))
            # print("batch" + str(now))
    emb_list = np.array(emb_list)
    return emb_list

# Process a folder of faces to cluster; return the feature-vector list and the file-name list
def get_claster_tmp_file_embedding(file_path, retinaface_model, retinaface_args, arcface_model, cpu_or_cuda):
    img_name = os.listdir(file_path)
    img_list = []
    for name in img_name:
        all_face, box_and_point = detect_one(os.path.join(file_path, name), retinaface_model, retinaface_args)
        img_list.append(all_face[0])
    img_list = np.array(img_list)
    # print(img_list.shape)
    emb_list = embedding(img_list, arcface_model, cpu_or_cuda)
    return emb_list, img_name

# Group faces of the same person into one cluster
def cluster(emb_list, name_list):
    all_claster = []
    cla = []
    in_claster_name = []
    img_number = len(emb_list)
    for index, emb in enumerate(emb_list):
        if name_list[index] in in_claster_name:
            continue
        for j in range(img_number - index - 1):
            if findEuclideanDistance(emb, emb_list[index + 1 + j]) < threshold:
                if name_list[index + 1 + j] not in in_claster_name:
                    cla.append(name_list[index + 1 + j])
                    in_claster_name.append(name_list[index + 1 + j])
        cla.append(name_list[index])
        in_claster_name.append(name_list[index])
        all_claster.append(cla)
        cla = []
    return all_claster
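
# Illustrative only: with threshold = 0.7, embeddings closer than the
# threshold land in one group (toy unit vectors; a.jpg and b.jpg are
# near-duplicates, c.jpg is orthogonal to both).
# e = np.array([[1.0, 0.0], [0.999, 0.045], [0.0, 1.0]], dtype=np.float32)
# print(cluster(e, ["a.jpg", "b.jpg", "c.jpg"]))  # [['b.jpg', 'a.jpg'], ['c.jpg']]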

# Load the face recognition model
def load_arcface_model(model_path, cpu_or_cuda):
    if cpu_or_cuda == "trt":
        model = TRTModule()
        model.load_state_dict(torch.load('./model/arcface_trt.pth'))
    elif cpu_or_cuda == "trt_new":
        model = iresnet100()
        model.load_state_dict(torch.load(model_path, map_location="cuda"))
        model = model.eval()
        model.to(torch.device("cuda"))
        x = torch.ones((1, 3, 112, 112)).to(torch.device("cuda"))
        model = torch2trt(model, [x], max_batch_size=4)
        torch.save(model.state_dict(), './model/arcface_trt.pth')
    else:
        model = iresnet100()
        model.load_state_dict(torch.load(model_path, map_location=cpu_or_cuda))
        model = model.eval()
        model.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    return model

# Compare whether two faces belong to the same person
def face_verification(img1, img2, model, cpu_or_cuda):
    img_list = np.concatenate((img1, img2), axis=0)
    img_list = torch.from_numpy(img_list)
    img_list = img_list.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        pred = model(img_list)
        pred = pred.cpu().numpy()
        distance = findEuclideanDistance(l2_normalize(pred[0]), l2_normalize(pred[1]))
        # print("EuclideanDistance is :" + str(distance))
        if distance < threshold:
            return 'same ', distance
        else:
            return 'different ', distance

if __name__ == '__main__':
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
    # retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    # retinaface_model = load_retinaface_model(retinaface_args)
    #
    # anti_spoofing_model_path = "model/anti_spoof_models"
    # anti_model = load_anti_model(anti_spoofing_model_path, 0)
    #
    # k_v = load_npy("./Database/student.npy")

    # Compare two faces
    # img1, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0001.jpg", retinaface_model, retinaface_args)
    # img2, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0002.jpg", retinaface_model, retinaface_args)
    # print(face_verification(img1, img2, arcface_model, cpu_or_cuda))

    # img3 = load_image(r"D:\Download\out\alig_students\student.jpg")
    # img3 = torch.from_numpy(img3)

    # Liveness (anti-spoofing) check on a single face
    # img3, b_p = detect_one(r"C:\Users\ASUS\Desktop\face\IMG_20210525_113950.jpg", retinaface_model, retinaface_args)
    # b = b_p[0]
    # w = b[2] - b[0]
    # h = b[3] - b[1]
    # b[2] = w
    # b[3] = h
    # label, value = anti_spoofing("./img/recognition/000_0.bmp", "model/anti_spoof_models", 0, np.array(b[:4], int), anti_model)
    # print(label, value)
    # name = findOne(img3, arcface_model, k_v, cpu_or_cuda)
    # print(name)

    # Face clustering
    # emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/face", retinaface_model,
    #                                                      retinaface_args, arcface_model, cpu_or_cuda)
    # print(cluster(emb_list, name_list))

    # img3, box_and_point = detect_one("D:\Download\out\students\student.jpg", retinaface_model, retinaface_args)
    # print(embedding(img3, arcface_model, cpu_or_cuda).shape)

    # Add a single face to the database
    # add_one_to_database(img1, arcface_model, "Aaron_Peirsol", "./Database/student.npy", cpu_or_cuda)
    # name = findOne(img1, arcface_model, k_v)
    # print(name)

    # Add faces to the database in batches
    create_database_batch(r"D:\Download\out\alig_students_all", arcface_model, "./Database/sfz.npy")

    # Recognize faces in a video
    # detect_video("software.mp4", "out.avi", retinaface_model, arcface_model, k_v, retinaface_args)
98
gender_age.py
Normal file
@ -0,0 +1,98 @@
import datetime
import mxnet as mx
import numpy as np
from retinaface_detect import detect_one, load_retinaface_model, set_retinaface_conf


# Gender/age model configuration
class ConfGenderModel(object):
    def __init__(self, image_size, image, model, gpu, det):
        self.image_size = image_size
        self.image = image
        self.gpu = gpu
        self.model = model
        self.det = det


# Instantiate a configuration
def set_gender_conf():
    args = ConfGenderModel(image_size='112,112',
                           image=r'C:\Users\ASUS\Desktop\man.png',
                           gpu=-1,
                           model='model/model,0',
                           det=0)
    return args


# Load the gender/age model
def load_gender_model(args, layer):
    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()
    _vec = args.image_size.split(',')
    assert len(_vec) == 2
    image_size = (int(_vec[0]), int(_vec[1]))

    _vec = args.model.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers[layer + '_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    return model

# Forward inference
def get_ga(model, img):
    # print(data)
    model.forward(img, is_train=False)
    ret = model.get_outputs()[0].asnumpy()
    g = ret[:, 0:2].flatten()
    gender = np.argmax(g)
    a = ret[:, 2:202].reshape((100, 2))
    a = np.argmax(a, axis=1)
    age = int(sum(a))
    return gender, age
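
# Illustrative only: the head emits 202 values per face - ret[:, 0:2] is a
# 2-way gender score and ret[:, 2:202] is 100 binary pairs whose per-pair
# argmax votes are summed into the age estimate. A fake output with 30 "yes"
# pairs therefore decodes to age 30:
# ret = np.zeros((1, 202), dtype=np.float32)
# ret[0, 2:62][1::2] = 1.0
# a = np.argmax(ret[:, 2:202].reshape((100, 2)), axis=1)
# assert int(sum(a)) == 30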

# Predict the gender and age of every person in a face list
def gender_age(img_list, gender_model):
    gender_list = []
    age_list = []
    if len(img_list) == 0:
        print("find no face")
    else:
        time_now = datetime.datetime.now()
        img_list *= 127.5
        img_list += 127.5

        for img in img_list:
            img = np.expand_dims(img, axis=0)
            img = mx.nd.array(img)
            img = mx.io.DataBatch(data=(img,))
            gender, age = get_ga(gender_model, img)
            if gender == 1:
                gender_list.append("man")
            else:
                gender_list.append('woman')
            age_list.append(age)
        time_now2 = datetime.datetime.now()
        diff = time_now2 - time_now
        print('time cost', diff.total_seconds())
    return gender_list, age_list

if __name__ == "__main__":
    args = set_gender_conf()
    retinaface_args = set_retinaface_conf()
    gender_model = load_gender_model(args, 'fc1')
    retinaface_model = load_retinaface_model(retinaface_args)
    img_list, box_and_point = detect_one(args.image, retinaface_model, retinaface_args)
    gender_list, age_list = gender_age(img_list, gender_model)
    print(gender_list)
49
gender_model.py
Normal file
@ -0,0 +1,49 @@
import numpy as np
import mxnet as mx


# Load the gender/age model
def get_model(ctx, image_size, model_str, layer):
    _vec = model_str.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers[layer + '_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    return model


class GenderModel:
    def __init__(self, args):
        self.args = args
        if args.gpu >= 0:
            ctx = mx.gpu(args.gpu)
        else:
            ctx = mx.cpu()
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.model = None
        if len(args.model) > 0:
            self.model = get_model(ctx, image_size, args.model, 'fc1')

        self.det_minsize = 50
        self.det_threshold = [0.6, 0.7, 0.8]
        # self.det_factor = 0.9
        self.image_size = image_size

    def get_ga(self, data):
        # print(data)
        self.model.forward(data, is_train=False)
        ret = self.model.get_outputs()[0].asnumpy()
        g = ret[:, 0:2].flatten()
        gender = np.argmax(g)
        a = ret[:, 2:202].reshape((100, 2))
        a = np.argmax(a, axis=1)
        age = int(sum(a))
        return gender, age
BIN
img/search/000_1.bmp
Normal file
BIN
img/search/002_1.bmp
Normal file
BIN
img/search/377_3.bmp
Normal file
BIN
img/search/face/000_0.bmp
Normal file
BIN
img/search/face/000_1.bmp
Normal file
BIN
img/search/face/000_2.bmp
Normal file
BIN
img/search/face/000_3.bmp
Normal file
BIN
img/search/face/000_4.bmp
Normal file
BIN
img/search/face/001_0.bmp
Normal file
BIN
img/search/face/001_1.bmp
Normal file
BIN
img/search/face/001_2.bmp
Normal file
BIN
img/search/face/001_3.bmp
Normal file
BIN
img/search/face/001_4.bmp
Normal file
BIN
img/search/face/002_0.bmp
Normal file
BIN
img/search/face/002_1.bmp
Normal file
BIN
img/search/face/002_2.bmp
Normal file
BIN
img/search/face/002_3.bmp
Normal file
BIN
img/search/face/002_4.bmp
Normal file
BIN
img/search/face/003_0.bmp
Normal file
BIN
img/search/face/003_1.bmp
Normal file
BIN
img/search/face/003_2.bmp
Normal file
BIN
img/search/face/003_3.bmp
Normal file
BIN
img/search/face/003_4.bmp
Normal file
BIN
img/search/face/004_0.bmp
Normal file
BIN
img/search/face/004_1.bmp
Normal file
BIN
img/search/face/004_2.bmp
Normal file
BIN
img/search/face/004_3.bmp
Normal file
BIN
img/search/face/004_4.bmp
Normal file
2
layers/__init__.py
Normal file
@ -0,0 +1,2 @@
from .functions import *
from .modules import *
BIN
layers/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/functions/__pycache__/prior_box.cpython-38.pyc
Normal file
34
layers/functions/prior_box.py
Normal file
@ -0,0 +1,34 @@
import torch
from itertools import product as product
import numpy as np
from math import ceil


class PriorBox(object):
    def __init__(self, cfg, image_size=None, phase='train'):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
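
# Illustrative usage (the cfg values below mirror the usual mobilenet0.25
# RetinaFace settings but are assumptions here, not taken from this repo):
# cfg = {'min_sizes': [[16, 32], [64, 128], [256, 512]],
#        'steps': [8, 16, 32], 'clip': False}
# priors = PriorBox(cfg, image_size=(640, 640)).forward()
# priors.shape == torch.Size([16800, 4])  # rows are normalized (cx, cy, w, h)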
3
layers/modules/__init__.py
Normal file
@ -0,0 +1,3 @@
from .multibox_loss import MultiBoxLoss

__all__ = ['MultiBoxLoss']
BIN
layers/modules/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/modules/__pycache__/multibox_loss.cpython-38.pyc
Normal file
125
layers/modules/multibox_loss.py
Normal file
@ -0,0 +1,125 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.box_utils import match, log_sum_exp
from data import cfg_mnet

GPU = cfg_mnet['gpu_train']


class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold parameter
           (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of ground
           truth boxes and their matched 'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative examples
           that comes with using a large number of default bounding boxes.
           (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
        Args:
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        self.variance = [0.1, 0.2]

    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """

        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        zeros = torch.tensor(0).cuda()
        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
33
losses.py
Normal file
@ -0,0 +1,33 @@
import torch
from torch import nn


class CosFace(nn.Module):
    def __init__(self, s=64.0, m=0.40):
        super(CosFace, self).__init__()
        self.s = s
        self.m = m

    def forward(self, cosine, label):
        index = torch.where(label != -1)[0]
        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
        m_hot.scatter_(1, label[index, None], self.m)
        cosine[index] -= m_hot
        ret = cosine * self.s
        return ret


class ArcFace(nn.Module):
    def __init__(self, s=64.0, m=0.5):
        super(ArcFace, self).__init__()
        self.s = s
        self.m = m

    def forward(self, cosine: torch.Tensor, label):
        index = torch.where(label != -1)[0]
        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
        m_hot.scatter_(1, label[index, None], self.m)
        cosine.acos_()
        cosine[index] += m_hot
        cosine.cos_().mul_(self.s)
        return cosine
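
# Illustrative only: what the additive angular margin does to one logit row
# (toy values; the in-place ops mutate their input, hence the .clone()).
# logits = torch.tensor([[0.8, 0.3]])   # cosines toward classes 0 and 1
# label = torch.tensor([0])
# out = ArcFace(s=64.0, m=0.5)(logits.clone(), label)
# class 0 becomes 64 * cos(acos(0.8) + 0.5) ~= 26.4, far below the
# unmargined 64 * 0.8 = 51.2, so training must push the true-class cosine up.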
BIN
model/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth
Normal file
BIN
model/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth
Normal file
BIN
model/backbone100.pth
Normal file
BIN
model/model-0000.params
Normal file
2399
model/model-symbol.json
Normal file
BIN
model/onnx/centerface.onnx
Normal file
BIN
model/onnx/centerface_bnmerged.onnx
Normal file
0
models/__init__.py
Normal file
BIN
models/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
models/__pycache__/net.cpython-38.pyc
Normal file
BIN
models/__pycache__/retinaface.cpython-38.pyc
Normal file
137
models/net.py
Normal file
@ -0,0 +1,137 @@
import time
import torch
import torch.nn as nn
import torchvision.models._utils as _utils
import torchvision.models as models
import torch.nn.functional as F
from torch.autograd import Variable


def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )


def conv_bn1X1(inp, oup, stride, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_dw(inp, oup, stride, leaky=0.1):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    )


class SSH(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        leaky = 0
        if out_channel <= 64:
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out


class FPN(nn.Module):
    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        leaky = 0
        if out_channels <= 64:
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        # names = list(input.keys())
        input = list(input.values())

        output1 = self.output1(input[0])
        output2 = self.output2(input[1])
        output3 = self.output3(input[2])

        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
        output2 = output2 + up3
        output2 = self.merge2(output2)

        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
        output1 = output1 + up2
        output1 = self.merge1(output1)

        out = [output1, output2, output3]
        return out


class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),  # 3
            conv_dw(8, 16, 1),    # 7
            conv_dw(16, 32, 2),   # 11
            conv_dw(32, 32, 1),   # 19
            conv_dw(32, 64, 2),   # 27
            conv_dw(64, 64, 1),   # 43
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),   # 43 + 16 = 59
            conv_dw(128, 128, 1),  # 59 + 32 = 91
            conv_dw(128, 128, 1),  # 91 + 32 = 123
            conv_dw(128, 128, 1),  # 123 + 32 = 155
            conv_dw(128, 128, 1),  # 155 + 32 = 187
            conv_dw(128, 128, 1),  # 187 + 32 = 219
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),  # 219 + 32 = 241
            conv_dw(256, 256, 1),  # 241 + 64 = 301
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        # x = self.model(x)
        x = x.view(-1, 256)
        x = self.fc(x)
        return x

127
models/retinaface.py
Normal file
@ -0,0 +1,127 @@
import torch
import torch.nn as nn
import torchvision.models.detection.backbone_utils as backbone_utils
import torchvision.models._utils as _utils
import torch.nn.functional as F
from collections import OrderedDict

from models.net import MobileNetV1 as MobileNetV1
from models.net import FPN as FPN
from models.net import SSH as SSH


class ClassHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(ClassHead, self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 2)


class BboxHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(BboxHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 4)


class LandmarkHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(LandmarkHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 10)


class RetinaFace(nn.Module):
    def __init__(self, cfg=None, phase='train'):
        """
        :param cfg:  Network related settings.
        :param phase: train or test.
        """
        super(RetinaFace, self).__init__()
        self.phase = phase
        backbone = None
        if cfg['name'] == 'mobilenet0.25':
            backbone = MobileNetV1()
            if cfg['pretrain']:
                checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
                from collections import OrderedDict
                new_state_dict = OrderedDict()
                for k, v in checkpoint['state_dict'].items():
                    name = k[7:]  # remove module.
                    new_state_dict[name] = v
                # load params
                backbone.load_state_dict(new_state_dict)
        elif cfg['name'] == 'Resnet50':
            import torchvision.models as models
            backbone = models.resnet50(pretrained=cfg['pretrain'])

        self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])
        in_channels_stage2 = cfg['in_channel']
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]
        out_channels = cfg['out_channel']
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        classhead = nn.ModuleList()
        for i in range(fpn_num):
            classhead.append(ClassHead(inchannels, anchor_num))
        return classhead

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        bboxhead = nn.ModuleList()
        for i in range(fpn_num):
            bboxhead.append(BboxHead(inchannels, anchor_num))
        return bboxhead

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        landmarkhead = nn.ModuleList()
        for i in range(fpn_num):
            landmarkhead.append(LandmarkHead(inchannels, anchor_num))
        return landmarkhead

    def forward(self, inputs):
        out = self.body(inputs)

        # FPN
        fpn = self.fpn(out)

        # SSH
        feature1 = self.ssh1(fpn[0])
        feature2 = self.ssh2(fpn[1])
        feature3 = self.ssh3(fpn[2])
        features = [feature1, feature2, feature3]

        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)

        if self.phase == 'train':
            output = (bbox_regressions, classifications, ldm_regressions)
        else:
            output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
        return output
161
partial_fc.py
Normal file
@ -0,0 +1,161 @@
import logging
import os

import torch
import torch.distributed as dist
from torch.nn import Module
from torch.nn.functional import normalize, linear
from torch.nn.parameter import Parameter


class PartialFC(Module):
    """
    Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint,
    Partial FC: Training 10 Million Identities on a Single Machine
    See the original paper:
    https://arxiv.org/abs/2010.05222
    """

    @torch.no_grad()
    def __init__(self, rank, local_rank, world_size, batch_size, resume,
                 margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"):
        super(PartialFC, self).__init__()
        #
        self.num_classes: int = num_classes
        self.rank: int = rank
        self.local_rank: int = local_rank
        self.device: torch.device = torch.device("cuda:{}".format(self.local_rank))
        self.world_size: int = world_size
        self.batch_size: int = batch_size
        self.margin_softmax: callable = margin_softmax
        self.sample_rate: float = sample_rate
        self.embedding_size: int = embedding_size
        self.prefix: str = prefix
        self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size)
        self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size)
        self.num_sample: int = int(self.sample_rate * self.num_local)

        self.weight_name = os.path.join(self.prefix, "rank:{}_softmax_weight.pt".format(self.rank))
        self.weight_mom_name = os.path.join(self.prefix, "rank:{}_softmax_weight_mom.pt".format(self.rank))

        if resume:
            try:
                self.weight: torch.Tensor = torch.load(self.weight_name)
                logging.info("softmax weight resume successfully!")
            except (FileNotFoundError, KeyError, IndexError):
                self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
                logging.info("softmax weight resume fail!")

            try:
                self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name)
                logging.info("softmax weight mom resume successfully!")
            except (FileNotFoundError, KeyError, IndexError):
                self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
                logging.info("softmax weight mom resume fail!")
        else:
            self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
            self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
            logging.info("softmax weight init successfully!")
            logging.info("softmax weight mom init successfully!")
        self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank)

        self.index = None
        if int(self.sample_rate) == 1:
            self.update = lambda: 0
            self.sub_weight = Parameter(self.weight)
            self.sub_weight_mom = self.weight_mom
        else:
            self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank))

    def save_params(self):
        torch.save(self.weight.data, self.weight_name)
        torch.save(self.weight_mom, self.weight_mom_name)

    @torch.no_grad()
    def sample(self, total_label):
        index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local)
        total_label[~index_positive] = -1
        total_label[index_positive] -= self.class_start
        if int(self.sample_rate) != 1:
            positive = torch.unique(total_label[index_positive], sorted=True)
            if self.num_sample - positive.size(0) >= 0:
                perm = torch.rand(size=[self.num_local], device=self.device)
                perm[positive] = 2.0
                index = torch.topk(perm, k=self.num_sample)[1]
                index = index.sort()[0]
            else:
                index = positive
            self.index = index
            total_label[index_positive] = torch.searchsorted(index, total_label[index_positive])
            self.sub_weight = Parameter(self.weight[index])
            self.sub_weight_mom = self.weight_mom[index]

    def forward(self, total_features, norm_weight):
        torch.cuda.current_stream().wait_stream(self.stream)
        logits = linear(total_features, norm_weight)
        return logits

    @torch.no_grad()
    def update(self):
        self.weight_mom[self.index] = self.sub_weight_mom
        self.weight[self.index] = self.sub_weight

    def prepare(self, label, optimizer):
        with torch.cuda.stream(self.stream):
            total_label = torch.zeros(
                size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long)
            dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label)
            self.sample(total_label)
            optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None)
            optimizer.param_groups[-1]['params'][0] = self.sub_weight
            optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom
            norm_weight = normalize(self.sub_weight)
            return total_label, norm_weight

    def forward_backward(self, label, features, optimizer):
        total_label, norm_weight = self.prepare(label, optimizer)
        total_features = torch.zeros(
            size=[self.batch_size * self.world_size, self.embedding_size], device=self.device)
        dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data)
        total_features.requires_grad = True

        logits = self.forward(total_features, norm_weight)
        logits = self.margin_softmax(logits, total_label)

        with torch.no_grad():
            max_fc = torch.max(logits, dim=1, keepdim=True)[0]
            dist.all_reduce(max_fc, dist.ReduceOp.MAX)

            # calculate exp(logits) and all-reduce
            logits_exp = torch.exp(logits - max_fc)
            logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
            dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)

            # calculate prob
            logits_exp.div_(logits_sum_exp)

            # get one-hot
            grad = logits_exp
            index = torch.where(total_label != -1)[0]
            one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device)
            one_hot.scatter_(1, total_label[index, None], 1)

            # calculate loss
            loss = torch.zeros(grad.size()[0], 1, device=grad.device)
            loss[index] = grad[index].gather(1, total_label[index, None])
            dist.all_reduce(loss, dist.ReduceOp.SUM)
            loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)

            # calculate grad
            grad[index] -= one_hot
            grad.div_(self.batch_size * self.world_size)

        logits.backward(grad)
        if total_features.grad is not None:
            total_features.grad.detach_()
        x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True)
        # feature gradient all-reduce
        dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0)))
        x_grad = x_grad * self.world_size
        # backward backbone
        return x_grad, loss_v
|
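Not part of this diff: a minimal sketch of how PartialFC.forward_backward is usually driven from a distributed training loop, for orientation. The names loader, backbone, module_partial_fc, opt_backbone and opt_pfc are assumptions, not objects defined here.

import torch.nn.functional as F

# Hypothetical training step (assumes torch.distributed is initialized and the
# last param group of opt_pfc holds sub_weight, as prepare() expects).
for step, (img, label) in enumerate(loader):
    features = F.normalize(backbone(img))                 # (batch, 512) embeddings
    x_grad, loss_v = module_partial_fc.forward_backward(label, features, opt_pfc)
    features.backward(x_grad)        # feed this rank's feature gradient to the backbone
    opt_backbone.step()
    opt_pfc.step()
    module_partial_fc.update()       # write sampled sub-weights back into the full matrix
    opt_backbone.zero_grad()
    opt_pfc.zero_grad()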
14
play.py
Normal file
@ -0,0 +1,14 @@
import cv2

cap = cv2.VideoCapture("rtsp://admin:2020@uestc@192.168.30.83:554/h264")
ret, frame = cap.read()
h, w = frame.shape[:2]
print("height:" + str(h) + " width:" + str(w))
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)
# while ret:
#     cv2.imshow('out', frame)
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
#     ret, frame = cap.read()
cap.release()
cv2.destroyAllWindows()
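play.py reads frame.shape before checking ret, so a dead stream raises an AttributeError. A guarded variant of the same probe, as a sketch (the URL is a placeholder):

import cv2

cap = cv2.VideoCapture("rtsp://user:password@host:554/h264")  # placeholder URL
ret, frame = cap.read()
if not ret or frame is None:
    raise RuntimeError("could not read a frame from the RTSP source")
h, w = frame.shape[:2]
print("height: %d  width: %d  fps: %.2f" % (h, w, cap.get(cv2.CAP_PROP_FPS)))
cap.release()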
282
realtime_detect.py
Normal file
@ -0,0 +1,282 @@
import argparse
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss

ppi = 1280   # maximum width of the output frame
ppi2 = 1100  # maximum width of the detection frame
step = 3     # run the detector every `step` frames


def detect_rtsp(rtsp, out_rtsp, net, arcface_model, index, database_name_list, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(rtsp)
    ret, frame = cap.read()
    h, w = frame.shape[:2]

    factor = 0
    if w > ppi:
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]
    arf = 1
    detect_h, detect_w = frame.shape[:2]
    frame_detect = frame
    factor2 = 0
    if w > ppi2:
        factor2 = h / w
        frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
        detect_h, detect_w = frame_detect.shape[:2]
        arf = w / detect_w  # scale factor from detection frame back to output frame
    print(w, h)
    print(detect_w, detect_h)

    fps = cap.get(cv2.CAP_PROP_FPS)
    #print(fps)
    size = (w, h)
    sizeStr = str(size[0]) + 'x' + str(size[1])
    if out_rtsp.startswith("rtsp"):  # detect_rtsp is only called with an rtsp:// sink below
        command = ['ffmpeg',
                   '-y', '-an',
                   '-f', 'rawvideo',
                   '-vcodec', 'rawvideo',
                   '-pix_fmt', 'bgr24',
                   '-s', sizeStr,
                   '-r', "25",
                   '-i', '-',
                   '-c:v', 'libx265',
                   '-b:v', '3000k',
                   '-pix_fmt', 'yuv420p',
                   '-preset', 'ultrafast',
                   '-f', 'rtsp',
                   out_rtsp]
        pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
    #out = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    number = step
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
    scale = scale.to(device)
    scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h])
    scale1 = scale1.to(device)

    # canonical 5-point ArcFace template for 112x112 alignment
    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            tic = time.time()
            img = np.float32(frame_detect)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time_findall = time.time()
            for i, det in enumerate(dets[:4]):
                if det[4] < args.vis_thres:
                    continue
                #boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                dst = dst * arf

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                print("warpALL time: " + str(time.time() - start_time_findall))
                #start_time = time.time()
                name_list = findAll(face_list, arcface_model, index, database_name_list, k_v, "cpu" if args.cpu else "cuda")
                #print(name_list)

            #print("findOneframe time: " + str(time.time() - start_time_findall))
            # start_time = time.time()
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         boxes = boxes * arf
            #         name = name_list[i]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
            #         cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
            start_time = time.time()
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                j = 0  # name_list only holds names for faces that passed vis_thres
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[j]
                    j += 1
                    if not isinstance(name, str):
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            pipe.stdin.write(frame.tobytes())
            #out.write(frame)
            print("drawOneframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            number = 0
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if ret != 0 and factor2 != 0:
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            print("readframe time: " + str(time.time() - start_time))
        else:
            number += 1
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                j = 0  # reuse the names from the last detected frame
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[j]
                    j += 1
                    if not isinstance(name, str):
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
                                   width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            start_time = time.time()
            pipe.stdin.write(frame.tobytes())
            #out.write(frame)
            print("writeframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if ret != 0 and factor2 != 0:
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    #out.release()
    pipe.terminate()
    print('total time: {:.4f}'.format(time.time() - tic_total))
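detect_rtsp streams its annotated frames by writing raw BGR bytes into an ffmpeg child process. A stripped-down sketch of that pattern (frame size, codec and output URL are placeholders; the script above uses libx265):

import subprocess
import numpy as np

w, h = 1280, 720                                   # assumed frame size
command = ['ffmpeg', '-y', '-an',
           '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgr24',
           '-s', '{}x{}'.format(w, h), '-r', '25',
           '-i', '-',                              # frames arrive on stdin
           '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-preset', 'ultrafast',
           '-f', 'rtsp', 'rtsp://localhost:5001/demo']
pipe = subprocess.Popen(command, stdin=subprocess.PIPE)
frame = np.zeros((h, w, 3), dtype=np.uint8)        # stand-in for a decoded frame
pipe.stdin.write(frame.tobytes())
pipe.stdin.close()
pipe.wait()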
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--rtsp",
        type=str,
        default="",
        dest="rtsp_path"
    )
    args = parser.parse_args()
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # load the face recognition model
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda="cuda")
    # load the face detection model
    retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    retinaface_model = load_retinaface_model(retinaface_args)
    k_v = load_npy("./Database/student.npy")
    #print(list(k_v.keys()))
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    nlist = 10
    quantizer = faiss.IndexFlatL2(512)  # coarse quantizer for the IVF index
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)
    #index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 10

    detect_rtsp(args.rtsp_path, 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index, database_name_list, k_v, retinaface_args)

    #detect_rtsp("rtsp://admin:2020@uestc@192.168.14.32:8557/h264", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
    #detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
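For orientation, a sketch of how the IVF index built above can answer a query. findAll's internals are not part of this diff, so this is only an assumed shape of the lookup: emb stands for an (N, 512) float32 batch of embeddings, and the distance threshold is illustrative. Note that faiss reports squared L2 distances for METRIC_L2.

import numpy as np

def lookup(emb, index, database_name_list, threshold=1.20):
    # nearest neighbor for each embedding; D holds squared L2 distances
    D, I = index.search(np.ascontiguousarray(emb, dtype=np.float32), 1)
    names = []
    for dist, idx in zip(D[:, 0], I[:, 0]):
        names.append(database_name_list[idx] if dist < threshold ** 2 else "unknown")
    return names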
283
recognition_video.py
Normal file
@ -0,0 +1,283 @@
import time
from centerface import CenterFace
from skimage import transform as trans
import numpy as np
import torch
import cv2
from backbones import iresnet100, iresnet18
from create_database import findOne, load_npy, findAll
from PIL import Image, ImageDraw, ImageFont


def show():
    cap = cv2.VideoCapture("test.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    centerface = CenterFace()
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('ccvt6.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, size)
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detect time: " + str(end_time - start_time))
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video():
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            # pad the box into a square before the 112x112 resize when it fits in the frame
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()

            for face in face_list:
                face = face[np.newaxis, :, :, :]

                name_list.append(findOne(face, model, k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 12)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(0, 225, 255), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=1)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video_GPU():
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()
            # recognize the whole batch at once instead of one face at a time
            name_list = findAll(face_list, model, k_v)
            # for face in face_list:
            #     face = face[np.newaxis, :, :, :]
            #
            #     name_list.append(findOne(face,model,k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 18)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=2)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video_GPU_retinaface():
    # note: despite the name, this variant still uses CenterFace for detection
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        print(dets.shape)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()
            name_list = findAll(face_list, model, k_v)
            # for face in face_list:
            #     face = face[np.newaxis, :, :, :]
            #
            #     name_list.append(findOne(face,model,k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 18)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=2)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


video_GPU_retinaface()
#video_GPU()
#show()
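The functions above share one crop rule: pad the detector box into a square before resizing to 112x112, so faces are not stretched. The same rule isolated as a sketch (the helper name is hypothetical, not part of the diff):

import cv2

def square_crop(frame, box):
    # box: (x1, y1, x2, y2) from the detector (assumed input)
    x1, y1, x2, y2 = [int(v) for v in box]
    w, h = x2 - x1, y2 - y1
    pad = abs(w - h) // 2
    # widen the narrow side; fall back to the raw box at the frame border
    if w <= h and x1 - pad >= 0 and x2 + pad <= frame.shape[1]:
        crop = frame[y1:y2, x1 - pad:x2 + pad, :]
    else:
        crop = frame[y1:y2, x1:x2, :]
    return cv2.resize(crop, (112, 112))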
111
requirements.txt
Normal file
@ -0,0 +1,111 @@
Package                Version
---------------------- -----------
appdirs                1.4.4
attrs                  21.2.0
backcall               0.2.0
beautifulsoup4         4.9.3
certifi                2021.5.30
cffi                   1.14.0
chardet                4.0.0
click                  8.0.1
conda                  4.9.1
conda-build            3.20.5
conda-package-handling 1.7.0
cryptography           2.9.2
cycler                 0.10.0
dataclasses            0.6
decorator              4.4.2
dnspython              2.0.0
faiss-cpu              1.7.1
filelock               3.0.12
fire                   0.4.0
Flask                  1.1.2
future                 0.18.2
glob2                  0.7
graphsurgeon           0.4.5
graphviz               0.8.4
h5py                   3.3.0
idna                   2.10
imageio                2.9.0
iniconfig              1.1.1
ipython                7.18.1
ipython-genutils       0.2.0
itsdangerous           2.0.1
jedi                   0.17.2
Jinja2                 3.0.1
joblib                 1.0.1
kiwisolver             1.3.1
libarchive-c           2.9
Mako                   1.1.4
MarkupSafe             2.0.1
matplotlib             3.4.1
mkl-fft                1.2.0
mkl-random             1.1.1
mkl-service            2.3.0
mxnet                  1.8.0.post0
networkx               2.5.1
nltk                   3.6
numpy                  1.20.3
olefile                0.46
opencv-python          4.5.1.48
packaging              21.0
pandas                 1.2.4
parso                  0.7.0
pexpect                4.8.0
pickleshare            0.7.5
Pillow                 8.0.0
pip                    20.0.2
pkginfo                1.6.0
pluggy                 1.0.0
prefetch-generator     1.0.1
prompt-toolkit         3.0.8
protobuf               3.15.8
psutil                 5.7.2
ptyprocess             0.6.0
py                     1.9.0
pycosat                0.6.3
pycparser              2.20
pycuda                 2021.1
Pygments               2.7.1
pyOpenSSL              19.1.0
pyparsing              2.4.7
PySocks                1.7.1
pytest                 6.2.5
python-dateutil        2.8.1
python-etcd            0.4.5
pytools                2021.2.6
pytz                   2020.1
PyWavelets             1.1.1
PyYAML                 5.3.1
pyzmq                  22.1.0
regex                  2021.8.3
requests               2.25.1
ruamel-yaml            0.15.87
scikit-image           0.18.1
scipy                  1.6.3
seaborn                0.11.1
setuptools             57.1.0
six                    1.14.0
soupsieve              2.0.1
tensorboard-logger     0.1.0
tensorrt               7.2.3.4
termcolor              1.1.0
tifffile               2021.4.8
toml                   0.10.2
torch                  1.7.1
torch2trt              0.2.0
torchelastic           0.2.1
torchfile              0.1.0
torchtext              0.8.0
torchvision            0.8.2
tornado                6.1
tqdm                   4.46.0
traitlets              5.0.5
typing-extensions      3.7.4.3
uff                    0.6.9
urllib3                1.26.5
visdom                 0.1.8
wcwidth                0.2.5
websocket-client       1.1.0
Werkzeug               2.0.1
wheel                  0.34.2
762
retinaface_arcface.py
Normal file
@ -0,0 +1,762 @@
from __future__ import print_function
import os
import argparse
import re

import faiss
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from data import cfg_mnet, cfg_re50
from face_api import create_database_from_img, load_arcface_model, findAll
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
import time
from face_api import load_arcface_model, load_npy
from skimage import transform as trans
from backbones import iresnet100, iresnet18
#from create_database import findOne, load_npy,findAll
from PIL import Image, ImageDraw, ImageFont

parser = argparse.ArgumentParser(description='Retinaface')

parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--cpu', action="store_true", default=False if torch.cuda.is_available() else True, help='Use cpu inference')
parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
parser.add_argument('--top_k', default=5000, type=int, help='top_k')
parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
args = parser.parse_args()


def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}


def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


def image_to112x112_retinaface():
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"D:\Download\out\cfp"
    output_path = r"D:\Download\out\cfp_align"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    for f in folder1:
        output_name_path = os.path.join(output_path, f)
        if os.path.exists(output_name_path) == 0:
            os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, f)
        img_list = os.listdir(img_name_path)

        for img in img_list:
            count2 += 1
            print(count2)
            path = os.path.join(img_name_path, img)
            align_img_path = os.path.join(output_name_path, img)
            # print(path)
            frame = cv2.imread(path)
            h, w = frame.shape[:2]
            img = np.float32(frame)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            tic = time.time()
            loc, conf, landms = net(img)  # forward pass
            print('net forward time: {:.4f}'.format(time.time() - tic))

            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            score = 500
            # pick the face closest to the image center, then align it
            if args.save_image:
                dst = []
                for i, det in enumerate(dets):
                    if det[4] < args.vis_thres:
                        continue
                    center_x = (det[2] + det[0]) / 2
                    center_y = (det[3] + det[1]) / 2
                    if abs(center_x - 125) + abs(center_y - 125) < score:
                        score = abs(center_x - 125) + abs(center_y - 125)
                        dst = np.reshape(landms[i], (5, 2))
                if len(dst) > 0:
                    src1 = np.array([
                        [38.3814, 51.6963],
                        [73.6186, 51.5014],
                        [56.1120, 71.7366],
                        [41.6361, 92.3655],
                        [70.8167, 92.2041]], dtype=np.float32)
                    tform = trans.SimilarityTransform()
                    tform.estimate(dst, src1)
                    M = tform.params[0:2, :]

                    if w < 112 or h < 112:
                        count += 1
                        #print(align_img_path)
                        continue
                    frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                    img112 = frame[0:112, 0:112, :]
                    cv2.imwrite(align_img_path, img112)
    print("images smaller than 112: " + str(count))


def sfz_to112x112_retinaface(arcface_model, cpu_or_cuda):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"D:\Download\out\alig_students_all"
    output_path = r"D:\Download\out\alig_students_all"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    print(len(folder1))
    # print(folder1[0][:-4])
    # return 0
    order_img = []
    order_name = []
    tic = time.time()
    for img_name in folder1[:2500]:
        # output_name_path = os.path.join(output_path, img_name)
        # if os.path.exists(output_name_path) == 0:
        #     os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, img_name)
        #img_list = os.listdir(img_name_path)
        count2 += 1
        if count2 % 1000 == 0:
            print('net forward time: {:.4f}'.format(time.time() - tic))
            print(count2)
            # flush the accumulated batch into the database every 1000 images
            if len(order_img) > 0:
                order_img = np.array(order_img)
                order_img = order_img.transpose((0, 3, 1, 2))
                order_img = np.array(order_img, dtype=np.float32)
                order_img -= 127.5
                order_img /= 127.5
                # print(order_img.shape)
                # print(len(order_name))
                create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
                order_img = []
                order_name = []
                tic = time.time()

        # if img_name[19] != "1":
        #     continue

        #path = os.path.join(img_name_path, img)
        align_img_path = os.path.join(output_path, img_name)
        # print(path)
        try:
            frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
            h, w, d = frame.shape
        except AttributeError:
            print(img_name)
            continue
        if d == 1:
            continue
        factor = h / w
        if w > 1000:
            frame = cv2.resize(frame, (600, int(600 * factor)))
            h, w = frame.shape[:2]
        img = np.float32(frame)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        #tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        #print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        score = 500
        # align the last face that passed the visualization threshold
        if args.save_image:
            dst = []
            for i, det in enumerate(dets):
                if det[4] < args.vis_thres:
                    continue
                # center_x = (det[2] + det[0]) / 2
                # center_y = (det[3] + det[1]) / 2
                # if abs(center_x - 125) + abs(center_y - 125) < score:
                #     score = abs(center_x - 125) + abs(center_y - 125)
                dst = np.reshape(landms[i], (5, 2))
            if len(dst) > 0:
                src1 = np.array([
                    [38.3814, 51.6963],
                    [73.6186, 51.5014],
                    [56.1120, 71.7366],
                    [41.6361, 92.3655],
                    [70.8167, 92.2041]], dtype=np.float32)
                tform = trans.SimilarityTransform()
                tform.estimate(dst, src1)
                M = tform.params[0:2, :]

                if w < 112 or h < 112:
                    count += 1
                    print(img_name_path)
                    continue
                frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame[0:112, 0:112, :]
                order_img.append(img112)
                order_name.append(img_name[:-6])
                #cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
                #cv2.imwrite(align_img_path, img112)

    print("images smaller than 112: " + str(count))
    if len(order_img) > 0:
        order_img = np.array(order_img)
        order_img = order_img.transpose((0, 3, 1, 2))
        order_img = np.array(order_img, dtype=np.float32)
        order_img -= 127.5
        order_img /= 127.5
        # print(order_img.shape)
        # print(len(order_name))
        create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
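Both conversion routines above align faces the same way: estimate a similarity transform from the five detected landmarks to the canonical 112x112 ArcFace template, warp, and crop. The step in isolation, as a sketch (the helper name is hypothetical; dst is assumed to come from a detector):

import cv2
import numpy as np
from skimage import transform as trans

TEMPLATE = np.array([[38.3814, 51.6963], [73.6186, 51.5014], [56.1120, 71.7366],
                     [41.6361, 92.3655], [70.8167, 92.2041]], dtype=np.float32)

def align_face(frame, dst):
    # dst: (5, 2) landmarks; map them onto the template and warp the frame
    tform = trans.SimilarityTransform()
    tform.estimate(dst, TEMPLATE)
    M = tform.params[0:2, :]                    # 2x3 affine matrix
    warped = cv2.warpAffine(frame, M, (frame.shape[1], frame.shape[0]), borderValue=0.0)
    return warped[0:112, 0:112, :]              # aligned 112x112 crop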
def count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"../face/czrkzp2"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    print(len(folder1))
    # print(folder1[0][:-4])
    # return 0
    order_img = []
    order_name = []
    tic = time.time()
    for img_name in folder1[:15000]:
        # output_name_path = os.path.join(output_path, img_name)
        # if os.path.exists(output_name_path) == 0:
        #     os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, img_name)
        #img_list = os.listdir(img_name_path)
        count2 += 1
        if count2 % 5000 == 0:
            print('net forward time: {:.4f}'.format(time.time() - tic))
            print(count2)
            # if len(order_img) > 0:
            #     order_img = np.array(order_img)
            #     order_img = order_img.transpose((0, 3, 1, 2))
            #     order_img = np.array(order_img, dtype=np.float32)
            #     order_img -= 127.5
            #     order_img /= 127.5
            #     # print(order_img.shape)
            #     # print(len(order_name))
            #     create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
            #     order_img = []
            #     order_name = []
            #     tic = time.time()

        if img_name[19] == "1":
            continue

        #path = os.path.join(img_name_path, img)
        #align_img_path = os.path.join(output_path, img_name)
        # print(path)
        #frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        try:
            frame = cv2.imread(img_name_path)
            h, w, d = frame.shape
        except AttributeError:
            print(img_name)
            continue
        if d == 1:
            continue
        factor = h / w
        if w > 1000:
            frame = cv2.resize(frame, (600, int(600 * factor)))
            h, w = frame.shape[:2]
        img = np.float32(frame)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        #tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        #print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        score = 500
        # align the last face that passed the visualization threshold
        if args.save_image:
            dst = []
            for i, det in enumerate(dets):
                if det[4] < args.vis_thres:
                    continue
                # center_x = (det[2] + det[0]) / 2
                # center_y = (det[3] + det[1]) / 2
                # if abs(center_x - 125) + abs(center_y - 125) < score:
                #     score = abs(center_x - 125) + abs(center_y - 125)
                dst = np.reshape(landms[i], (5, 2))
            if len(dst) > 0:
                src1 = np.array([
                    [38.3814, 51.6963],
                    [73.6186, 51.5014],
                    [56.1120, 71.7366],
                    [41.6361, 92.3655],
                    [70.8167, 92.2041]], dtype=np.float32)
                tform = trans.SimilarityTransform()
                tform.estimate(dst, src1)
                M = tform.params[0:2, :]

                if w < 112 or h < 112:
                    count += 1
                    print(img_name_path)
                    continue
                frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame[0:112, 0:112, :]
                order_img.append(img112)
                order_name.append(img_name)
                #cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
                #cv2.imwrite(align_img_path, img112)

    print("images smaller than 112: " + str(count))
    if len(order_img) > 0:
        order_img = np.array(order_img)
        order_img = order_img.transpose((0, 3, 1, 2))
        order_img = np.array(order_img, dtype=np.float32)
        order_img -= 127.5
        order_img /= 127.5
        # print(order_img.shape)
        # print(len(order_name))
        count_acc(order_name, order_img, arcface_model, index, database_name_list, cpu_or_cuda)


def count_acc(order_name, order_img, model, index, database_name_list, cpu_or_cuda):
    pred_name = []
    unknown = []
    print(order_img.shape)

    start_time = time.time()
    # order_img = torch.from_numpy(order_img)
    # order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    # run recognition in batches of 256
    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, index, database_name_list, cpu_or_cuda)
        else:
            name = findAll(order_img[now:number], model, index, database_name_list, cpu_or_cuda)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i][:-6]:
            right += 1
    filed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            filed += 1
            #print(order_name[i])
    error = 0
    print("----------------")
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i][:-6]:
            error += 1
            #print(order_name[i] + " " + pred_name[i] + " ")
    #print(order_name)
    #print(pred_name)
    print("total:" + str(number))
    print("right:" + str(right + filed) + " rate:" + str((filed + right) / number))
    #print("filed:" + str(filed) + " rate:" + str(filed / number))
    print("error:" + str(error - filed) + " rate:" + str((error - filed) / number))
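To make the tallies in count_acc concrete (the numbers below are assumed, for illustration only): right counts exact identity matches, filed counts "unknown" predictions, and error counts every mismatch, so error - filed is the number of wrong identities.

# 1000 probes: 930 correct, 40 rejected as "unknown", 30 matched to the wrong person
# right = 930, filed = 40, error = 40 + 30 = 70
# printed "right" rate: (930 + 40) / 1000 = 0.97   (rejections are not counted as errors)
# printed "error" rate: (70 - 40) / 1000 = 0.03    (only wrong identities count)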
# if __name__ == '__main__':
#     torch.set_grad_enabled(False)
#     cfg = None
#     if args.network == "mobile0.25":
#         cfg = cfg_mnet
#     elif args.network == "resnet50":
#         cfg = cfg_re50
#     # net and model
#     net = RetinaFace(cfg=cfg, phase='test')
#     net = load_model(net, args.trained_model, args.cpu)
#     net.eval()
#     print('Finished loading model!')
#     #print(net)
#     cudnn.benchmark = True
#     device = torch.device("cpu" if args.cpu else "cuda")
#     net = net.to(device)
#
#     resize = 1
#
#     # testing begin
#     cap = cv2.VideoCapture("rtsp://47.108.74.82:8557/h264")
#     ret, frame = cap.read()
#     h, w = frame.shape[:2]
#     fps = cap.get(cv2.CAP_PROP_FPS)
#     size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
#             int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
#     #out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
#     out = cv2.VideoWriter('ttttttt.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size)
#     number = 0
#
#     model = iresnet100()
#     model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
#     model.eval()
#     k_v = load_npy("./Database/student.npy")
#
#     while ret:
#         tic = time.time()
#         img = np.float32(frame)
#         im_height, im_width, _ = img.shape
#         scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
#         img -= (104, 117, 123)
#         img = img.transpose(2, 0, 1)
#         img = torch.from_numpy(img).unsqueeze(0)
#         img = img.to(device)
#         scale = scale.to(device)
#
#         loc, conf, landms = net(img)  # forward pass
#
#         priorbox = PriorBox(cfg, image_size=(im_height, im_width))
#         priors = priorbox.forward()
#         priors = priors.to(device)
#         prior_data = priors.data
#         boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
#         boxes = boxes * scale / resize
#         boxes = boxes.cpu().numpy()
#         scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
#         landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
#         scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
#                                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
#                                img.shape[3], img.shape[2]])
#         scale1 = scale1.to(device)
#         landms = landms * scale1 / resize
#         landms = landms.cpu().numpy()
#
#         # ignore low scores
#         inds = np.where(scores > args.confidence_threshold)[0]
#         boxes = boxes[inds]
#         landms = landms[inds]
#         scores = scores[inds]
#
#         # keep top-K before NMS
#         order = scores.argsort()[::-1][:args.top_k]
#         boxes = boxes[order]
#         landms = landms[order]
#         scores = scores[order]
#
#         # do NMS
#         dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
#         keep = py_cpu_nms(dets, args.nms_threshold)
#         # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
#         dets = dets[keep, :]
#         landms = landms[keep]
#
#         # keep top-K after NMS
#         dets = dets[:args.keep_top_k, :]
#         landms = landms[:args.keep_top_k, :]
#
#         dets = np.concatenate((dets, landms), axis=1)
#         face_list = []
|
||||||
|
# name_list = []
|
||||||
|
# #print(dets[:4])
|
||||||
|
# print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||||
|
# start_time = time.time()
|
||||||
|
# for i, det in enumerate(dets):
|
||||||
|
# if det[4] < args.vis_thres:
|
||||||
|
# continue
|
||||||
|
# boxes, score = det[:4], det[4]
|
||||||
|
# dst = np.reshape(landms[i],(5,2))
|
||||||
|
# #print(dst.shape)
|
||||||
|
# src1 = np.array([
|
||||||
|
# [38.3814, 51.6963],
|
||||||
|
# [73.6186, 51.5014],
|
||||||
|
# [56.1120, 71.7366],
|
||||||
|
# [41.6361, 92.3655],
|
||||||
|
# [70.8167, 92.2041]], dtype=np.float32)
|
||||||
|
# #print(src1.shape)
|
||||||
|
# tform = trans.SimilarityTransform()
|
||||||
|
# tform.estimate(dst, src1)
|
||||||
|
# M = tform.params[0:2, :]
|
||||||
|
# frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||||
|
# img112 = frame2[0:112, 0:112, :]
|
||||||
|
# # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||||
|
# # count += 1
|
||||||
|
# face_list.append(img112)
|
||||||
|
#
|
||||||
|
# if len(face_list) != 0:
|
||||||
|
# face_list = np.array(face_list)
|
||||||
|
# face_list = face_list.transpose((0, 3, 1, 2))
|
||||||
|
# face_list = np.array(face_list, dtype=np.float32)
|
||||||
|
# face_list -= 127.5
|
||||||
|
# face_list /= 127.5
|
||||||
|
# print(face_list.shape)
|
||||||
|
# face_list = torch.from_numpy(face_list)
|
||||||
|
#
|
||||||
|
# name_list = findAll(face_list, model, k_v)
|
||||||
|
# end_time = time.time()
|
||||||
|
# print("findOneframe time: " + str(end_time - start_time))
|
||||||
|
# start_time = time.time()
|
||||||
|
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||||
|
# draw = ImageDraw.Draw(img_PIL)
|
||||||
|
# font = ImageFont.truetype("font.ttf", 22)
|
||||||
|
# for i, det in enumerate(dets):
|
||||||
|
# if det[4] < args.vis_thres:
|
||||||
|
# continue
|
||||||
|
# boxes, score = det[:4], det[4]
|
||||||
|
# #print(name_list)
|
||||||
|
# name = name_list[i]
|
||||||
|
# mo = r'[\u4e00-\u9fa5]*'
|
||||||
|
# name = re.match(mo, name).group(0)
|
||||||
|
# if not isinstance(name, np.unicode):
|
||||||
|
# name = name.decode('utf8')
|
||||||
|
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||||
|
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
|
||||||
|
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||||
|
# cv2.imshow('out', frame)
|
||||||
|
# out.write(frame)
|
||||||
|
# end_time = time.time()
|
||||||
|
# print("drawOneframe time: " + str(end_time - start_time))
|
||||||
|
# # Press Q on keyboard to stop recording
|
||||||
|
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||||
|
# break
|
||||||
|
# ret, frame = cap.read()
|
||||||
|
# cap.release()
|
||||||
|
# out.release()
|
||||||
|
# cv2.destroyAllWindows()
|
||||||
|
if __name__ == '__main__':
|
||||||
|
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
|
||||||
|
|
||||||
|
k_v = load_npy("./Database/sfz_test.npy")
|
||||||
|
database_name_list = list(k_v.keys())
|
||||||
|
vector_list = np.array(list(k_v.values()))
|
||||||
|
print(vector_list.shape)
|
||||||
|
# print(database_name_list)
|
||||||
|
nlist = 500
|
||||||
|
quantizer = faiss.IndexFlatL2(512) # the other index
|
||||||
|
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
|
||||||
|
index.train(vector_list)
|
||||||
|
# index = faiss.IndexFlatL2(512)
|
||||||
|
index.add(vector_list)
|
||||||
|
index.nprobe = 50
|
||||||
|
|
||||||
|
count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list)
|
||||||
|
# sfz_to112x112_retinaface(arcface_model,cpu_or_cuda)
|
||||||
|
|
||||||
|
|
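
# --- Illustrative aside, not part of this repo: the quantizer/train/add/nprobe
# sequence above builds an IVF index that only scans `nprobe` of the `nlist`
# trained clusters per query. A minimal self-contained sketch of the same
# pattern on random data (all sizes here are invented for the demo):
def _faiss_ivf_demo():
    import faiss
    import numpy as np

    d, n = 512, 10000
    xb = np.random.random((n, d)).astype('float32')
    quantizer = faiss.IndexFlatL2(d)                    # coarse quantizer
    index = faiss.IndexIVFFlat(quantizer, d, 100, faiss.METRIC_L2)
    index.train(xb)                                     # k-means over the database vectors
    index.add(xb)
    index.nprobe = 10                                   # more probes -> better recall, slower search
    D, I = index.search(xb[:5], 1)                      # distances and ids of nearest neighbours
    print(I.ravel())                                    # should be [0 1 2 3 4] at high recall
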
483
retinaface_detect.py
Normal file
@ -0,0 +1,483 @@
from __future__ import print_function
import re
import time

import cv2
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont

from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm

threshold = 1.05  # L2 distance threshold for accepting a match
ppi = 1280        # frames wider than this are downscaled to this width
step = 3          # detect_video runs detection once every `step` + 1 frames


class ConfRetinaface(object):
    def __init__(self, trained_model, network, cpu, confidence_threshold, top_k, nms_threshold, keep_top_k, vis_thres):
        self.trained_model = trained_model
        self.network = network
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.vis_thres = vis_thres


def set_retinaface_conf(cpu_or_cuda):
    args = ConfRetinaface(trained_model='./weights/mobilenet0.25_Final.pth',
                          network='mobile0.25',
                          cpu=(cpu_or_cuda == 'cpu'),
                          confidence_threshold=0.02,
                          top_k=5000,
                          nms_threshold=0.4,
                          keep_top_k=750,
                          vis_thres=0.6)
    return args

def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    '''Old style model is stored with all names of parameters sharing common prefix 'module.' '''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}


def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


# Load the RetinaFace detection model
def load_retinaface_model(args):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    print('Finished loading model!')
    return net

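
# --- Illustrative bring-up sketch (assumes the default weight file from
# set_retinaface_conf actually exists on disk; not called anywhere):
def _bringup_demo():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    retinaface_args = set_retinaface_conf(device)
    retinaface_net = load_retinaface_model(retinaface_args)
    return retinaface_net, retinaface_args
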
# Compute the Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


# L2-normalize a feature vector
def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


# Find the database face whose feature vector is closest to the prediction
def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return "unknown"


# Nearest-neighbour lookup in the faiss index
def faiss_find_face(pred, index, database_name_list):
    # print(len(database_name_list))
    start_time = time.time()
    D, I = index.search(pred, 1)
    name_list = []
    end_time = time.time()
    print("faiss cost %fs" % (end_time - start_time))
    print(D, I)
    # if D[0][0] < threshold:
    #     print(database_name_list[I[0][0]])
    #     return database_name_list[I[0][0]]
    # else:
    #     return "unknown"
    for i, idx in enumerate(I):  # loop variable renamed from `index` to avoid shadowing the parameter
        if D[i][0] < threshold:
            # print(database_name_list[I[0][0]])
            name_list.append(database_name_list[idx[0]])
        else:
            name_list.append("unknown")
    return name_list


# Identify every face in the input batch against the face database.
# `index`/`database_name_list` are only used by the commented-out faiss path,
# so they now default to None; the original signature required them even
# though detect_video below calls findAll without them.
def findAll(imglist, model, k_v, cpu_or_cuda, index=None, database_name_list=None):
    start_time = time.time()
    imglist = torch.from_numpy(imglist)
    imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.cpu().numpy()
        print("predOne time: " + str(time.time() - start_time))
        # print(pred.shape)
        start_time = time.time()
        # name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
            print(name)
            # print(l2_normalize(pr).shape)
            # pr = np.expand_dims(l2_normalize(pr), 0)
            # print(pr.shape)
            # name = faiss_find_face(pr, index, database_name_list)
            if name != "unknown":
                mo = r'[\u4e00-\u9fa5_a-zA-Z]*'  # keep only the person's name: CJK characters, letters, underscores
                name = re.match(mo, name)
                name_list.append(name.group(0))
            else:
                name_list.append("unknown")
            # name_list.append(name)
        print("findOne time: " + str(time.time() - start_time))
    return name_list

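
# --- Illustrative aside, not part of this repo: findAll compares L2-normalized
# embeddings, so the L2 threshold maps directly onto a cosine threshold. For
# unit vectors a and b, ||a - b||^2 == 2 - 2 * cos(a, b), hence the threshold
# of 1.05 used above corresponds to cosine similarity 1 - 1.05**2 / 2 ~= 0.449.
def _l2_vs_cosine_demo():
    rng = np.random.default_rng(0)
    a = rng.normal(size=512)
    a /= np.linalg.norm(a)
    b = rng.normal(size=512)
    b /= np.linalg.norm(b)
    d2 = np.sum((a - b) ** 2)
    assert np.isclose(d2, 2 - 2 * np.dot(a, b))
    print("L2 threshold 1.05 ~ cosine", 1 - 1.05 ** 2 / 2)
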
# Detect the faces in a single image and return them as an Nx3x112x112 float array
def detect_one(path, net, args):
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    frame = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = frame.shape[:2]
    factor = h / w
    if w > 1000:
        frame = cv2.resize(frame, (600, int(600 * factor)))
        h, w = frame.shape[:2]

    tic = time.time()
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    # print(loc.shape, landms.shape, conf.shape)
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    face_list = []
    box_and_point = []
    # print(dets[:4])
    # print('net forward time: {:.4f}'.format(time.time() - tic))
    print(len(dets))
    for i, det in enumerate(dets):
        if det[4] < args.vis_thres:
            continue
        box_and_point.append(det)
        dst = np.reshape(landms[i], (5, 2))
        # print(dst.shape)
        # reference landmark positions for an aligned 112x112 face
        src1 = np.array([
            [38.3814, 51.6963],
            [73.6186, 51.5014],
            [56.1120, 71.7366],
            [41.6361, 92.3655],
            [70.8167, 92.2041]], dtype=np.float32)
        # print(src1.shape)
        tform = trans.SimilarityTransform()
        tform.estimate(dst, src1)
        M = tform.params[0:2, :]
        frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
        img112 = frame2[0:112, 0:112, :]
        # cv2.imshow('out', img112)
        # cv2.waitKey(0)
        face_list.append(img112)
    if len(face_list) > 0:
        face_list = np.array(face_list)
        face_list = face_list.transpose((0, 3, 1, 2))
        face_list = np.array(face_list, dtype=np.float32)
        face_list -= 127.5
        face_list /= 127.5
        box_and_point = np.array(box_and_point)
        # face_list = torch.from_numpy(face_list)
        # cv2.imshow('out', img112)
        # cv2.waitKey(0)
    return face_list, box_and_point

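
# --- Hypothetical single-image pipeline (for reference only): `net`, `args`,
# `arcface_model`, `k_v` and the image path are all supplied by the caller;
# none of them are defined at module level in this file.
def _single_image_demo(net, args, arcface_model, k_v, path="./img/test.jpg"):
    faces, box_and_point = detect_one(path, net, args)
    if len(faces) > 0:
        names = findAll(faces, arcface_model, k_v, "cpu" if args.cpu else "cuda")
        for name, det in zip(names, box_and_point):
            print(name, det[:4], det[4])  # identity, box, detection score
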
# Detect and recognize faces in a video
def detect_video(video_path, output_path, net, arcface_model, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    factor = 0
    if w > ppi:
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]

    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (w, h)
    # size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    number = step  # forces detection on the very first frame
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(h, w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([w, h, w, h])
    scale = scale.to(device)
    scale1 = torch.Tensor([w, h, w, h,
                           w, h, w, h,
                           w, h])
    scale1 = scale1.to(device)

    # reference landmark positions for an aligned 112x112 face
    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    # print(src1.shape)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            # heavy path: run detection + recognition on this frame
            tic = time.time()
            img = np.float32(frame)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            # print(dets[:4])
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time = time.time()
            for i, det in enumerate(dets[:4]):  # recognize at most 4 faces per frame
                if det[4] < args.vis_thres:
                    continue
                boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                # print(dst.shape)

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                # face_list = torch.from_numpy(face_list)
                name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
            start_time = time.time()
            if len(dets) != 0:
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)

            # if (len(dets) != 0):
            #     img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            #     draw = ImageDraw.Draw(img_PIL)
            #
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         # print(name_list)
            #         name = name_list[i]
            #         if not isinstance(name, str):
            #             name = name.decode('utf8')
            #         draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            #         draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
            #     frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            # cv2.imshow('out', frame)
            # cv2.waitKey(0)
            out.write(frame)
            end_time = time.time()
            print("drawOneframe time: " + str(end_time - start_time))
            # Press Q on keyboard to stop recording
            # if cv2.waitKey(1) & 0xFF == ord('q'):
            #     break
            ret, frame = cap.read()
            number = 0
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        else:
            # cheap path: redraw the cached detections on the in-between frame
            number += 1
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    # print(name_list)
                    name = name_list[i]
                    if not isinstance(name, str):  # np.unicode was removed from NumPy; str is its Python 3 equivalent
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
                                   width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            out.write(frame)
            start_time = time.time()
            ret, frame = cap.read()
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    out.release()
    print('total time: {:.4f}'.format(time.time() - tic_total))
    # cv2.destroyAllWindows()

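
# --- Illustrative skeleton of detect_video's frame-skipping control flow:
# the detector runs once every `step` + 1 frames and the cached detections
# are redrawn in between. The two helpers are stand-ins for the inline code
# above, not functions defined by this repo.
def _detect_and_recognize_stub(frame):
    # stand-in for the RetinaFace + ArcFace path
    return [], []


def _draw_cached_stub(frame, dets, names):
    # stand-in for the box/name drawing
    pass


def _skip_loop(cap, step=3):
    number = step          # forces detection on the very first frame
    ret, frame = cap.read()
    dets, names = [], []
    while ret:
        if number == step:
            dets, names = _detect_and_recognize_stub(frame)  # heavy path
            number = 0
        else:
            number += 1    # cheap path: reuse the cached detections
        _draw_cached_stub(frame, dets, names)
        ret, frame = cap.read()
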
if __name__ == "__main__":
    # set_retinaface_conf requires the device string; the original call omitted it
    args = set_retinaface_conf("cuda" if torch.cuda.is_available() else "cpu")
    print(args.cpu)
BIN
src/__pycache__/generate_patches.cpython-38.pyc
Normal file
BIN
src/__pycache__/utility.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/functional.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/transform.cpython-38.pyc
Normal file
65
src/data_io/dataset_folder.py
Normal file
@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:04 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_folder.py
# @Software : PyCharm

import cv2
import torch
from torchvision import datasets
import numpy as np


def opencv_loader(path):
    img = cv2.imread(path)
    return img


class DatasetFolderFT(datasets.ImageFolder):
    def __init__(self, root, transform=None, target_transform=None,
                 ft_width=10, ft_height=10, loader=opencv_loader):
        super(DatasetFolderFT, self).__init__(root, transform, target_transform, loader)
        self.root = root
        self.ft_width = ft_width
        self.ft_height = ft_height

    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = self.loader(path)
        # generate the FT picture of the sample
        ft_sample = generate_FT(sample)
        if sample is None:
            print('image is None --> ', path)
        if ft_sample is None:
            print('FT image is None -->', path)
        assert sample is not None

        ft_sample = cv2.resize(ft_sample, (self.ft_width, self.ft_height))
        ft_sample = torch.from_numpy(ft_sample).float()
        ft_sample = torch.unsqueeze(ft_sample, 0)

        if self.transform is not None:
            try:
                sample = self.transform(sample)
            except Exception as err:
                print('Error Occurred: %s' % err, path)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, ft_sample, target


# Build the log-magnitude Fourier-transform map of an image, scaled to roughly [0, 1]
def generate_FT(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    f = np.fft.fft2(image)
    fshift = np.fft.fftshift(f)
    fimg = np.log(np.abs(fshift) + 1)
    maxx = -1
    minn = 100000
    for i in range(len(fimg)):
        if maxx < max(fimg[i]):
            maxx = max(fimg[i])
        if minn > min(fimg[i]):
            minn = min(fimg[i])
    fimg = (fimg - minn + 1) / (maxx - minn + 1)
    return fimg
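
# --- Illustrative aside, not part of this repo: the row-by-row min/max scan in
# generate_FT computes the global min and max, so it can be replaced by
# whole-array numpy reductions. Same `+1` offsets, hence the same output.
def generate_FT_vectorized(image):
    import cv2
    import numpy as np

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    fimg = np.log(np.abs(np.fft.fftshift(np.fft.fft2(gray))) + 1)
    return (fimg - fimg.min() + 1) / (fimg.max() - fimg.min() + 1)
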
33
src/data_io/dataset_loader.py
Normal file
@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 3:40 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_loader.py
# @Software : PyCharm

from torch.utils.data import DataLoader
from src.data_io.dataset_folder import DatasetFolderFT
from src.data_io import transform as trans


def get_train_loader(conf):
    train_transform = trans.Compose([
        trans.ToPILImage(),
        trans.RandomResizedCrop(size=tuple(conf.input_size),
                                scale=(0.9, 1.1)),
        trans.ColorJitter(brightness=0.4,
                          contrast=0.4, saturation=0.4, hue=0.1),
        trans.RandomRotation(10),
        trans.RandomHorizontalFlip(),
        trans.ToTensor()
    ])
    root_path = '{}/{}'.format(conf.train_root_path, conf.patch_info)
    trainset = DatasetFolderFT(root_path, train_transform,
                               None, conf.ft_width, conf.ft_height)
    train_loader = DataLoader(
        trainset,
        batch_size=conf.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=16)
    return train_loader
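
# --- Illustrative aside, not part of this repo: get_train_loader only needs an
# object exposing the attributes it reads, so a SimpleNamespace is enough for a
# smoke test. The field values below are invented placeholders, in particular
# `patch_info`, which in this project names a subdirectory of train_root_path.
def _loader_demo():
    from types import SimpleNamespace

    conf = SimpleNamespace(train_root_path="./datasets", patch_info="org_1_80x60",
                           input_size=[80, 60], ft_width=10, ft_height=10,
                           batch_size=128)
    return get_train_loader(conf)
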
589
src/data_io/functional.py
Normal file
@ -0,0 +1,589 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 6:18 PM
# @Author : zhuying
# @Company : Minivision
# @File : functional.py
# @Software : PyCharm

from __future__ import division
import torch
from PIL import Image, ImageOps, ImageEnhance
try:
    import accimage
except ImportError:
    accimage = None
import numpy as np
import numbers
import types
import collections
import warnings


def _is_pil_image(img):
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    else:
        return isinstance(img, Image.Image)


def _is_tensor_image(img):
    return torch.is_tensor(img) and img.ndimension() == 3


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    if not(_is_pil_image(pic) or _is_numpy_image(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if isinstance(pic, np.ndarray):
        # handle numpy array
        # IR image channel=1: modify by lzc --> 20190730
        if pic.ndim == 2:
            pic = pic.reshape((pic.shape[0], pic.shape[1], 1))

        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        # backward compatibility
        # return img.float().div(255)  # modified by zkx
        return img.float()
    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # handle PIL Image
    if pic.mode == 'I':
        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
    elif pic.mode == 'I;16':
        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
    if pic.mode == 'YCbCr':
        nchannel = 3
    elif pic.mode == 'I;16':
        nchannel = 1
    else:
        nchannel = len(pic.mode)
    img = img.view(pic.size[1], pic.size[0], nchannel)
    # put it from HWC to CHW format
    # yikes, this transpose takes 80% of the loading time/CPU
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        # return img.float().div(255)  # modified by zkx
        return img.float()
    else:
        return img

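
# --- Behaviour check, illustrative only: unlike stock torchvision, this fork's
# to_tensor keeps pixel values in [0, 255] because the .div(255) calls were
# deliberately removed (see the "modified by zkx" comments above).
def _to_tensor_range_demo():
    img = np.full((4, 4, 3), 200, dtype=np.uint8)
    t = to_tensor(img)
    assert float(t.max()) == 200.0  # stock torchvision would give ~0.784
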
def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    See :class:`~torchvision.transforms.ToPILImage` for more details.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes

    Returns:
        PIL Image: Image converted to PIL Image.
    """
    if not(_is_numpy_image(pic) or _is_tensor_image(pic)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))

    npimg = pic
    if isinstance(pic, torch.FloatTensor):
        pic = pic.mul(255).byte()
    if torch.is_tensor(pic):
        npimg = np.transpose(pic.numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        if npimg.dtype == np.int16:
            expected_mode = 'I;16'
        if npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                             .format(mode, np.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'
    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)


def normalize(tensor, mean, std):
    """Normalize a tensor image with mean and standard deviation.

    See ``Normalize`` for more details.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    for t, m, s in zip(tensor, mean, std):
        t.sub_(m).div_(s)
    return tensor


def resize(img, size, interpolation=Image.BILINEAR):
    """Resize the input PIL Image to the given size.

    Args:
        img (PIL Image): Image to be resized.
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
            the aspect ratio. i.e, if height > width, then image will be rescaled to
            (size * height / width, size)
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``

    Returns:
        PIL Image: Resized image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    # collections.Iterable was removed in Python 3.10; use collections.abc
    if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
        raise TypeError('Got inappropriate size arg: {}'.format(size))

    if isinstance(size, int):
        w, h = img.size
        if (w <= h and w == size) or (h <= w and h == size):
            return img
        if w < h:
            ow = size
            oh = int(size * h / w)
            return img.resize((ow, oh), interpolation)
        else:
            oh = size
            ow = int(size * w / h)
            return img.resize((ow, oh), interpolation)
    else:
        return img.resize(size[::-1], interpolation)


def scale(*args, **kwargs):
    warnings.warn("The use of the transforms.Scale transform is deprecated, " +
                  "please use transforms.Resize instead.")
    return resize(*args, **kwargs)


def pad(img, padding, fill=0):
    """Pad the given PIL Image on all sides with the given "pad" value.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill: Pixel fill value. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.

    Returns:
        PIL Image: Padded image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError('Got inappropriate padding arg')
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError('Got inappropriate fill arg')

    if isinstance(padding, collections.abc.Sequence) and len(padding) not in [2, 4]:
        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
                         "{} element tuple".format(len(padding)))

    return ImageOps.expand(img, border=padding, fill=fill)


def crop(img, i, j, h, w):
    """Crop the given PIL Image.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.

    Returns:
        PIL Image: Cropped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.crop((j, i, j + w, i + h))


def center_crop(img, output_size):
    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    w, h = img.size
    th, tw = output_size
    i = int(round((h - th) / 2.))
    j = int(round((w - tw) / 2.))
    return crop(img, i, j, th, tw)


def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
    """Crop the given PIL Image and resize it to desired size.

    Notably used in RandomResizedCrop.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.
        size (sequence or int): Desired output size. Same semantics as ``scale``.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``.
    Returns:
        PIL Image: Cropped image.
    """
    assert _is_pil_image(img), 'img should be PIL Image'
    img = crop(img, i, j, h, w)
    img = resize(img, size, interpolation)
    return img


def hflip(img):
    """Horizontally flip the given PIL Image.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Horizontally flipped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.transpose(Image.FLIP_LEFT_RIGHT)


def vflip(img):
    """Vertically flip the given PIL Image.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Vertically flipped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.transpose(Image.FLIP_TOP_BOTTOM)


def five_crop(img, size):
    """Crop the given PIL Image into four corners and the central crop.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    Returns:
        tuple: tuple (tl, tr, bl, br, center) corresponding top left,
            top right, bottom left, bottom right and center crop.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    w, h = img.size
    crop_h, crop_w = size
    if crop_w > w or crop_h > h:
        raise ValueError("Requested crop size {} is bigger than input size {}".format(size,
                                                                                      (h, w)))
    tl = img.crop((0, 0, crop_w, crop_h))
    tr = img.crop((w - crop_w, 0, w, crop_h))
    bl = img.crop((0, h - crop_h, crop_w, h))
    br = img.crop((w - crop_w, h - crop_h, w, h))
    center = center_crop(img, (crop_h, crop_w))
    return (tl, tr, bl, br, center)


def ten_crop(img, size, vertical_flip=False):
    """Crop the given PIL Image into four corners and the central crop plus the
    flipped version of these (horizontal flipping is used by default).

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
            br_flip, center_flip) corresponding top left, top right,
            bottom left, bottom right and center crop and same for the
            flipped image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    first_five = five_crop(img, size)

    if vertical_flip:
        img = vflip(img)
    else:
        img = hflip(img)

    second_five = five_crop(img, size)
    return first_five + second_five

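
# --- Quick shape check, illustrative only: five_crop yields 5 crops and
# ten_crop doubles that by also cropping the flipped image.
def _crop_count_demo():
    im = Image.new("RGB", (100, 100))
    assert len(five_crop(im, 64)) == 5
    assert len(ten_crop(im, 64)) == 10
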
def adjust_brightness(img, brightness_factor):
    """Adjust brightness of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Brightness(img)
    img = enhancer.enhance(brightness_factor)
    return img


def adjust_contrast(img, contrast_factor):
    """Adjust contrast of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image: Contrast adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(contrast_factor)
    return img


def adjust_saturation(img, saturation_factor):
    """Adjust color saturation of an image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image: Saturation adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Color(img)
    img = enhancer.enhance(saturation_factor)
    return img


def adjust_hue(img, hue_factor):
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    See https://en.wikipedia.org/wiki/Hue for more details on Hue.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image: Hue adjusted image.
    """
    if not(-0.5 <= hue_factor <= 0.5):
        raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor))

    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        return img

    h, s, v = img.convert('HSV').split()

    np_h = np.array(h, dtype=np.uint8)
    # uint8 addition takes care of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += np.uint8(hue_factor * 255)
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img


def adjust_gamma(img, gamma, gain=1):
    """Perform gamma correction on an image.

    Also known as Power Law Transform. Intensities in RGB mode are adjusted
    based on the following equation:

        I_out = 255 * gain * ((I_in / 255) ** gamma)

    See https://en.wikipedia.org/wiki/Gamma_correction for more details.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        gamma (float): Non negative real number. gamma larger than 1 makes the
            shadows darker, while gamma smaller than 1 makes dark regions
            lighter.
        gain (float): The constant multiplier.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if gamma < 0:
        raise ValueError('Gamma should be a non-negative real number')

    input_mode = img.mode
    img = img.convert('RGB')

    np_img = np.array(img, dtype=np.float32)
    np_img = 255 * gain * ((np_img / 255) ** gamma)
    np_img = np.uint8(np.clip(np_img, 0, 255))

    img = Image.fromarray(np_img, 'RGB').convert(input_mode)
    return img


def rotate(img, angle, resample=False, expand=False, center=None):
    """Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows).

    Args:
        img (PIL Image): PIL Image to be rotated.
        angle ({float, int}): Rotation angle in degrees, counter-clockwise.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.rotate(angle, resample, expand, center)


def to_grayscale(img, num_output_channels=1):
    """Convert image to grayscale version of image.

    Args:
        img (PIL Image): Image to be converted to grayscale.

    Returns:
        PIL Image: Grayscale version of the image.
            if num_output_channels == 1 : returned image is single channel
            if num_output_channels == 3 : returned image is 3 channel with r == g == b
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if num_output_channels == 1:
        img = img.convert('L')
    elif num_output_channels == 3:
        img = img.convert('L')
        np_img = np.array(img, dtype=np.uint8)
        np_img = np.dstack([np_img, np_img, np_img])
        img = Image.fromarray(np_img, 'RGB')
    else:
        raise ValueError('num_output_channels should be either 1 or 3')

    return img
347
src/data_io/transform.py
Normal file
@ -0,0 +1,347 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:19 PM
# @Author : zhuying
# @Company : Minivision
# @File : transform.py
# @Software : PyCharm

from __future__ import division
import math
import random
from PIL import Image
try:
    import accimage
except ImportError:
    accimage = None
import numpy as np
import numbers
import types

from src.data_io import functional as F

__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "RandomHorizontalFlip",
           "Lambda", "RandomResizedCrop", "ColorJitter", "RandomRotation"]


class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img


class ToTensor(object):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    Converts a PIL Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
    """

    def __call__(self, pic):
        """
        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.
        """
        return F.to_tensor(pic)


class Lambda(object):
    """Apply a user-defined lambda as a transform.

    Args:
        lambd (function): Lambda/function to be used for transform.
    """

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img):
        return self.lambd(img)


class ToPILImage(object):
    """Convert a tensor or an ndarray to PIL Image.

    Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
    H x W x C to a PIL Image while preserving the value range.

    Args:
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
            If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
            1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
            2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
            3. If the input has 1 channel, the ``mode`` is determined by the data type (i.e.,
            ``int``, ``float``, ``short``).

    .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
    """

    def __init__(self, mode=None):
        self.mode = mode

    def __call__(self, pic):
        """
        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

        Returns:
            PIL Image: Image converted to PIL Image.
        """
        return F.to_pil_image(pic, self.mode)


class Normalize(object):
    """Normalize a tensor image with mean and standard deviation.

    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
    will normalize each channel of the input ``torch.*Tensor`` i.e.
    ``input[channel] = (input[channel] - mean[channel]) / std[channel]``

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.

        Returns:
            Tensor: Normalized Tensor image.
        """
        return F.normalize(tensor, self.mean, self.std)
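At this point the module has enough pieces to assemble a basic preprocessing pipeline. A hedged sketch; the mean/std values are placeholders rather than this repo's training settings, and pil_img is assumed to be a PIL Image loaded elsewhere:

preprocess = Compose([
    ToTensor(),                                            # HWC [0, 255] -> CHW float [0.0, 1.0]
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # placeholder statistics
])
tensor = preprocess(pil_img)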


class RandomHorizontalFlip(object):
    """Horizontally flip the given PIL Image randomly with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be flipped.

        Returns:
            PIL Image: Randomly flipped image.
        """
        if random.random() < 0.5:
            return F.hflip(img)
        return img


class RandomResizedCrop(object):
    """Crop the given PIL Image to random size and aspect ratio.

    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
    is finally resized to given size.
    This is popularly used to train the Inception networks.

    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.BILINEAR
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
        if isinstance(size, tuple):
            self.size = size
        else:
            self.size = (size, size)
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of size of the origin size cropped
            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                sized crop.
        """
        for attempt in range(10):
            area = img.size[0] * img.size[1]
            target_area = random.uniform(*scale) * area
            aspect_ratio = random.uniform(*ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            if w <= img.size[0] and h <= img.size[1]:
                i = random.randint(0, img.size[1] - h)
                j = random.randint(0, img.size[0] - w)
                return i, j, h, w

        # Fallback
        w = min(img.size[0], img.size[1])
        i = (img.size[1] - w) // 2
        j = (img.size[0] - w) // 2
        return i, j, w, w

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.

        Returns:
            PIL Image: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
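To make the sampling in get_params concrete: a target area of uniform(0.08, 1.0) times the image area and an aspect ratio of uniform(3/4, 4/3) are drawn, then w = sqrt(area * ratio) and h = sqrt(area / ratio); if ten attempts fail to fit inside the image, the centered square fallback is used. A small sketch, with pil_img assumed loaded elsewhere:

rrc = RandomResizedCrop(size=80)   # output is resized to 80x80 regardless of the crop drawn
out = rrc(pil_img)
assert out.size == (80, 80)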


class ColorJitter(object):
    """Randomly change the brightness, contrast and saturation of an image.

    Args:
        brightness (float): How much to jitter brightness. brightness_factor
            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
        contrast (float): How much to jitter contrast. contrast_factor
            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
        saturation (float): How much to jitter saturation. saturation_factor
            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
        hue (float): How much to jitter hue. hue_factor is chosen uniformly from
            [-hue, hue]. Should be >= 0 and <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        """Get a randomized transform to be applied on image.

        Arguments are same as that of __init__.

        Returns:
            Transform which randomly adjusts brightness, contrast and
            saturation in a random order.
        """
        transforms = []
        if brightness > 0:
            brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness)
            transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))

        if contrast > 0:
            contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
            transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))

        if saturation > 0:
            saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation)
            transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))

        if hue > 0:
            hue_factor = np.random.uniform(-hue, hue)
            transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))

        np.random.shuffle(transforms)
        transform = Compose(transforms)

        return transform

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Input image.

        Returns:
            PIL Image: Color jittered image.
        """
        transform = self.get_params(self.brightness, self.contrast,
                                    self.saturation, self.hue)
        return transform(img)
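A usage sketch for ColorJitter; the jitter strengths below are placeholders, not values taken from this repo's training config:

jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
augmented = jitter(pil_img)   # the four Lambda adjustments are applied in random order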


class RandomRotation(object):
    """Rotate the image by angle.

    Args:
        degrees (sequence or float or int): Range of degrees to select from.
            If degrees is a number instead of sequence like (min, max), the range of degrees
            will be (-degrees, +degrees).
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
    """

    def __init__(self, degrees, resample=False, expand=False, center=None):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
            self.degrees = degrees

        self.resample = resample
        self.expand = expand
        self.center = center

    @staticmethod
    def get_params(degrees):
        """Get parameters for ``rotate`` for a random rotation.

        Returns:
            sequence: params to be passed to ``rotate`` for random rotation.
        """
        angle = np.random.uniform(degrees[0], degrees[1])

        return angle

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be rotated.

        Returns:
            PIL Image: Rotated image.
        """
        angle = self.get_params(self.degrees)

        return F.rotate(img, angle, self.resample, self.expand, self.center)
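And a one-liner sketch for RandomRotation; the 10-degree range is illustrative:

rot = RandomRotation(degrees=10)   # degrees=10 expands to the range (-10, +10)
rotated = rot(pil_img)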
73
src/default_config.py
Normal file
@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 9:12 AM
# @Author : zhuying
# @Company : Minivision
# @File : default_config.py
# @Software : PyCharm
# --*-- coding: utf-8 --*--
"""
default config for training
"""

import torch
from datetime import datetime
from easydict import EasyDict
from src.utility import make_if_not_exist, get_width_height, get_kernel


def get_default_config():
    conf = EasyDict()

    # ----------------------training---------------
    conf.lr = 1e-1
    # [9, 13, 15]
    conf.milestones = [10, 15, 22]  # epochs at which to decay the learning rate
    conf.gamma = 0.1
    conf.epochs = 25
    conf.momentum = 0.9
    conf.batch_size = 1024

    # model
    conf.num_classes = 3
    conf.input_channel = 3
    conf.embedding_size = 128

    # dataset
    conf.train_root_path = './datasets/rgb_image'

    # save file path
    conf.snapshot_dir_path = './saved_logs/snapshot'

    # log path
    conf.log_path = './saved_logs/jobs'
    # tensorboard
    conf.board_loss_every = 10
    # save model/iter
    conf.save_every = 30

    return conf


def update_config(args, conf):
    conf.devices = args.devices
    conf.patch_info = args.patch_info
    w_input, h_input = get_width_height(args.patch_info)
    conf.input_size = [h_input, w_input]
    conf.kernel_size = get_kernel(h_input, w_input)
    conf.device = "cuda:{}".format(conf.devices[0]) if torch.cuda.is_available() else "cpu"

    # resize fourier image size
    conf.ft_height = 2*conf.kernel_size[0]
    conf.ft_width = 2*conf.kernel_size[1]
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    job_name = 'Anti_Spoofing_{}'.format(args.patch_info)
    log_path = '{}/{}/{} '.format(conf.log_path, job_name, current_time)
    snapshot_dir = '{}/{}'.format(conf.snapshot_dir_path, job_name)

    make_if_not_exist(snapshot_dir)
    make_if_not_exist(log_path)

    conf.model_path = snapshot_dir
    conf.log_path = log_path
    conf.job_name = job_name
    return conf
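A sketch of how the two helpers compose; args stands in for the argparse namespace a training entry point would build, so the attributes referenced below are assumptions of the sketch:

conf = get_default_config()
conf = update_config(args, conf)   # needs args.devices and args.patch_info to be set
print(conf.input_size, conf.kernel_size, conf.device)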
65
src/generate_patches.py
Normal file
@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-9 3:06 PM
# @Author : zhuying
# @Company : Minivision
# @File : test.py
# @Software : PyCharm
"""
Create patch from original input image by using bbox coordinate
"""

import cv2
import numpy as np


class CropImage:
    @staticmethod
    def _get_new_box(src_w, src_h, bbox, scale):
        x = bbox[0]
        y = bbox[1]
        box_w = bbox[2]
        box_h = bbox[3]

        scale = min((src_h-1)/box_h, min((src_w-1)/box_w, scale))

        new_width = box_w * scale
        new_height = box_h * scale
        center_x, center_y = box_w/2+x, box_h/2+y

        left_top_x = center_x-new_width/2
        left_top_y = center_y-new_height/2
        right_bottom_x = center_x+new_width/2
        right_bottom_y = center_y+new_height/2

        if left_top_x < 0:
            right_bottom_x -= left_top_x
            left_top_x = 0

        if left_top_y < 0:
            right_bottom_y -= left_top_y
            left_top_y = 0

        if right_bottom_x > src_w-1:
            left_top_x -= right_bottom_x-src_w+1
            right_bottom_x = src_w-1

        if right_bottom_y > src_h-1:
            left_top_y -= right_bottom_y-src_h+1
            right_bottom_y = src_h-1

        return int(left_top_x), int(left_top_y),\
               int(right_bottom_x), int(right_bottom_y)

    def crop(self, org_img, bbox, scale, out_w, out_h, crop=True):

        if not crop:
            dst_img = cv2.resize(org_img, (out_w, out_h))
        else:
            src_h, src_w, _ = np.shape(org_img)
            left_top_x, left_top_y, \
                right_bottom_x, right_bottom_y = self._get_new_box(src_w, src_h, bbox, scale)

            img = org_img[left_top_y: right_bottom_y+1,
                          left_top_x: right_bottom_x+1]
            dst_img = cv2.resize(img, (out_w, out_h))
        return dst_img
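An illustrative call to CropImage; bbox is (x, y, w, h) in pixels, and every number below is made up for the sketch:

image_cropper = CropImage()
patch = image_cropper.crop(org_img, bbox=[60, 40, 120, 160],
                           scale=2.7, out_w=80, out_h=80, crop=True)   # 80x80 BGR patch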
296
src/model_lib/MiniFASNet.py
Normal file
@ -0,0 +1,296 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-3 4:45 PM
# @Author : zhuying
# @Company : Minivision
# @File : MiniFASNet.py
# @Software : PyCharm
import torch
import torch.nn.functional as F
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, \
    AdaptiveAvgPool2d, Sequential, Module


class L2Norm(Module):
    def forward(self, input):
        return F.normalize(input)


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Conv_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = Conv2d(in_c, out_c, kernel_size=kernel, groups=groups,
                           stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.prelu(x)
        return x


class Linear_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel,
                           groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Depth_Wise(Module):
    def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(Depth_Wise, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output
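Depth_Wise is a MobileNet-style bottleneck: a 1x1 pointwise expansion, a depthwise 3x3 convolution (groups equal to its input channels), and a 1x1 linear projection, with an identity shortcut when residual=True. A shape sketch with illustrative channel counts:

import torch

block = Depth_Wise(c1=(64, 128), c2=(128, 128), c3=(128, 64),
                   residual=True, stride=(1, 1))
y = block(torch.randn(1, 64, 20, 20))
print(y.shape)   # torch.Size([1, 64, 20, 20]) -- stride 1 plus the shortcut preserve the shape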


class Residual(Module):
    def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True,
                                      kernel=kernel, padding=padding, stride=stride, groups=groups))
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(
            channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.bn1 = BatchNorm2d(channels // reduction)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(
            channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.bn2 = BatchNorm2d(channels)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.sigmoid(x)
        return module_input * x
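SEModule is a standard squeeze-and-excitation gate: global average pooling squeezes each feature map to 1x1, a two-layer 1x1-conv bottleneck produces per-channel weights, and the sigmoid output rescales the input. A quick sketch with illustrative sizes:

import torch

se = SEModule(channels=128, reduction=4)   # bottleneck width 128 // 4 = 32
gated = se(torch.randn(2, 128, 10, 10))
print(gated.shape)   # torch.Size([2, 128, 10, 10]) -- same shape, channels reweighted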


class ResidualSE(Module):
    def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1), se_reduct=4):
        super(ResidualSE, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            if i == num_block-1:
                modules.append(
                    Depth_Wise_SE(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
                                  stride=stride, groups=groups, se_reduct=se_reduct))
            else:
                modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
                                          stride=stride, groups=groups))
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class Depth_Wise_SE(Module):
    def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1, se_reduct=8):
        super(Depth_Wise_SE, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual
        self.se_module = SEModule(c3_out, se_reduct)

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            x = self.se_module(x)
            output = short_cut + x
        else:
            output = x
        return output


class MiniFASNet(Module):
    def __init__(self, keep, embedding_size, conv6_kernel=(7, 7),
                 drop_p=0.0, num_classes=3, img_channel=3):
        super(MiniFASNet, self).__init__()
        self.embedding_size = embedding_size

        self.conv1 = Conv_block(img_channel, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(keep[0], keep[1], kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=keep[1])

        c1 = [(keep[1], keep[2])]
        c2 = [(keep[2], keep[3])]
        c3 = [(keep[3], keep[4])]

        self.conv_23 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[3])

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = Residual(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[16], keep[17])]
        c2 = [(keep[17], keep[18])]
        c3 = [(keep[18], keep[19])]

        self.conv_34 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[19])

        c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
              (keep[31], keep[32]), (keep[34], keep[35])]
        c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
              (keep[32], keep[33]), (keep[35], keep[36])]
        c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
              (keep[33], keep[34]), (keep[36], keep[37])]

        self.conv_4 = Residual(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[37], keep[38])]
        c2 = [(keep[38], keep[39])]
        c3 = [(keep[39], keep[40])]

        self.conv_45 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[40])

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]

        self.conv_5 = Residual(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(keep[46], keep[47], kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_6_dw = Linear_block(keep[47], keep[48], groups=keep[48], kernel=conv6_kernel, stride=(1, 1), padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)
        self.drop = torch.nn.Dropout(p=drop_p)
        self.prob = Linear(embedding_size, num_classes, bias=False)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        out = self.conv_6_sep(out)
        out = self.conv_6_dw(out)
        out = self.conv_6_flatten(out)
        if self.embedding_size != 512:
            out = self.linear(out)
        out = self.bn(out)
        out = self.drop(out)
        out = self.prob(out)
        return out


class MiniFASNetSE(MiniFASNet):
    def __init__(self, keep, embedding_size, conv6_kernel=(7, 7), drop_p=0.75, num_classes=4, img_channel=3):
        super(MiniFASNetSE, self).__init__(keep=keep, embedding_size=embedding_size, conv6_kernel=conv6_kernel,
                                           drop_p=drop_p, num_classes=num_classes, img_channel=img_channel)

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = ResidualSE(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
              (keep[31], keep[32]), (keep[34], keep[35])]
        c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
              (keep[32], keep[33]), (keep[35], keep[36])]
        c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
              (keep[33], keep[34]), (keep[36], keep[37])]

        self.conv_4 = ResidualSE(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
        self.conv_5 = ResidualSE(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))


keep_dict = {'1.8M': [32, 32, 103, 103, 64, 13, 13, 64, 26, 26,
                      64, 13, 13, 64, 52, 52, 64, 231, 231, 128,
                      154, 154, 128, 52, 52, 128, 26, 26, 128, 52,
                      52, 128, 26, 26, 128, 26, 26, 128, 308, 308,
                      128, 26, 26, 128, 26, 26, 128, 512, 512],

             '1.8M_': [32, 32, 103, 103, 64, 13, 13, 64, 13, 13, 64, 13,
                       13, 64, 13, 13, 64, 231, 231, 128, 231, 231, 128, 52,
                       52, 128, 26, 26, 128, 77, 77, 128, 26, 26, 128, 26, 26,
                       128, 308, 308, 128, 26, 26, 128, 26, 26, 128, 512, 512]
             }
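The keep lists are flat channel-width schedules that MiniFASNet.__init__ consumes positionally: keep[0] and keep[1] feed conv1/conv2_dw, keep[1]..keep[4] the first Depth_Wise, and so on through keep[48] for the final depthwise layer. Both schedules therefore share a length; a small sanity check:

assert len(keep_dict['1.8M']) == len(keep_dict['1.8M_']) == 49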


# (80x80) flops: 0.044, params: 0.41
def MiniFASNetV1(embedding_size=128, conv6_kernel=(7, 7),
                 drop_p=0.2, num_classes=3, img_channel=3):
    return MiniFASNet(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2(embedding_size=128, conv6_kernel=(7, 7),
                 drop_p=0.2, num_classes=3, img_channel=3):
    return MiniFASNet(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


def MiniFASNetV1SE(embedding_size=128, conv6_kernel=(7, 7),
                   drop_p=0.75, num_classes=3, img_channel=3):
    return MiniFASNetSE(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2SE(embedding_size=128, conv6_kernel=(7, 7),
                   drop_p=0.75, num_classes=4, img_channel=3):
    return MiniFASNetSE(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
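A smoke-test sketch for the factories above. Pairing an 80x80 input with conv6_kernel=(5, 5) is an assumption of the sketch (five stride-2 stages reduce 80 to 5, which the final depthwise kernel must match); it is not asserted anywhere in this diff:

import torch

model = MiniFASNetV2(embedding_size=128, conv6_kernel=(5, 5), num_classes=3)
model.eval()   # eval mode so BatchNorm1d tolerates a batch of one
logits = model(torch.randn(1, 3, 80, 80))
print(logits.shape)   # torch.Size([1, 3])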