认知智能 2024-07-29 11:24:25 +08:00
parent 7cc5524251
commit 4ab77432c5
126 changed files with 21475 additions and 0 deletions

BIN
Database/new.npy Normal file

Binary file not shown.

BIN
Database/student.npy Normal file

Binary file not shown.

132
accuracy.py Normal file

@ -0,0 +1,132 @@
import os
import time
import torch
import cv2
import numpy as np
from backbones import iresnet50,iresnet18,iresnet100
def load_image(img_path):
#img = cv2.imread(img_path)
img = cv2.imdecode(np.fromfile(img_path,dtype=np.uint8),cv2.IMREAD_COLOR)
img = img.transpose((2, 0, 1))
img = img[np.newaxis, :, :, :]
img = np.array(img, dtype=np.float32)
img -= 127.5
img /= 127.5
return img
def findEuclideanDistance(source_representation, test_representation):
euclidean_distance = source_representation - test_representation
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
euclidean_distance = np.sqrt(euclidean_distance)
return euclidean_distance
def l2_normalize(x):
return x / np.sqrt(np.sum(np.multiply(x, x)))
def load_npy(path):
data = np.load(path,allow_pickle=True)
data = data.item()
return data
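# Assumption from how the file is read back: each *.npy gallery was produced by np.save
# on a plain Python dict mapping a person's name to an L2-normalized 512-d embedding,
# which is why np.load(...).item() is needed to recover the dict.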
def findmindistance(pred,threshold,k_v):
distance = 10
most_like = ""
for name in k_v.keys():
tmp = findEuclideanDistance(k_v[name],pred)
if distance > tmp:
distance = tmp
most_like = name
if distance < threshold:
return most_like
else:
return -1
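# findmindistance does a linear 1:N scan over the gallery: it returns the name of the
# closest embedding only if its Euclidean distance is below the threshold (1.20 in the
# callers below), otherwise -1, which the callers map to "unknown".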
def findOne(img,model,k_v):
with torch.no_grad():
start_time = time.time()
pred = model(img)
end_time = time.time()
#print("predOne time: " + str(end_time - start_time))
pred = pred.numpy()
name = findmindistance(l2_normalize(pred),threshold=1.20,k_v=k_v)
if name != -1:
return name
else:
return "unknown"
def findAll(imglist,model,k_v):
with torch.no_grad():
name_list = []
pred = model(imglist)
pred = pred.numpy()
for pr in pred:
name = findmindistance(l2_normalize(pr),threshold=1.20,k_v=k_v)
if name != -1:
name_list.append(name)
else:
name_list.append("unknown")
return name_list
if __name__=='__main__':
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
pred_name = []
order_name = []
order_path = []
unknown = []
test_path = r"D:\Download\out\cfp_test"  # raw string so the backslashes are not treated as escapes
name_list = os.listdir(test_path)
for name in name_list:
img_list = os.listdir(os.path.join(test_path,name))
for img in img_list:
order_name.append(name)
order_path.append(os.path.join(os.path.join(test_path,name),img))
order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
for index,img_path in enumerate(order_path):
order_img[index] = load_image(img_path)
print(order_img.shape)
# for name in order_path:
# print(name)
k_v = load_npy("cfp.npy")
start_time = time.time()
order_img = torch.from_numpy(order_img)
batch = 256
now = 0
number = len(order_img)
#number = 1400
for i in range(number):
unknown.append("unknown")
while now < number:
if now+batch < number:
name = findAll(order_img[now:now+batch],model,k_v)
else:
name = findAll(order_img[now:number], model, k_v)
now = now+batch
for na in name:
pred_name.append(na)
print("batch"+str(now))
end_time = time.time()
print("findAll time: " + str(end_time - start_time))
#print(len(pred_name))
right = 0
for i,name in enumerate(pred_name):
if pred_name[i] == order_name[i]:
right += 1
filed = 0
for i, name in enumerate(pred_name):
if pred_name[i] == unknown[i]:
filed += 1
error = 0
for i,name in enumerate(pred_name):
if pred_name[i] != order_name[i]:
error += 1
print(order_name[i]+" "+pred_name[i]+" "+order_path[i])
print("total:" + str(number))
print("right:" + str(right) + " rate:" + str(right / number))
print("filed:" + str(filed) + " rate:" + str(filed / number))
print("error:"+str(error-filed)+" rate:"+str((error-filed)/number))

134
accuracy_GPU.py Normal file

@ -0,0 +1,134 @@
import os
import time
import torch
import cv2
import numpy as np
from backbones import iresnet50,iresnet18,iresnet100
def load_image(img_path):
#img = cv2.imread(img_path)
img = cv2.imdecode(np.fromfile(img_path,dtype=np.uint8),cv2.IMREAD_COLOR)
img = img.transpose((2, 0, 1))
img = img[np.newaxis, :, :, :]
img = np.array(img, dtype=np.float32)
img -= 127.5
img /= 127.5
return img
def findEuclideanDistance(source_representation, test_representation):
euclidean_distance = source_representation - test_representation
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
euclidean_distance = np.sqrt(euclidean_distance)
return euclidean_distance
def l2_normalize(x):
return x / np.sqrt(np.sum(np.multiply(x, x)))
def load_npy(path):
data = np.load(path,allow_pickle=True)
data = data.item()
return data
def findmindistance(pred,threshold,k_v):
distance = 10
most_like = ""
for name in k_v.keys():
tmp = findEuclideanDistance(k_v[name],pred)
if distance > tmp:
distance = tmp
most_like = name
if distance < threshold:
return most_like
else:
return -1
def findOne(img,model,k_v):
with torch.no_grad():
start_time = time.time()
pred = model(img)
end_time = time.time()
#print("predOne time: " + str(end_time - start_time))
pred = pred.numpy()
name = findmindistance(l2_normalize(pred),threshold=1.20,k_v=k_v)
if name != -1:
return name
else:
return "unknown"
def findAll(imglist,model,k_v):
with torch.no_grad():
name_list = []
imglist = imglist.to(torch.device("cuda"))
pred = model(imglist)
pred = pred.cpu().numpy()
for pr in pred:
name = findmindistance(l2_normalize(pr),threshold=1.20,k_v=k_v)
if name != -1:
name_list.append(name)
else:
name_list.append("unknown")
return name_list
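# Note: this file mirrors accuracy.py; the main differences are that the model and each
# batch are moved to the CUDA device, embeddings are copied back to the CPU before the
# numpy gallery search, and a different test set / gallery file is used.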
if __name__=='__main__':
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth"))
model.to(torch.device("cuda"))
model.eval()
pred_name = []
order_name = []
order_path = []
unknown = []
test_path = "./retinaface_test"
name_list = os.listdir(test_path)
for name in name_list:
img_list = os.listdir(os.path.join(test_path,name))
for img in img_list:
order_name.append(name)
order_path.append(os.path.join(os.path.join(test_path,name),img))
order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
for index,img_path in enumerate(order_path):
order_img[index] = load_image(img_path)
print(order_img.shape)
# for name in order_path:
# print(name)
k_v = load_npy("retinaface_lfw_myalign.npy")
start_time = time.time()
order_img = torch.from_numpy(order_img)
batch = 256
now = 0
number = len(order_img)
#number = 1400
for i in range(number):
unknown.append("unknown")
while now < number:
if now+batch < number:
name = findAll(order_img[now:now+batch],model,k_v)
else:
name = findAll(order_img[now:number], model, k_v)
now = now+batch
for na in name:
pred_name.append(na)
print("batch"+str(now))
end_time = time.time()
print("findAll time: " + str(end_time - start_time))
#print(len(pred_name))
right = 0
for i,name in enumerate(pred_name):
if pred_name[i] == order_name[i]:
right += 1
filed = 0
for i, name in enumerate(pred_name):
if pred_name[i] == unknown[i]:
filed += 1
error = 0
for i,name in enumerate(pred_name):
if pred_name[i] != order_name[i]:
error += 1
print(order_name[i]+" "+pred_name[i]+" "+order_path[i])
print("total:" + str(number))
print("right:" + str(right) + " rate:" + str(right / number))
print("filed:" + str(filed) + " rate:" + str(filed / number))
print("error:"+str(error-filed)+" rate:"+str((error-filed)/number))

150
anti.py Normal file

@ -0,0 +1,150 @@
import os
import cv2
import numpy as np
import argparse
import warnings
import time
import torch
import torch.nn.functional as F
from src.generate_patches import CropImage
from src.model_lib.MiniFASNet import MiniFASNetV1, MiniFASNetV2,MiniFASNetV1SE,MiniFASNetV2SE
from src.data_io import transform as trans
from src.utility import get_kernel, parse_model_name
warnings.filterwarnings('ignore')
MODEL_MAPPING = {
'MiniFASNetV1': MiniFASNetV1,
'MiniFASNetV2': MiniFASNetV2,
'MiniFASNetV1SE':MiniFASNetV1SE,
'MiniFASNetV2SE':MiniFASNetV2SE
}
class AntiSpoofPredict():
def __init__(self, cpu_or_cuda):
super(AntiSpoofPredict, self).__init__()
self.device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")
def predict(self, img, model):
test_transform = trans.Compose([
trans.ToTensor(),
])
img = test_transform(img)
img = img.unsqueeze(0).to(self.device)
with torch.no_grad():
result = model.forward(img)
result = F.softmax(result, dim=1).cpu().numpy()
return result
def load_anti_model(model_dir,cpu_or_cuda):
model_list = []
for model_path in os.listdir(model_dir):
model_list.append(_load_model(os.path.join(model_dir, model_path), cpu_or_cuda))
return model_list
def _load_model(model_path,cpu_or_cuda):
# define model
device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")
model_name = os.path.basename(model_path)
h_input, w_input, model_type, _ = parse_model_name(model_name)
kernel_size = get_kernel(h_input, w_input, )
model = MODEL_MAPPING[model_type](conv6_kernel=kernel_size).to(device)
# load model weight
state_dict = torch.load(model_path, map_location=device)
keys = iter(state_dict)
first_layer_name = keys.__next__()
if first_layer_name.find('module.') >= 0:
from collections import OrderedDict
new_state_dict = OrderedDict()
for key, value in state_dict.items():
name_key = key[7:]
new_state_dict[name_key] = value
model.load_state_dict(new_state_dict)
else:
model.load_state_dict(state_dict)
model.eval()
return model
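# Checkpoints saved from nn.DataParallel prefix every parameter key with "module.";
# the branch above strips that prefix so the weights load into a single-device model.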
# The video stream captured by the Android APK has a 3:4 aspect ratio, so the input is restricted to 3:4 for consistency
def check_image(image):
height, width, channel = image.shape
if width/height != 3/4:
print("Image is not appropriate!!!\nHeight/Width should be 4/3.")
return False
else:
return True
# Face liveness (anti-spoofing) detection
def anti_spoofing(image_name, model_dir, cpu_or_cuda, bbox, model_list):
model_test = AntiSpoofPredict(cpu_or_cuda)
image_cropper = CropImage()
image = cv2.imdecode(np.fromfile(image_name, dtype=np.uint8), cv2.IMREAD_COLOR)
h, w = image.shape[:2]
factor = h / w
if (w > 1000):
image = cv2.resize(image, (600, int(600 * factor)))
# result = check_image(image)
# if result is False:
# return
# image_bbox = model_test.get_bbox(image)
image_bbox = bbox
prediction = np.zeros((1, 3))
test_speed = 0
# sum the prediction from single model's result
for index, model_name in enumerate(os.listdir(model_dir)):
h_input, w_input, model_type, scale = parse_model_name(model_name)
param = {
"org_img": image,
"bbox": image_bbox,
"scale": scale,
"out_w": w_input,
"out_h": h_input,
"crop": True,
}
if scale is None:
param["crop"] = False
img = image_cropper.crop(**param)
start = time.time()
prediction += model_test.predict(img, model_list[index])
test_speed += time.time()-start
label = np.argmax(prediction)
# print(prediction)
# cv2.rectangle(
# image,
# (image_bbox[0], image_bbox[1]),
# (image_bbox[0] + image_bbox[2], image_bbox[1] + image_bbox[3]),
# (225,0,0), 2)
# cv2.imshow("out",image)
# cv2.waitKey(0)
value = prediction[0][1]/2
if value > 0.915:
return "real face", '{:.10f}'.format(value)
else:
return "fake face", '{:.10f}'.format(value)
if __name__ == "__main__":
desc = "test"
parser = argparse.ArgumentParser(description=desc)
parser.add_argument(
"--device_id",
type=int,
default=0,
help="which gpu id, [0/1/2/3]")
parser.add_argument(
"--model_dir",
type=str,
default="./resources/anti_spoof_models",
help="model_lib used to test")
parser.add_argument(
"--image_name",
type=str,
default="000_0.bmp",
help="image used to test")
args = parser.parse_args()
# anti_spoofing(args.image_name, args.model_dir, args.device_id)

449
app.py Normal file

@ -0,0 +1,449 @@
import time
import faiss
from flask import Flask, render_template, request, jsonify, send_from_directory
from markupsafe import escape, escape_silent
from werkzeug.utils import secure_filename
from anti import anti_spoofing, load_anti_model
from face_api import load_arcface_model, load_npy, findOne, load_image, face_verification, findAll, add_one_to_database, \
get_claster_tmp_file_embedding, cluster, detect_video
from gender_age import set_gender_conf, gender_age, load_gender_model
from retinaface_detect import load_retinaface_model, detect_one, set_retinaface_conf
from werkzeug.exceptions import RequestEntityTooLarge
import zipfile
import os
import shutil
import re
import numpy as np
import torch
ALLOWED_IMG = set(['png', 'jpg', 'jpeg', 'bmp', 'PNG', 'JPG', 'JPEG'])
# Limit uploaded images to 10 MB
ALLOWED_IMG_SIZE = 10 * 1024 * 1024
ALLOWED_FILE = set(['zip'])
ALLOWED_VIDEO = set(['mp4'])
app = Flask(__name__)
# Limit uploaded files to 100 MB
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
# Keep non-ASCII (Chinese) characters readable in jsonify responses
app.config['JSON_AS_ASCII'] = False
# Select the device: "cuda" to use the GPU, otherwise CPU
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# Load the face recognition model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# Load the face detection model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
# Load the gender/age estimation model
gender_args = set_gender_conf()
gender_model = load_gender_model(gender_args, 'fc1')
anti_spoofing_model_path = "model/anti_spoof_models"
anti_model = load_anti_model(anti_spoofing_model_path, cpu_or_cuda)
# Load the face database
@app.route('/')
def index():
return "model"
@app.route('/hello')
@app.route('/hello/<name>')
def hello(name=None):
return render_template('hello.html', name=name)
@app.route('/user', methods=['GET'])
def show_user_name():
return request.args.get('username', '')
# Build the JSON response.
# Arguments are checked against None to decide what goes into data; result is filled based on truthiness.
def create_response(status, name=None, distance=None, verification=None, gender=None, age=None, num=None, anti=None,
score=None, box_and_point=None, addfile_names=None,fail_names=None,database_name=None,msg=None,
delete_names=None,not_exist_names=None):
# res is the top-level JSON structure
res = {}
res['status'] = status
data = {}
try:
data["box_and_point"] = box_and_point.tolist()
except AttributeError:
pass
if anti != None and score != None:
liveness = {}
liveness["spoofing"] = anti
liveness['score'] = score
data['liveness'] = liveness
if distance!=None:
data['distance'] = float(distance)
if verification!=None:
data['verification'] = verification
if num!=None:
data['number'] = num
if gender!=None:
data['gender'] = gender
if age!=None:
data['age'] = age
if name!=None:
data['name'] = name
if data:
res['data'] = data
# Data returned by the database add/delete endpoints
result = {}
if msg!=None:
res['msg'] = msg
if database_name!=None:
result['database_name'] = database_name
# Adding faces
if addfile_names!=None or fail_names!=None:
result['success_names'] = addfile_names
result['fail_names'] = fail_names
# Deleting faces
if delete_names!=None or not_exist_names!=None:
result['delete_names'] = delete_names
result['not_exist_names'] = not_exist_names
if result:
res['result'] = result
return jsonify(res)
# Build the JSON response for the cluster endpoint
def create_cluster_response(status, all_cluster):
res = {}
data = {}
for index, cluster in enumerate(all_cluster):
data['cluster' + str(index)] = cluster
res['data'] = data
res['status'] = status
return res
# Check the uploaded file's format
def check_file_format(file_name, format):
if '.' in file_name:
file_format = file_name.rsplit('.', 1)[1]  # split on the last dot so multi-dot names keep their real extension
if file_format in format:
return True
return False
# Raise an exception if the image is larger than 10 MB
def check_img_size(img_path):
fsize = os.path.getsize(img_path)
if fsize > ALLOWED_IMG_SIZE:
raise RequestEntityTooLarge
# Extract a zip file to the given directory
def unzip(zip_src, dst_dir):
f = zipfile.is_zipfile(zip_src)
if f:
fz = zipfile.ZipFile(zip_src, 'r')
for file in fz.namelist():
fz.extract(file, dst_dir)
return True
else:
return False
# Extract an archive
def un_zip(file_path, output_path):
zip_file = zipfile.ZipFile(file_path)
if os.path.isdir(output_path):
pass
else:
os.mkdir(output_path)
zip_file.extractall(output_path)
# for names in zip_file.namelist():
# zip_file.extract(names,output_path)
zip_file.close()
# Face recognition plus gender/age estimation
@app.route('/recognition', methods=['POST'])
def recognition():
try:
f = request.files['file_name']
if f and check_file_format(f.filename, ALLOWED_IMG):
img_path = './img/recognition/' + secure_filename(f.filename)
f.save(img_path)
check_img_size(img_path)
# img3 = load_image('./file/'+secure_filename(f.filename))
# img3 = torch.from_numpy(img3)
tic = time.time()
img3, box_and_point = detect_one(img_path, retinaface_model, retinaface_args)
print('detect time: {:.4f}'.format(time.time() - tic))
if len(img3) == 0:
return create_response('no face')
elif len(img3) > 1:
namelist = findAll(img3, arcface_model, index, database_name_list, cpu_or_cuda)
gender_list, age_list = [], []
# gender_list, age_list = gender_age(img3, gender_model)
res = create_response('success', namelist, gender=gender_list, age=age_list,
box_and_point=box_and_point)
else:
b = box_and_point[0]
w = b[2] - b[0]
h = b[3] - b[1]
b[2] = w
b[3] = h
label, value = anti_spoofing(img_path, anti_spoofing_model_path, cpu_or_cuda, np.array(b[:4], int),
anti_model)
# print(index,database_name_list)
name, distance = findOne(img3, arcface_model, index, database_name_list, cpu_or_cuda)
gender_list, age_list = [], []
# gender_list, age_list = gender_age(img3, gender_model)
res = create_response('success', name, gender=gender_list, age=age_list, distance=distance,
anti=label, score=value, box_and_point=box_and_point)
return res
else:
return create_response('png jpg jpeg bmp are allowed')
except RequestEntityTooLarge:
return create_response('image size should be less than 10M')
# Compare two images (1:1 verification)
@app.route('/compare', methods=['POST'])
def compare_file():
try:
file1 = request.files['file1_name']
file2 = request.files['file2_name']
if file1 and check_file_format(file1.filename, ALLOWED_IMG) and file2 and check_file_format(file2.filename,
ALLOWED_IMG):
img1_path = './img/compare/' + secure_filename(file1.filename)
img2_path = './img/compare/' + secure_filename(file2.filename)
file1.save(img1_path)
file2.save(img2_path)
check_img_size(img1_path)
check_img_size(img2_path)
img1, box_and_point1 = detect_one(img1_path, retinaface_model,
retinaface_args)
img2, box_and_point2 = detect_one(img2_path, retinaface_model, retinaface_args)
if len(img1) == 1 and len(img2) == 1:
result,distance = face_verification(img1, img2, arcface_model, cpu_or_cuda)
print(result,distance)
return create_response('success', verification=result,distance=distance)
else:
return create_response('image contains no face or more than 1 face')
else:
return create_response('png jpg jpeg bmp are allowed')
except RequestEntityTooLarge:
return create_response('image size should be less than 10M')
# Add faces to a database: supports adding single or multiple faces to an existing or new database
# Create and update
@app.route('/databaseAdd', methods=['POST'])
def DB_add_face():
try:
# Upload face images (>= 1)
# Every part uses the key file_list with a different value, which allows batch image upload
upload_files = request.files.getlist("file_list")
# '', [], {} and 0 all evaluate to False
if not upload_files:
msg = "上传文件为空"
return create_response(0,msg=msg)
database_name = request.form.get("database_name")
database_path = "./Database/" + database_name + ".npy"
if not os.path.exists(database_path):
msg = "数据库不存在"
return create_response(0,msg=msg)
# Names already present in the database
names = load_npy(database_path).keys()
# print(names)
# Temporary folder on the server for uploaded images; it is recreated before each upload and deleted afterwards
# This could later be changed to periodic cleanup if needed
file_temp_path = './img/uploadNew/'
if not os.path.exists(file_temp_path):
os.makedirs(file_temp_path)
# Regex that extracts the Chinese characters from the filename, used as keys in the .npy database
r = re.compile('[\u4e00-\u9fa5]+')
# Track the names that were added successfully and those that failed
success_names = []
fail_names = {}
# Two failure cases: wrong format, or the name already exists
format_wrong = []
alreadyExist = []
# Handle each image: first check the format, then check whether the name already exists
for file in upload_files:
filename = file.filename
name = r.findall(filename)[0]
if file and check_file_format(filename, ALLOWED_IMG):
if name in names:
alreadyExist.append(name)
continue
save_path = file_temp_path + filename
file.save(save_path)
check_img_size(save_path)
img_file, box_and_point = detect_one(save_path, retinaface_model, retinaface_args)
add_one_to_database(img=img_file, model=arcface_model, name=name, database_path=database_path,
cpu_or_cuda=cpu_or_cuda)
success_names.append(name)
else:
format_wrong.append(name)
continue
shutil.rmtree(file_temp_path)
# If anything failed
if format_wrong or alreadyExist:
status = 0
else:
status = 1
fail_names['formatWrong'] = format_wrong
fail_names['alreadyExist'] = alreadyExist
return create_response(status=status,addfile_names=success_names,fail_names=fail_names,database_name=database_name,msg="新增人脸操作执行完成")
except RequestEntityTooLarge:
return create_response(0,msg='image size should be less than 10M')
# Delete faces from a database: supports deleting single or multiple faces from an existing database
@app.route('/databaseDelete', methods=['POST'])
def DB_delete_face():
try:
delete_names = request.form.getlist("delete_names")
database_name = request.form.get("database_name")
database_path = "./Database/" + database_name + ".npy"
if not os.path.exists(database_path):
msg = "数据库不存在"
return create_response(0,msg=msg)
if not delete_names:
msg = "delete_names参数为空"
return create_response(0,msg=msg)
k_v = load_npy(database_path)
print(k_v.keys())
success_list = []
fail_list = []
for name in delete_names:
if name in k_v.keys():
del k_v[name]
success_list.append(name)
else:
fail_list.append(name)
continue
np.save(database_path, k_v)
status = 1
if fail_list:
status = 0
return create_response(status=status,delete_names=success_list,not_exist_names=fail_list,database_name=database_name,
msg="删除人脸操作完成")
except RequestEntityTooLarge:
return create_response(0, msg='image size should be less than 10M')
# Image-to-image search endpoints:
# Upload a zip archive of images to build the search gallery
@app.route('/uploadZip', methods=['POST'])
def upload_Zip():
try:
zip = request.files['zip_name']
dst_dir = './img/search/'
if unzip(zip, dst_dir):
return create_response('upload zip success')
else:
return create_response('upload zip file please')
except RequestEntityTooLarge:
return create_response('image size should be less than 10M')
# Search images by image
@app.route('/imgSearchImg', methods=['POST'])
def img_search_img():
searchfile = './img/search/face'
try:
file = request.files['img_name']
if file and check_file_format(file.filename, ALLOWED_IMG):
img_path = './img/search/' + secure_filename(file.filename)
file.save(img_path)
check_img_size(img_path)
img, box_and_point = detect_one(img_path, retinaface_model,
retinaface_args)
if len(img) == 1:
Onename = []
num = 0
for filenames in os.listdir(searchfile):
imgpath = os.path.join(searchfile, filenames)
imgdata, box_and_point = detect_one(imgpath, retinaface_model, retinaface_args)
result = face_verification(img, imgdata, arcface_model, cpu_or_cuda)
isOne, distance = result.split(' ', -1)[0], result.split(' ', -1)[1]
if isOne == 'same':
Onename.append(filenames)
num += 1
return create_response('success', name=Onename, num=num)
else:
return create_response('image contains no face or more than 1 face')
else:
return create_response('png jpg jpeg bmp are allowed')
except RequestEntityTooLarge:
return create_response('image size should be less than 10M')
# Face clustering endpoint
@app.route('/cluster', methods=['POST'])
def zip_cluster():
try:
f = request.files['file_name']
if f and check_file_format(f.filename, ALLOWED_FILE):
zip_name = secure_filename(f.filename)
f.save('./img/cluster_tmp_file/' + zip_name)
un_zip('./img/cluster_tmp_file/' + zip_name, './img/cluster_tmp_file/')
emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/" + zip_name.rsplit('.')[0],
retinaface_model,
retinaface_args, arcface_model, cpu_or_cuda)
return create_cluster_response("success", cluster(emb_list, name_list))
else:
return create_response('zip are allowed')
except RequestEntityTooLarge:
return create_response('file size should be less than 100M')
# Video recognition endpoint
@app.route('/videorecognition', methods=['POST'])
def video_recognition():
try:
f = request.files['file_name']
if f and check_file_format(f.filename, ALLOWED_VIDEO):
video_name = secure_filename(f.filename)
f.save('./video/' + video_name)
detect_video('./video/' + video_name, './videoout/' + video_name, retinaface_model, arcface_model, k_v,
retinaface_args)
return create_response("success")
else:
return create_response('mp4 are allowed')
except RequestEntityTooLarge:
return create_response('file size should be less than 100M')
@app.route('/download/<string:filename>', methods=['GET'])
def download(filename):
if os.path.isfile(os.path.join('./videoout/', filename)):
return send_from_directory('./videoout/', filename, as_attachment=True)
else:
return create_response("Download failed")
if __name__ == '__main__':
k_v = load_npy("./Database/student.npy")
database_name_list = list(k_v.keys())
vector_list = np.array(list(k_v.values()))
print(vector_list.shape)
#print(database_name_list)
nlist = 50
quantizer = faiss.IndexFlatL2(512) # the other index
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
index.train(vector_list)
# index = faiss.IndexFlatL2(512)
index.add(vector_list)
index.nprobe = 50
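# Retrieval setup (a reading of this code, not a documented design): the gallery
# embeddings are indexed with an IVF index using nlist=50 coarse cells; with nprobe set
# equal to nlist every cell is searched, so recall matches a flat L2 index while leaving
# the option to lower nprobe later for speed on larger databases.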
app.run(host="0.0.0.0", port=5000)

1
backbones/__init__.py Normal file

@ -0,0 +1 @@
from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200

Binary file not shown.

Binary file not shown.

187
backbones/iresnet.py Normal file

@ -0,0 +1,187 @@
import torch
from torch import nn
__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=1,
stride=stride,
bias=False)
class IBasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None,
groups=1, base_width=64, dilation=1):
super(IBasicBlock, self).__init__()
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
self.conv1 = conv3x3(inplanes, planes)
self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
self.prelu = nn.PReLU(planes)
self.conv2 = conv3x3(planes, planes, stride)
self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.bn1(x)
out = self.conv1(out)
out = self.bn2(out)
out = self.prelu(out)
out = self.conv2(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
return out
class IResNet(nn.Module):
fc_scale = 7 * 7
def __init__(self,
block, layers, dropout=0, num_features=512, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
super(IResNet, self).__init__()
self.fp16 = fp16
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
self.prelu = nn.PReLU(self.inplanes)
self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
self.layer2 = self._make_layer(block,
128,
layers[1],
stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block,
256,
layers[2],
stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block,
512,
layers[3],
stride=2,
dilate=replace_stride_with_dilation[2])
self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
self.dropout = nn.Dropout(p=dropout, inplace=True)
self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
self.features = nn.BatchNorm1d(num_features, eps=1e-05)
nn.init.constant_(self.features.weight, 1.0)
self.features.weight.requires_grad = False
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, 0, 0.1)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
if zero_init_residual:
for m in self.modules():
if isinstance(m, IBasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
)
layers = []
layers.append(
block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(self.inplanes,
planes,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation))
return nn.Sequential(*layers)
def forward(self, x):
with torch.cuda.amp.autocast(self.fp16):
x = self.conv1(x)
x = self.bn1(x)
x = self.prelu(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.bn2(x)
x = torch.flatten(x, 1)
x = self.dropout(x)
x = self.fc(x.float() if self.fp16 else x)
x = self.features(x)
return x
def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
model = IResNet(block, layers, **kwargs)
if pretrained:
raise ValueError()
return model
def iresnet18(pretrained=False, progress=True, **kwargs):
return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
progress, **kwargs)
def iresnet34(pretrained=False, progress=True, **kwargs):
return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
progress, **kwargs)
def iresnet50(pretrained=False, progress=True, **kwargs):
return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
progress, **kwargs)
def iresnet100(pretrained=False, progress=True, **kwargs):
return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
progress, **kwargs)
def iresnet200(pretrained=False, progress=True, **kwargs):
return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
progress, **kwargs)
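# A minimal smoke test (an illustration, not part of the original module):
#     model = iresnet100()
#     model.eval()
#     with torch.no_grad():
#         emb = model(torch.randn(1, 3, 112, 112))  # aligned 112x112 RGB input
#     print(emb.shape)  # torch.Size([1, 512])
# Four stride-2 stages reduce 112x112 to 7x7, which matches fc_scale = 7 * 7 above.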

135
centerface.py Normal file

@ -0,0 +1,135 @@
import time
import numpy as np
import cv2
import datetime
class CenterFace(object):
def __init__(self, landmarks=True):
self.landmarks = landmarks
if self.landmarks:
self.net = cv2.dnn.readNetFromONNX('./model/onnx/centerface.onnx')
else:
self.net = cv2.dnn.readNetFromONNX('./model/onnx/cface.1k.onnx')
self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0
def __call__(self, img, height, width, threshold=0.5):
self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
return self.inference_opencv(img, threshold)
def inference_opencv(self, img, threshold):
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
self.net.setInput(blob)
begin = datetime.datetime.now()
start_time = time.time()
if self.landmarks:
heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540'])
else:
heatmap, scale, offset = self.net.forward(["535", "536", "537"])
end = datetime.datetime.now()
end_time = time.time()
# print("cpuOne time: " + str(end_time - start_time))
# print("cpu times = ", end - begin)
return self.postprocess(heatmap, lms, offset, scale, threshold)
def transform(self, h, w):
img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
scale_h, scale_w = img_h_new / h, img_w_new / w
return img_h_new, img_w_new, scale_h, scale_w
def postprocess(self, heatmap, lms, offset, scale, threshold):
if self.landmarks:
dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
else:
dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
if len(dets) > 0:
dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
if self.landmarks:
lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
else:
dets = np.empty(shape=[0, 5], dtype=np.float32)
if self.landmarks:
lms = np.empty(shape=[0, 10], dtype=np.float32)
if self.landmarks:
return dets, lms
else:
return dets
def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
heatmap = np.squeeze(heatmap)
scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
c0, c1 = np.where(heatmap > threshold)
if self.landmarks:
boxes, lms = [], []
else:
boxes = []
if len(c0) > 0:
for i in range(len(c0)):
s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
s = heatmap[c0[i], c1[i]]
x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
x1, y1 = min(x1, size[1]), min(y1, size[0])
boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
if self.landmarks:
lm = []
for j in range(5):
lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
lms.append(lm)
boxes = np.asarray(boxes, dtype=np.float32)
keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
boxes = boxes[keep, :]
if self.landmarks:
lms = np.asarray(lms, dtype=np.float32)
lms = lms[keep, :]
if self.landmarks:
return boxes, lms
else:
return boxes
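# decode: heatmap pixels above the threshold become detections; the scale and offset
# maps are predicted at stride 4, hence the "* 4" terms when mapping back to the padded
# input, and the five landmarks are expressed relative to the box size and position.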
def nms(self, boxes, scores, nms_thresh):
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = np.argsort(scores)[::-1]
num_detections = boxes.shape[0]
suppressed = np.zeros((num_detections,), dtype=bool)  # np.bool was removed in newer NumPy releases
keep = []
for _i in range(num_detections):
i = order[_i]
if suppressed[i]:
continue
keep.append(i)
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, num_detections):
j = order[_j]
if suppressed[j]:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0, xx2 - xx1 + 1)
h = max(0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= nms_thresh:
suppressed[j] = True
return keep

67
config.py Normal file

@ -0,0 +1,67 @@
from easydict import EasyDict as edict
config = edict()
config.dataset = "ms1m-retinaface-t2"
config.embedding_size = 512
config.sample_rate = 1
config.fp16 = False
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 64
config.lr = 0.1 # batch size is 512
config.output = "ms1mv3_arcface_r50"
if config.dataset == "emore":
config.rec = "/train_tmp/faces_emore"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 16
config.warmup_epoch = -1
config.val_targets = ["lfw", ]
def lr_step_func(epoch):
return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
[m for m in [8, 14] if m - 1 <= epoch])
config.lr_func = lr_step_func
elif config.dataset == "ms1m-retinaface-t2":
config.rec = "/train_tmp/ms1m-retinaface-t2"
config.num_classes = 91180
config.num_epoch = 25
config.warmup_epoch = -1
config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
def lr_step_func(epoch):
return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
[m for m in [11, 17, 22] if m - 1 <= epoch])
config.lr_func = lr_step_func
elif config.dataset == "glint360k":
# make training faster
# our RAM is 256G
# mount -t tmpfs -o size=140G tmpfs /train_tmp
config.rec = "/train_tmp/glint360k"
config.num_classes = 360232
config.num_image = 17091657
config.num_epoch = 20
config.warmup_epoch = -1
config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
def lr_step_func(epoch):
return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
[m for m in [8, 12, 15, 18] if m - 1 <= epoch])
config.lr_func = lr_step_func
elif config.dataset == "webface":
config.rec = "/train_tmp/faces_webface_112x112"
config.num_classes = 10572
config.num_image = "forget"
config.num_epoch = 34
config.warmup_epoch = -1
config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
def lr_step_func(epoch):
return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
[m for m in [20, 28, 32] if m - 1 <= epoch])
config.lr_func = lr_step_func
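# Note: warmup_epoch is -1 in every branch, so the quadratic warm-up term never applies
# and lr_func reduces to a step decay that multiplies the base lr by 0.1 at each listed
# milestone epoch.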

168
create_database.py Normal file

@ -0,0 +1,168 @@
import os
import time
import re
import torch
import cv2
import numpy as np
from backbones import iresnet50,iresnet18,iresnet100
def load_image(img_path):
#img = cv2.imread(img_path)
img = cv2.imdecode(np.fromfile(img_path,dtype=np.uint8),cv2.IMREAD_COLOR)
img = img.transpose((2, 0, 1))
img = img[np.newaxis, :, :, :]
img = np.array(img, dtype=np.float32)
img -= 127.5
img /= 127.5
return img
def findEuclideanDistance(source_representation, test_representation):
euclidean_distance = source_representation - test_representation
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
euclidean_distance = np.sqrt(euclidean_distance)
return euclidean_distance
def findCosineDistance(source_representation, test_representation):
a = np.matmul(np.transpose(source_representation), test_representation)
b = np.sum(np.multiply(source_representation, source_representation))
c = np.sum(np.multiply(test_representation, test_representation))
return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
def l2_normalize(x):
return x / np.sqrt(np.sum(np.multiply(x, x)))
def cosin_metric(x1, x2):
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def load_npy(path):
data = np.load(path,allow_pickle=True)
data = data.item()
return data
def create_database(path,model,database_path):
name_list = os.listdir(path)
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
for name in name_list:
img_path = os.listdir(os.path.join(path,name))
for img_name in img_path[:1]:
img = load_image(os.path.join(path,name,img_name))
img = torch.from_numpy(img)
with torch.no_grad():
pred = model(img)
pred = pred.numpy()
k_v[name] = l2_normalize(pred)
np.save(database_path, k_v)
def create_database_batch(path,model,database_path):
name_list = os.listdir(path)
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
batch = 256
order_name = []
order_path = []
emb_list = []
for name in name_list:
img_path = os.listdir(os.path.join(path,name))
for img_name in img_path[:1]:
order_name.append(name)
order_path.append(os.path.join(path,name,img_name))
order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
for index, img_path in enumerate(order_path):
order_img[index] = load_image(img_path)
print(order_img.shape)
order_img = torch.from_numpy(order_img)
now = 0
number = len(order_img)
with torch.no_grad():
while now < number:
if now + batch < number:
emb = model(order_img[now:now+batch])
else:
emb = model(order_img[now:])
now = now + batch
for em in emb:
emb_list.append(em)
print("batch"+str(now))
for i, emb in enumerate(emb_list):
k_v[order_name[i]] = l2_normalize(emb.numpy())
np.save(database_path, k_v)
def add_one(img,model,name,database_path):
img = torch.from_numpy(img)
with torch.no_grad():
pred = model(img)
pred = pred.numpy()
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
k_v[name] = l2_normalize(pred)
np.save(database_path, k_v)
def findmindistance(pred,threshold,k_v):
distance = 10
most_like = ""
for name in k_v.keys():
tmp = findEuclideanDistance(k_v[name],pred)
if distance > tmp:
distance = tmp
most_like = name
if distance < threshold:
return most_like
else:
return -1
def findOne(img,model,k_v):
with torch.no_grad():
start_time = time.time()
pred = model(img)
end_time = time.time()
#print("predOne time: " + str(end_time - start_time))
pred = pred.numpy()
name = findmindistance(l2_normalize(pred),threshold=1.20,k_v=k_v)
if name != -1:
return name
else:
return "unknown"
def findAll(imglist,model,k_v):
with torch.no_grad():
name_list = []
pred = model(imglist)
pred = pred.numpy()
for pr in pred:
name = findmindistance(l2_normalize(pr),threshold=1.20,k_v=k_v)
if name != -1:
name_list.append(name)
else:
name_list.append("unknown")
return name_list
if __name__=='__main__':
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
#img = load_image(r"D:\Download\out\facedatabase\man.jpg")
#img = load_image(r"D:\Download\out\facedatabase\man6.jpg")
# img = load_image(r"D:\Download\out\alig_students\student.jpg")
# print(img.shape)
#
# k_v = load_npy("./Database/student.npy")
# start_time = time.time()
# img = torch.from_numpy(img)
# name = findOne(img,model,k_v)
# mo = r'[\u4e00-\u9fa5]*'
# name = re.match(mo,name)
# print(name.group(0))
# end_time = time.time()
# print("findOne time: " + str(end_time - start_time))
#create_database_batch(r"D:\Download\out\alig_students",model,"./Database/student.npy")
create_database_batch(r"D:\Download\out\cfp_database", model, "cfp.npy")
#add_one(img,model,"Arminio_Fraga","centerface_lfw.npy")

2845
data/FDDB/img_list.txt Normal file

File diff suppressed because it is too large

3
data/__init__.py Normal file

@ -0,0 +1,3 @@
from .wider_face import WiderFaceDetection, detection_collate
from .data_augment import *
from .config import *

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

42
data/config.py Normal file

@ -0,0 +1,42 @@
# config.py
cfg_mnet = {
'name': 'mobilenet0.25',
'min_sizes': [[16, 32], [64, 128], [256, 512]],
'steps': [8, 16, 32],
'variance': [0.1, 0.2],
'clip': False,
'loc_weight': 2.0,
'gpu_train': True,
'batch_size': 32,
'ngpu': 1,
'epoch': 250,
'decay1': 190,
'decay2': 220,
'image_size': 640,
'pretrain': True,
'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
'in_channel': 32,
'out_channel': 64
}
cfg_re50 = {
'name': 'Resnet50',
'min_sizes': [[16, 32], [64, 128], [256, 512]],
'steps': [8, 16, 32],
'variance': [0.1, 0.2],
'clip': False,
'loc_weight': 2.0,
'gpu_train': True,
'batch_size': 24,
'ngpu': 4,
'epoch': 100,
'decay1': 70,
'decay2': 90,
'image_size': 840,
'pretrain': True,
'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
'in_channel': 256,
'out_channel': 256
}

237
data/data_augment.py Normal file

@ -0,0 +1,237 @@
import cv2
import numpy as np
import random
from utils.box_utils import matrix_iof
def _crop(image, boxes, labels, landm, img_dim):
height, width, _ = image.shape
pad_image_flag = True
for _ in range(250):
"""
if random.uniform(0, 1) <= 0.2:
scale = 1.0
else:
scale = random.uniform(0.3, 1.0)
"""
PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
scale = random.choice(PRE_SCALES)
short_side = min(width, height)
w = int(scale * short_side)
h = w
if width == w:
l = 0
else:
l = random.randrange(width - w)
if height == h:
t = 0
else:
t = random.randrange(height - h)
roi = np.array((l, t, l + w, t + h))
value = matrix_iof(boxes, roi[np.newaxis])
flag = (value >= 1)
if not flag.any():
continue
centers = (boxes[:, :2] + boxes[:, 2:]) / 2
mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
boxes_t = boxes[mask_a].copy()
labels_t = labels[mask_a].copy()
landms_t = landm[mask_a].copy()
landms_t = landms_t.reshape([-1, 5, 2])
if boxes_t.shape[0] == 0:
continue
image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
boxes_t[:, :2] -= roi[:2]
boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
boxes_t[:, 2:] -= roi[:2]
# landm
landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
landms_t = landms_t.reshape([-1, 10])
# make sure that the cropped image contains at least one face > 16 pixel at training image scale
b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
mask_b = np.minimum(b_w_t, b_h_t) > 0.0
boxes_t = boxes_t[mask_b]
labels_t = labels_t[mask_b]
landms_t = landms_t[mask_b]
if boxes_t.shape[0] == 0:
continue
pad_image_flag = False
return image_t, boxes_t, labels_t, landms_t, pad_image_flag
return image, boxes, labels, landm, pad_image_flag
def _distort(image):
def _convert(image, alpha=1, beta=0):
tmp = image.astype(float) * alpha + beta
tmp[tmp < 0] = 0
tmp[tmp > 255] = 255
image[:] = tmp
image = image.copy()
if random.randrange(2):
#brightness distortion
if random.randrange(2):
_convert(image, beta=random.uniform(-32, 32))
#contrast distortion
if random.randrange(2):
_convert(image, alpha=random.uniform(0.5, 1.5))
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#saturation distortion
if random.randrange(2):
_convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
#hue distortion
if random.randrange(2):
tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
tmp %= 180
image[:, :, 0] = tmp
image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
else:
#brightness distortion
if random.randrange(2):
_convert(image, beta=random.uniform(-32, 32))
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#saturation distortion
if random.randrange(2):
_convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
#hue distortion
if random.randrange(2):
tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
tmp %= 180
image[:, :, 0] = tmp
image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
#contrast distortion
if random.randrange(2):
_convert(image, alpha=random.uniform(0.5, 1.5))
return image
def _expand(image, boxes, fill, p):
if random.randrange(2):
return image, boxes
height, width, depth = image.shape
scale = random.uniform(1, p)
w = int(scale * width)
h = int(scale * height)
left = random.randint(0, w - width)
top = random.randint(0, h - height)
boxes_t = boxes.copy()
boxes_t[:, :2] += (left, top)
boxes_t[:, 2:] += (left, top)
expand_image = np.empty(
(h, w, depth),
dtype=image.dtype)
expand_image[:, :] = fill
expand_image[top:top + height, left:left + width] = image
image = expand_image
return image, boxes_t
def _mirror(image, boxes, landms):
_, width, _ = image.shape
if random.randrange(2):
image = image[:, ::-1]
boxes = boxes.copy()
boxes[:, 0::2] = width - boxes[:, 2::-2]
# landm
landms = landms.copy()
landms = landms.reshape([-1, 5, 2])
landms[:, :, 0] = width - landms[:, :, 0]
tmp = landms[:, 1, :].copy()
landms[:, 1, :] = landms[:, 0, :]
landms[:, 0, :] = tmp
tmp1 = landms[:, 4, :].copy()
landms[:, 4, :] = landms[:, 3, :]
landms[:, 3, :] = tmp1
landms = landms.reshape([-1, 10])
return image, boxes, landms
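# _mirror: when the image is flipped horizontally the landmark x-coordinates are
# mirrored and the left/right pairs are swapped (indices 0<->1 for the eyes, 3<->4 for
# the mouth corners) so the semantic order of the five points is preserved.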
def _pad_to_square(image, rgb_mean, pad_image_flag):
if not pad_image_flag:
return image
height, width, _ = image.shape
long_side = max(width, height)
image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
image_t[:, :] = rgb_mean
image_t[0:0 + height, 0:0 + width] = image
return image_t
def _resize_subtract_mean(image, insize, rgb_mean):
interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
interp_method = interp_methods[random.randrange(5)]
image = cv2.resize(image, (insize, insize), interpolation=interp_method)
image = image.astype(np.float32)
image -= rgb_mean
return image.transpose(2, 0, 1)
class preproc(object):
def __init__(self, img_dim, rgb_means):
self.img_dim = img_dim
self.rgb_means = rgb_means
def __call__(self, image, targets):
assert targets.shape[0] > 0, "this image does not have gt"
boxes = targets[:, :4].copy()
labels = targets[:, -1].copy()
landm = targets[:, 4:-1].copy()
image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
image_t = _distort(image_t)
image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
height, width, _ = image_t.shape
image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
boxes_t[:, 0::2] /= width
boxes_t[:, 1::2] /= height
landm_t[:, 0::2] /= width
landm_t[:, 1::2] /= height
labels_t = np.expand_dims(labels_t, 1)
targets_t = np.hstack((boxes_t, landm_t, labels_t))
return image_t, targets_t
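# preproc chains the training augmentations: random face-preserving crop, photometric
# distortion, padding to a square, random horizontal flip, then resize to img_dim with
# mean subtraction; boxes and landmarks are normalized to [0, 1] relative to the crop
# before being concatenated with the labels.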

258
data/realtime_detect.py Normal file

@ -0,0 +1,258 @@
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss
ppi = 1280
ppi2 = 640
step = 3
def detect_rtsp(rtsp, out_rtsp, net, arcface_model, k_v, args):
tic_total = time.time()
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
device = torch.device("cpu" if args.cpu else "cuda")
resize = 1
# testing begin
cap = cv2.VideoCapture(rtsp)
ret, frame = cap.read()
h, w = frame.shape[:2]
factor = 0
if (w > ppi):
factor = h / w
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
h, w = frame.shape[:2]
arf = 1
detect_h, detect_w = frame.shape[:2]
frame_detect = frame
factor2 = 0
if (w > ppi2):
factor2 = h / w
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
detect_h, detect_w = frame_detect.shape[:2]
arf = w/detect_w
print(w,h)
print(detect_w,detect_h)
#fps = cap.get(cv2.CAP_PROP_FPS)
#print(fps)
size = (w, h)
sizeStr = str(size[0]) + 'x' + str(size[1])
if(out_rtsp.startswith("rtsp")):
command = ['ffmpeg',
'-y', '-an',
'-f', 'rawvideo',
'-vcodec', 'rawvideo',
'-pix_fmt', 'bgr24',
'-s', sizeStr,
'-r', "25",
'-i', '-',
'-c:v', 'libx265',
'-b:v', '3000k',
'-pix_fmt', 'yuv420p',
'-preset', 'ultrafast',
'-f', 'rtsp',
out_rtsp]
pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
number = step
dets = []
name_list = []
font = ImageFont.truetype("font.ttf", 22)
priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
scale = scale.to(device)
scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
detect_w, detect_h, detect_w, detect_h,
detect_w, detect_h])
scale1 = scale1.to(device)
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
tform = trans.SimilarityTransform()
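# src1 appears to be a 5-point reference template for 112x112 ArcFace-style alignment:
# for each detected face a similarity transform from the predicted landmarks to this
# template is estimated, the frame is warped with it, and the 112x112 crop at the
# origin is the aligned face fed to the recognition model.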
while ret:
tic_all = time.time()
if number == step:
tic = time.time()
img = np.float32(frame_detect)
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
loc, conf, landms = net(img) # forward pass
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
face_list = []
name_list = []
print('net forward time: {:.4f}'.format(time.time() - tic))
start_time_findall = time.time()
for i, det in enumerate(dets[:1]):
if det[4] < args.vis_thres:
continue
#boxes, score = det[:4], det[4]
dst = np.reshape(landms[i], (5, 2))
dst = dst * arf
tform.estimate(dst, src1)
M = tform.params[0:2, :]
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame2[0:112, 0:112, :]
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0, 3, 1, 2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
print("warpALL time: " + str(time.time() - start_time_findall ))
#start_time = time.time()
name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
#print(name_list)
#print("findOneframe time: " + str(time.time() - start_time_findall))
#start_time = time.time()
# if (len(dets) != 0):
# for i, det in enumerate(dets[:]):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# boxes = boxes * arf
# name = name_list[i]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
# cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
start_time = time.time()
if(len(dets) != 0):
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
for i, det in enumerate(dets[:1]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
boxes = boxes * arf
name = name_list[i]
if not isinstance(name, str):  # np.unicode no longer exists; plain str covers Python 3 strings
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
pipe.stdin.write(frame.tostring())
print("drawOneframe time: " + str(time.time() - start_time))
#start_time = time.time()
ret, frame = cap.read()
frame_detect = frame
number = step
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
if (ret != 0 and factor2 != 0):
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
#print("readframe time: " + str(time.time() - start_time))
else:
number += 1
if (len(dets) != 0):
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
# if (len(dets) != 0):
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# draw = ImageDraw.Draw(img_PIL)
# for i, det in enumerate(dets[:4]):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# name = name_list[i]
# if not isinstance(name, np.unicode):
# name = name.decode('utf8')
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
# width=3)
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
start_time = time.time()
pipe.stdin.write(frame.tostring())
print("writeframe time: " + str(time.time() - start_time))
start_time = time.time()
ret, frame = cap.read()
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
print("readframe time: " + str(time.time() - start_time))
print('all time: {:.4f}'.format(time.time() - tic_all))
cap.release()
pipe.terminate()
print('total time: {:.4f}'.format(time.time() - tic_total))
if __name__ == "__main__":
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# Load the face recognition model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# Load the face detection model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
k_v = load_npy("./Database/student.npy")
#print(list(k_v.keys()))
database_name_list = list(k_v.keys())
vector_list = np.array(list(k_v.values()))
print(vector_list.shape)
index = faiss.IndexFlatL2(512)
index.add(vector_list)
#detect_rtsp("software.mp4", 'rtsp://localhost/test2', retinaface_model, arcface_model, index ,database_name_list, retinaface_args)
detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)

101
data/wider_face.py Normal file

@ -0,0 +1,101 @@
import os
import os.path
import sys
import torch
import torch.utils.data as data
import cv2
import numpy as np
class WiderFaceDetection(data.Dataset):
def __init__(self, txt_path, preproc=None):
self.preproc = preproc
self.imgs_path = []
self.words = []
f = open(txt_path,'r')
lines = f.readlines()
isFirst = True
labels = []
for line in lines:
line = line.rstrip()
if line.startswith('#'):
if isFirst is True:
isFirst = False
else:
labels_copy = labels.copy()
self.words.append(labels_copy)
labels.clear()
path = line[2:]
path = txt_path.replace('label.txt','images/') + path
self.imgs_path.append(path)
else:
line = line.split(' ')
label = [float(x) for x in line]
labels.append(label)
self.words.append(labels)
def __len__(self):
return len(self.imgs_path)
def __getitem__(self, index):
img = cv2.imread(self.imgs_path[index])
height, width, _ = img.shape
labels = self.words[index]
annotations = np.zeros((0, 15))
if len(labels) == 0:
return annotations
for idx, label in enumerate(labels):
annotation = np.zeros((1, 15))
# bbox
annotation[0, 0] = label[0] # x1
annotation[0, 1] = label[1] # y1
annotation[0, 2] = label[0] + label[2] # x2
annotation[0, 3] = label[1] + label[3] # y2
# landmarks
annotation[0, 4] = label[4] # l0_x
annotation[0, 5] = label[5] # l0_y
annotation[0, 6] = label[7] # l1_x
annotation[0, 7] = label[8] # l1_y
annotation[0, 8] = label[10] # l2_x
annotation[0, 9] = label[11] # l2_y
annotation[0, 10] = label[13] # l3_x
annotation[0, 11] = label[14] # l3_y
annotation[0, 12] = label[16] # l4_x
annotation[0, 13] = label[17] # l4_y
if (annotation[0, 4]<0):
annotation[0, 14] = -1
else:
annotation[0, 14] = 1
annotations = np.append(annotations, annotation, axis=0)
target = np.array(annotations)
if self.preproc is not None:
img, target = self.preproc(img, target)
return torch.from_numpy(img), target
def detection_collate(batch):
"""Custom collate fn for dealing with batches of images that have a different
number of associated object annotations (bounding boxes).
Arguments:
batch: (tuple) A tuple of tensor images and lists of annotations
Return:
A tuple containing:
1) (tensor) batch of images stacked on their 0 dim
2) (list of tensors) annotations for a given image are stacked on 0 dim
"""
targets = []
imgs = []
for _, sample in enumerate(batch):
for _, tup in enumerate(sample):
if torch.is_tensor(tup):
imgs.append(tup)
elif isinstance(tup, type(np.empty(0))):
annos = torch.from_numpy(tup).float()
targets.append(annos)
return (torch.stack(imgs, 0), targets)
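A small collation sketch showing why the custom collate function is needed: two synthetic samples with different numbers of boxes stack into one image batch plus a list of per-image targets (the tensors and shapes below are made up for illustration):
import numpy as np
import torch

sample1 = (torch.zeros(3, 640, 640), np.zeros((2, 15), dtype=np.float32))
sample2 = (torch.zeros(3, 640, 640), np.zeros((5, 15), dtype=np.float32))
images, targets = detection_collate([sample1, sample2])
print(images.shape)                  # torch.Size([2, 3, 640, 640])
print([t.shape for t in targets])    # [torch.Size([2, 15]), torch.Size([5, 15])]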

107
dataset.py Normal file
View File

@ -0,0 +1,107 @@
import numbers
import os
import queue as Queue
import threading
import mxnet as mx
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
class BackgroundGenerator(threading.Thread):
def __init__(self, generator, local_rank, max_prefetch=6):
super(BackgroundGenerator, self).__init__()
self.queue = Queue.Queue(max_prefetch)
self.generator = generator
self.local_rank = local_rank
self.daemon = True
self.start()
def run(self):
torch.cuda.set_device(self.local_rank)
for item in self.generator:
self.queue.put(item)
self.queue.put(None)
def next(self):
next_item = self.queue.get()
if next_item is None:
raise StopIteration
return next_item
def __next__(self):
return self.next()
def __iter__(self):
return self
class DataLoaderX(DataLoader):
def __init__(self, local_rank, **kwargs):
super(DataLoaderX, self).__init__(**kwargs)
self.stream = torch.cuda.Stream(local_rank)
self.local_rank = local_rank
def __iter__(self):
self.iter = super(DataLoaderX, self).__iter__()
self.iter = BackgroundGenerator(self.iter, self.local_rank)
self.preload()
return self
def preload(self):
self.batch = next(self.iter, None)
if self.batch is None:
return None
with torch.cuda.stream(self.stream):
for k in range(len(self.batch)):
self.batch[k] = self.batch[k].to(device=self.local_rank,
non_blocking=True)
def __next__(self):
torch.cuda.current_stream().wait_stream(self.stream)
batch = self.batch
if batch is None:
raise StopIteration
self.preload()
return batch
class MXFaceDataset(Dataset):
def __init__(self, root_dir, local_rank):
super(MXFaceDataset, self).__init__()
self.transform = transforms.Compose(
[transforms.ToPILImage(),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
self.root_dir = root_dir
self.local_rank = local_rank
path_imgrec = os.path.join(root_dir, 'train.rec')
path_imgidx = os.path.join(root_dir, 'train.idx')
self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
s = self.imgrec.read_idx(0)
header, _ = mx.recordio.unpack(s)
if header.flag > 0:
self.header0 = (int(header.label[0]), int(header.label[1]))
self.imgidx = np.array(range(1, int(header.label[0])))
else:
self.imgidx = np.array(list(self.imgrec.keys))
def __getitem__(self, index):
idx = self.imgidx[index]
s = self.imgrec.read_idx(idx)
header, img = mx.recordio.unpack(s)
label = header.label
if not isinstance(label, numbers.Number):
label = label[0]
label = torch.tensor(label, dtype=torch.long)
sample = mx.image.imdecode(img).asnumpy()
if self.transform is not None:
sample = self.transform(sample)
return sample, label
def __len__(self):
return len(self.imgidx)
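A minimal training-loader sketch, assuming an MS1M-style train.rec / train.idx pair under ./faces_emore (a hypothetical path) and a single CUDA device; DataLoaderX relies on CUDA streams, so this only runs on GPU:
if torch.cuda.is_available():
    trainset = MXFaceDataset(root_dir='./faces_emore', local_rank=0)
    loader = DataLoaderX(local_rank=0, dataset=trainset, batch_size=128,
                         shuffle=True, num_workers=2, drop_last=True)
    images, labels = next(iter(loader))
    print(images.shape, labels.shape)    # e.g. torch.Size([128, 3, 112, 112]) torch.Size([128])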

0
eval/__init__.py Normal file
View File

409
eval/verification.py Normal file
View File

@ -0,0 +1,409 @@
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import datetime
import os
import pickle
import mxnet as mx
import numpy as np
import sklearn
import torch
from mxnet import ndarray as nd
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
class LFold:
def __init__(self, n_splits=2, shuffle=False):
self.n_splits = n_splits
if self.n_splits > 1:
self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)
def split(self, indices):
if self.n_splits > 1:
return self.k_fold.split(indices)
else:
return [(indices, indices)]
def calculate_roc(thresholds,
embeddings1,
embeddings2,
actual_issame,
nrof_folds=10,
pca=0):
assert (embeddings1.shape[0] == embeddings2.shape[0])
assert (embeddings1.shape[1] == embeddings2.shape[1])
nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
nrof_thresholds = len(thresholds)
k_fold = LFold(n_splits=nrof_folds, shuffle=False)
tprs = np.zeros((nrof_folds, nrof_thresholds))
fprs = np.zeros((nrof_folds, nrof_thresholds))
accuracy = np.zeros((nrof_folds))
indices = np.arange(nrof_pairs)
if pca == 0:
diff = np.subtract(embeddings1, embeddings2)
dist = np.sum(np.square(diff), 1)
for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
if pca > 0:
print('doing pca on', fold_idx)
embed1_train = embeddings1[train_set]
embed2_train = embeddings2[train_set]
_embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
pca_model = PCA(n_components=pca)
pca_model.fit(_embed_train)
embed1 = pca_model.transform(embeddings1)
embed2 = pca_model.transform(embeddings2)
embed1 = sklearn.preprocessing.normalize(embed1)
embed2 = sklearn.preprocessing.normalize(embed2)
diff = np.subtract(embed1, embed2)
dist = np.sum(np.square(diff), 1)
# Find the best threshold for the fold
acc_train = np.zeros((nrof_thresholds))
for threshold_idx, threshold in enumerate(thresholds):
_, _, acc_train[threshold_idx] = calculate_accuracy(
threshold, dist[train_set], actual_issame[train_set])
best_threshold_index = np.argmax(acc_train)
for threshold_idx, threshold in enumerate(thresholds):
tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
threshold, dist[test_set],
actual_issame[test_set])
_, _, accuracy[fold_idx] = calculate_accuracy(
thresholds[best_threshold_index], dist[test_set],
actual_issame[test_set])
tpr = np.mean(tprs, 0)
fpr = np.mean(fprs, 0)
return tpr, fpr, accuracy
def calculate_accuracy(threshold, dist, actual_issame):
predict_issame = np.less(dist, threshold)
tp = np.sum(np.logical_and(predict_issame, actual_issame))
fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
tn = np.sum(
np.logical_and(np.logical_not(predict_issame),
np.logical_not(actual_issame)))
fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
acc = float(tp + tn) / dist.size
return tpr, fpr, acc
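# small worked example (synthetic squared distances) of what calculate_accuracy returns:
#   dist          = np.array([0.4, 0.8, 1.3, 1.6])
#   actual_issame = np.array([True, True, False, False])
#   calculate_accuracy(1.0, dist, actual_issame)  ->  (1.0, 0.0, 1.0)   # tpr, fpr, acc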
def calculate_val(thresholds,
embeddings1,
embeddings2,
actual_issame,
far_target,
nrof_folds=10):
assert (embeddings1.shape[0] == embeddings2.shape[0])
assert (embeddings1.shape[1] == embeddings2.shape[1])
nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
nrof_thresholds = len(thresholds)
k_fold = LFold(n_splits=nrof_folds, shuffle=False)
val = np.zeros(nrof_folds)
far = np.zeros(nrof_folds)
diff = np.subtract(embeddings1, embeddings2)
dist = np.sum(np.square(diff), 1)
indices = np.arange(nrof_pairs)
for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
# Find the threshold that gives FAR = far_target
far_train = np.zeros(nrof_thresholds)
for threshold_idx, threshold in enumerate(thresholds):
_, far_train[threshold_idx] = calculate_val_far(
threshold, dist[train_set], actual_issame[train_set])
if np.max(far_train) >= far_target:
f = interpolate.interp1d(far_train, thresholds, kind='slinear')
threshold = f(far_target)
else:
threshold = 0.0
val[fold_idx], far[fold_idx] = calculate_val_far(
threshold, dist[test_set], actual_issame[test_set])
val_mean = np.mean(val)
far_mean = np.mean(far)
val_std = np.std(val)
return val_mean, val_std, far_mean
def calculate_val_far(threshold, dist, actual_issame):
predict_issame = np.less(dist, threshold)
true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
false_accept = np.sum(
np.logical_and(predict_issame, np.logical_not(actual_issame)))
n_same = np.sum(actual_issame)
n_diff = np.sum(np.logical_not(actual_issame))
# print(true_accept, false_accept)
# print(n_same, n_diff)
val = float(true_accept) / float(n_same)
far = float(false_accept) / float(n_diff)
return val, far
def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
# Calculate evaluation metrics
thresholds = np.arange(0, 4, 0.01)
embeddings1 = embeddings[0::2]
embeddings2 = embeddings[1::2]
tpr, fpr, accuracy = calculate_roc(thresholds,
embeddings1,
embeddings2,
np.asarray(actual_issame),
nrof_folds=nrof_folds,
pca=pca)
thresholds = np.arange(0, 4, 0.001)
val, val_std, far = calculate_val(thresholds,
embeddings1,
embeddings2,
np.asarray(actual_issame),
1e-3,
nrof_folds=nrof_folds)
return tpr, fpr, accuracy, val, val_std, far
@torch.no_grad()
def load_bin(path, image_size):
try:
with open(path, 'rb') as f:
bins, issame_list = pickle.load(f) # py2
except UnicodeDecodeError as e:
with open(path, 'rb') as f:
bins, issame_list = pickle.load(f, encoding='bytes') # py3
data_list = []
for flip in [0, 1]:
data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
data_list.append(data)
for idx in range(len(issame_list) * 2):
_bin = bins[idx]
img = mx.image.imdecode(_bin)
if img.shape[1] != image_size[0]:
img = mx.image.resize_short(img, image_size[0])
img = nd.transpose(img, axes=(2, 0, 1))
for flip in [0, 1]:
if flip == 1:
img = mx.ndarray.flip(data=img, axis=2)
data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
if idx % 1000 == 0:
print('loading bin', idx)
print(data_list[0].shape)
return data_list, issame_list
@torch.no_grad()
def test(data_set, backbone, batch_size, nfolds=10):
print('testing verification..')
data_list = data_set[0]
issame_list = data_set[1]
embeddings_list = []
time_consumed = 0.0
for i in range(len(data_list)):
data = data_list[i]
embeddings = None
ba = 0
while ba < data.shape[0]:
bb = min(ba + batch_size, data.shape[0])
count = bb - ba
_data = data[bb - batch_size: bb]
time0 = datetime.datetime.now()
img = ((_data / 255) - 0.5) / 0.5
net_out: torch.Tensor = backbone(img)
_embeddings = net_out.detach().cpu().numpy()
time_now = datetime.datetime.now()
diff = time_now - time0
time_consumed += diff.total_seconds()
if embeddings is None:
embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
ba = bb
embeddings_list.append(embeddings)
_xnorm = 0.0
_xnorm_cnt = 0
for embed in embeddings_list:
for i in range(embed.shape[0]):
_em = embed[i]
_norm = np.linalg.norm(_em)
_xnorm += _norm
_xnorm_cnt += 1
_xnorm /= _xnorm_cnt
embeddings = embeddings_list[0].copy()
embeddings = sklearn.preprocessing.normalize(embeddings)
acc1 = 0.0
std1 = 0.0
embeddings = embeddings_list[0] + embeddings_list[1]
embeddings = sklearn.preprocessing.normalize(embeddings)
print(embeddings.shape)
print('infer time', time_consumed)
_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
acc2, std2 = np.mean(accuracy), np.std(accuracy)
return acc1, std1, acc2, std2, _xnorm, embeddings_list
@torch.no_grad()
def dumpR(data_set,
backbone,
batch_size,
name='',
data_extra=None,
label_shape=None):
print('dump verification embedding..')
data_list = data_set[0]
issame_list = data_set[1]
embeddings_list = []
time_consumed = 0.0
for i in range(len(data_list)):
data = data_list[i]
embeddings = None
ba = 0
while ba < data.shape[0]:
bb = min(ba + batch_size, data.shape[0])
count = bb - ba
_data = data[bb - batch_size: bb]
time0 = datetime.datetime.now()
# normalize the raw pixel values and run the torch backbone, mirroring test() above
img = ((_data / 255) - 0.5) / 0.5
net_out: torch.Tensor = backbone(img)
_embeddings = net_out.detach().cpu().numpy()
time_now = datetime.datetime.now()
diff = time_now - time0
time_consumed += diff.total_seconds()
if embeddings is None:
embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
ba = bb
embeddings_list.append(embeddings)
embeddings = embeddings_list[0] + embeddings_list[1]
embeddings = sklearn.preprocessing.normalize(embeddings)
actual_issame = np.asarray(issame_list)
outname = os.path.join('temp.bin')
with open(outname, 'wb') as f:
pickle.dump((embeddings, issame_list),
f,
protocol=pickle.HIGHEST_PROTOCOL)
# if __name__ == '__main__':
#
# parser = argparse.ArgumentParser(description='do verification')
# # general
# parser.add_argument('--data-dir', default='', help='')
# parser.add_argument('--model',
# default='../model/softmax,50',
# help='path to load model.')
# parser.add_argument('--target',
# default='lfw,cfp_ff,cfp_fp,agedb_30',
# help='test targets.')
# parser.add_argument('--gpu', default=0, type=int, help='gpu id')
# parser.add_argument('--batch-size', default=32, type=int, help='')
# parser.add_argument('--max', default='', type=str, help='')
# parser.add_argument('--mode', default=0, type=int, help='')
# parser.add_argument('--nfolds', default=10, type=int, help='')
# args = parser.parse_args()
# image_size = [112, 112]
# print('image_size', image_size)
# ctx = mx.gpu(args.gpu)
# nets = []
# vec = args.model.split(',')
# prefix = args.model.split(',')[0]
# epochs = []
# if len(vec) == 1:
# pdir = os.path.dirname(prefix)
# for fname in os.listdir(pdir):
# if not fname.endswith('.params'):
# continue
# _file = os.path.join(pdir, fname)
# if _file.startswith(prefix):
# epoch = int(fname.split('.')[0].split('-')[1])
# epochs.append(epoch)
# epochs = sorted(epochs, reverse=True)
# if len(args.max) > 0:
# _max = [int(x) for x in args.max.split(',')]
# assert len(_max) == 2
# if len(epochs) > _max[1]:
# epochs = epochs[_max[0]:_max[1]]
#
# else:
# epochs = [int(x) for x in vec[1].split('|')]
# print('model number', len(epochs))
# time0 = datetime.datetime.now()
# for epoch in epochs:
# print('loading', prefix, epoch)
# sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
# # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
# all_layers = sym.get_internals()
# sym = all_layers['fc1_output']
# model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
# # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
# model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
# image_size[1]))])
# model.set_params(arg_params, aux_params)
# nets.append(model)
# time_now = datetime.datetime.now()
# diff = time_now - time0
# print('model loading time', diff.total_seconds())
#
# ver_list = []
# ver_name_list = []
# for name in args.target.split(','):
# path = os.path.join(args.data_dir, name + ".bin")
# if os.path.exists(path):
# print('loading.. ', name)
# data_set = load_bin(path, image_size)
# ver_list.append(data_set)
# ver_name_list.append(name)
#
# if args.mode == 0:
# for i in range(len(ver_list)):
# results = []
# for model in nets:
# acc1, std1, acc2, std2, xnorm, embeddings_list = test(
# ver_list[i], model, args.batch_size, args.nfolds)
# print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
# print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
# print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
# results.append(acc2)
# print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
# elif args.mode == 1:
# raise ValueError
# else:
# model = nets[0]
# dumpR(ver_list[0], model, args.batch_size, args.target)
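A hedged sketch of driving the verification test directly from PyTorch instead of the commented-out MXNet driver above, assuming ./lfw.bin in the usual verification-bin format and the iresnet backbone weights shipped in ./model:
from backbones import iresnet100

backbone = iresnet100()
backbone.load_state_dict(torch.load('./model/backbone100.pth', map_location='cpu'))
backbone.eval()
lfw = load_bin('./lfw.bin', image_size=(112, 112))
acc1, std1, acc2, std2, xnorm, _ = test(lfw, backbone, batch_size=32, nfolds=10)
print('accuracy-flip: %1.5f+-%1.5f' % (acc2, std2))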

483
eval_ijbc.py Normal file
View File

@ -0,0 +1,483 @@
# coding: utf-8
import os
import pickle
import matplotlib
import pandas as pd
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import timeit
import sklearn
import argparse
from sklearn.metrics import roc_curve, auc
from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap
from prettytable import PrettyTable
from pathlib import Path
import sys
import warnings
sys.path.insert(0, "../")
warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser(description='do ijb test')
# general
parser.add_argument('--model-prefix', default='', help='path to load model.')
parser.add_argument('--image-path', default='', type=str, help='')
parser.add_argument('--result-dir', default='.', type=str, help='')
parser.add_argument('--batch-size', default=128, type=int, help='')
parser.add_argument('--network', default='iresnet50', type=str, help='')
parser.add_argument('--job', default='insightface', type=str, help='job name')
parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB')
args = parser.parse_args()
target = args.target
model_path = args.model_prefix
image_path = args.image_path
result_dir = args.result_dir
gpu_id = None
use_norm_score = True  # if True, TestMode(N1)
use_detector_score = True  # if True, TestMode(D1)
use_flip_test = True  # if True, TestMode(F1)
job = args.job
batch_size = args.batch_size
import cv2
import numpy as np
import torch
from skimage import transform as trans
import backbones
class Embedding(object):
def __init__(self, prefix, data_shape, batch_size=1):
image_size = (112, 112)
self.image_size = image_size
weight = torch.load(prefix)
resnet = eval("backbones.{}".format(args.network))(False).cuda()
resnet.load_state_dict(weight)
model = torch.nn.DataParallel(resnet)
self.model = model
self.model.eval()
src = np.array([
[30.2946, 51.6963],
[65.5318, 51.5014],
[48.0252, 71.7366],
[33.5493, 92.3655],
[62.7299, 92.2041]], dtype=np.float32)
src[:, 0] += 8.0
self.src = src
self.batch_size = batch_size
self.data_shape = data_shape
def get(self, rimg, landmark):
assert landmark.shape[0] == 68 or landmark.shape[0] == 5
assert landmark.shape[1] == 2
if landmark.shape[0] == 68:
landmark5 = np.zeros((5, 2), dtype=np.float32)
landmark5[0] = (landmark[36] + landmark[39]) / 2
landmark5[1] = (landmark[42] + landmark[45]) / 2
landmark5[2] = landmark[30]
landmark5[3] = landmark[48]
landmark5[4] = landmark[54]
else:
landmark5 = landmark
tform = trans.SimilarityTransform()
tform.estimate(landmark5, self.src)
M = tform.params[0:2, :]
img = cv2.warpAffine(rimg,
M, (self.image_size[1], self.image_size[0]),
borderValue=0.0)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_flip = np.fliplr(img)
img = np.transpose(img, (2, 0, 1)) # 3*112*112, RGB
img_flip = np.transpose(img_flip, (2, 0, 1))
input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
input_blob[0] = img
input_blob[1] = img_flip
return input_blob
@torch.no_grad()
def forward_db(self, batch_data):
imgs = torch.Tensor(batch_data).cuda()
imgs.div_(255).sub_(0.5).div_(0.5)
feat = self.model(imgs)
feat = feat.reshape([self.batch_size, 2 * feat.shape[1]])
return feat.cpu().numpy()
# split a list as evenly as possible into n sub-lists; if n is larger than the number of elements, the extra sub-lists are left empty
def divideIntoNstrand(listTemp, n):
twoList = [[] for i in range(n)]
for i, e in enumerate(listTemp):
twoList[i % n].append(e)
return twoList
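# small worked example of the splitter above:
#   divideIntoNstrand([1, 2, 3, 4, 5], 2)  ->  [[1, 3, 5], [2, 4]]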
def read_template_media_list(path):
# ijb_meta = np.loadtxt(path, dtype=str)
ijb_meta = pd.read_csv(path, sep=' ', header=None).values
templates = ijb_meta[:, 1].astype(int)
medias = ijb_meta[:, 2].astype(int)
return templates, medias
# In[ ]:
def read_template_pair_list(path):
# pairs = np.loadtxt(path, dtype=str)
pairs = pd.read_csv(path, sep=' ', header=None).values
# print(pairs.shape)
# print(pairs[:, 0].astype(np.int))
t1 = pairs[:, 0].astype(int)
t2 = pairs[:, 1].astype(int)
label = pairs[:, 2].astype(int)
return t1, t2, label
# In[ ]:
def read_image_feature(path):
with open(path, 'rb') as fid:
img_feats = pickle.load(fid)
return img_feats
# In[ ]:
def get_image_feature(img_path, files_list, model_path, epoch, gpu_id):
batch_size = args.batch_size
data_shape = (3, 112, 112)
files = files_list
print('files:', len(files))
rare_size = len(files) % batch_size
faceness_scores = []
batch = 0
img_feats = np.empty((len(files), 1024), dtype=np.float32)
batch_data = np.empty((2 * batch_size, 3, 112, 112))
embedding = Embedding(model_path, data_shape, batch_size)
for img_index, each_line in enumerate(files[:len(files) - rare_size]):
name_lmk_score = each_line.strip().split(' ')
img_name = os.path.join(img_path, name_lmk_score[0])
img = cv2.imread(img_name)
lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
dtype=np.float32)
lmk = lmk.reshape((5, 2))
input_blob = embedding.get(img, lmk)
batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0]
batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1]
if (img_index + 1) % batch_size == 0:
print('batch', batch)
img_feats[batch * batch_size:batch * batch_size +
batch_size][:] = embedding.forward_db(batch_data)
batch += 1
faceness_scores.append(name_lmk_score[-1])
batch_data = np.empty((2 * rare_size, 3, 112, 112))
embedding = Embedding(model_path, data_shape, rare_size)
for img_index, each_line in enumerate(files[len(files) - rare_size:]):
name_lmk_score = each_line.strip().split(' ')
img_name = os.path.join(img_path, name_lmk_score[0])
img = cv2.imread(img_name)
lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
dtype=np.float32)
lmk = lmk.reshape((5, 2))
input_blob = embedding.get(img, lmk)
batch_data[2 * img_index][:] = input_blob[0]
batch_data[2 * img_index + 1][:] = input_blob[1]
if (img_index + 1) % rare_size == 0:
print('batch', batch)
img_feats[len(files) -
rare_size:][:] = embedding.forward_db(batch_data)
batch += 1
faceness_scores.append(name_lmk_score[-1])
faceness_scores = np.array(faceness_scores).astype(np.float32)
# img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01
# faceness_scores = np.ones( (len(files), ), dtype=np.float32 )
return img_feats, faceness_scores
# In[ ]:
def image2template_feature(img_feats=None, templates=None, medias=None):
# ==========================================================
# 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]
# 2. compute media feature.
# 3. compute template feature.
# ==========================================================
unique_templates = np.unique(templates)
template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))
for count_template, uqt in enumerate(unique_templates):
(ind_t,) = np.where(templates == uqt)
face_norm_feats = img_feats[ind_t]
face_medias = medias[ind_t]
unique_medias, unique_media_counts = np.unique(face_medias,
return_counts=True)
media_norm_feats = []
for u, ct in zip(unique_medias, unique_media_counts):
(ind_m,) = np.where(face_medias == u)
if ct == 1:
media_norm_feats += [face_norm_feats[ind_m]]
else: # image features from the same video will be aggregated into one feature
media_norm_feats += [
np.mean(face_norm_feats[ind_m], axis=0, keepdims=True)
]
media_norm_feats = np.array(media_norm_feats)
# media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))
template_feats[count_template] = np.sum(media_norm_feats, axis=0)
if count_template % 2000 == 0:
print('Finish Calculating {} template features.'.format(
count_template))
# template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))
template_norm_feats = sklearn.preprocessing.normalize(template_feats)
# print(template_norm_feats.shape)
return template_norm_feats, unique_templates
# In[ ]:
def verification(template_norm_feats=None,
unique_templates=None,
p1=None,
p2=None):
# ==========================================================
# Compute set-to-set Similarity Score.
# ==========================================================
template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
for count_template, uqt in enumerate(unique_templates):
template2id[uqt] = count_template
score = np.zeros((len(p1),)) # save cosine distance between pairs
total_pairs = np.array(range(len(p1)))
batchsize = 100000  # small batch size instead of all pairs in one batch due to the memory limitation
sublists = [
total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
]
total_sublists = len(sublists)
for c, s in enumerate(sublists):
feat1 = template_norm_feats[template2id[p1[s]]]
feat2 = template_norm_feats[template2id[p2[s]]]
similarity_score = np.sum(feat1 * feat2, -1)
score[s] = similarity_score.flatten()
if c % 10 == 0:
print('Finish {}/{} pairs.'.format(c, total_sublists))
return score
# In[ ]:
def verification2(template_norm_feats=None,
unique_templates=None,
p1=None,
p2=None):
template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
for count_template, uqt in enumerate(unique_templates):
template2id[uqt] = count_template
score = np.zeros((len(p1),)) # save cosine distance between pairs
total_pairs = np.array(range(len(p1)))
batchsize = 100000  # small batch size instead of all pairs in one batch due to the memory limitation
sublists = [
total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
]
total_sublists = len(sublists)
for c, s in enumerate(sublists):
feat1 = template_norm_feats[template2id[p1[s]]]
feat2 = template_norm_feats[template2id[p2[s]]]
similarity_score = np.sum(feat1 * feat2, -1)
score[s] = similarity_score.flatten()
if c % 10 == 0:
print('Finish {}/{} pairs.'.format(c, total_sublists))
return score
def read_score(path):
with open(path, 'rb') as fid:
img_feats = pickle.load(fid)
return img_feats
# # Step1: Load Meta Data
# In[ ]:
assert target == 'IJBC' or target == 'IJBB'
# =============================================================
# load image and template relationships for template feature embedding
# tid --> template id, mid --> media id
# format:
# image_name tid mid
# =============================================================
start = timeit.default_timer()
templates, medias = read_template_media_list(
os.path.join('%s/meta' % image_path,
'%s_face_tid_mid.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
# In[ ]:
# =============================================================
# load template pairs for template-to-template verification
# tid : template id, label : 1/0
# format:
# tid_1 tid_2 label
# =============================================================
start = timeit.default_timer()
p1, p2, label = read_template_pair_list(
os.path.join('%s/meta' % image_path,
'%s_template_pair_label.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
# # Step 2: Get Image Features
# In[ ]:
# =============================================================
# load image features
# format:
# img_feats: [image_num x feats_dim] (227630, 512)
# =============================================================
start = timeit.default_timer()
img_path = '%s/loose_crop' % image_path
img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower())
img_list = open(img_list_path)
files = img_list.readlines()
# files_list = divideIntoNstrand(files, rank_size)
files_list = files
# img_feats
# for i in range(rank_size):
img_feats, faceness_scores = get_image_feature(img_path, files_list,
model_path, 0, gpu_id)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0],
img_feats.shape[1]))
# # Step3: Get Template Features
# In[ ]:
# =============================================================
# compute template features from image features.
# =============================================================
start = timeit.default_timer()
# ==========================================================
# Norm feature before aggregation into template feature?
# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).
# ==========================================================
# 1. FaceScore Feature Norm
# 2. FaceScore Detector
if use_flip_test:
# concat --- F1
# img_input_feats = img_feats
# add --- F2
img_input_feats = img_feats[:, 0:img_feats.shape[1] //
2] + img_feats[:, img_feats.shape[1] // 2:]
else:
img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2]
if use_norm_score:
img_input_feats = img_input_feats
else:
# normalise features to remove norm information
img_input_feats = img_input_feats / np.sqrt(
np.sum(img_input_feats ** 2, -1, keepdims=True))
if use_detector_score:
print(img_input_feats.shape, faceness_scores.shape)
img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]
else:
img_input_feats = img_input_feats
template_norm_feats, unique_templates = image2template_feature(
img_input_feats, templates, medias)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
# # Step 4: Get Template Similarity Scores
# In[ ]:
# =============================================================
# compute verification scores between template pairs.
# =============================================================
start = timeit.default_timer()
score = verification(template_norm_feats, unique_templates, p1, p2)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
# In[ ]:
save_path = os.path.join(result_dir, args.job)
# save_path = result_dir + '/%s_result' % target
if not os.path.exists(save_path):
os.makedirs(save_path)
score_save_file = os.path.join(save_path, "%s.npy" % target.lower())
np.save(score_save_file, score)
# # Step 5: Get ROC Curves and TPR@FPR Table
# In[ ]:
files = [score_save_file]
methods = []
scores = []
for file in files:
methods.append(Path(file).stem)
scores.append(np.load(file))
methods = np.array(methods)
scores = dict(zip(methods, scores))
colours = dict(
zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))
x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1]
tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels])
fig = plt.figure()
for method in methods:
fpr, tpr, _ = roc_curve(label, scores[method])
roc_auc = auc(fpr, tpr)
fpr = np.flipud(fpr)
tpr = np.flipud(tpr) # select largest tpr at same fpr
plt.plot(fpr,
tpr,
color=colours[method],
lw=1,
label=('[%s (AUC = %0.4f %%)]' %
(method.split('-')[-1], roc_auc * 100)))
tpr_fpr_row = []
tpr_fpr_row.append("%s-%s" % (method, target))
for fpr_iter in np.arange(len(x_labels)):
_, min_index = min(
list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr)))))
tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100))
tpr_fpr_table.add_row(tpr_fpr_row)
plt.xlim([10 ** -6, 0.1])
plt.ylim([0.3, 1.0])
plt.grid(linestyle='--', linewidth=1)
plt.xticks(x_labels)
plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True))
plt.xscale('log')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC on IJB')
plt.legend(loc="lower right")
fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower()))
print(tpr_fpr_table)

377
face_api.py Normal file
View File

@ -0,0 +1,377 @@
import os
import time
import re
import torch
import cv2
import numpy as np
from anti import anti_spoofing, load_anti_model
from backbones import iresnet50, iresnet18, iresnet100
from retinaface_detect import load_retinaface_model, detect_one, detect_video, set_retinaface_conf
from torch2trt import torch2trt, TRTModule
threshold = 0.7
# read a local 112x112 image, move channels to the front, and normalize to [-1, 1]
def load_image(img_path):
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
img = img.transpose((2, 0, 1))
img = img[np.newaxis, :, :, :]
img = np.array(img, dtype=np.float32)
img -= 127.5
img /= 127.5
return img
# Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
euclidean_distance = source_representation - test_representation
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
euclidean_distance = np.sqrt(euclidean_distance)
return euclidean_distance
# cosine distance between two feature vectors
def findCosineDistance(source_representation, test_representation):
a = np.matmul(np.transpose(source_representation), test_representation)
b = np.sum(np.multiply(source_representation, source_representation))
c = np.sum(np.multiply(test_representation, test_representation))
return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
# L2-normalize a vector (used before Euclidean comparison)
def l2_normalize(x):
return x / np.sqrt(np.sum(np.multiply(x, x)))
# normalized cosine similarity
def cosin_metric(x1, x2):
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
# load the saved face database mapping names to face feature vectors
def load_npy(path):
data = np.load(path, allow_pickle=True)
data = data.item()
return data
# generate face feature vectors in batches and save them to the face database
def create_database_batch(path, model, database_path):
name_list = os.listdir(path)
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
batch = 256
order_name = []
order_path = []
emb_list = []
for name in name_list[:]:
img_path = os.path.join(path, name)
# for img_name in img_path[:1]:
order_name.append(name[:-4])
order_path.append(img_path)
order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
for index, img_path in enumerate(order_path):
order_img[index] = load_image(img_path)
print(order_img.shape)
order_img = torch.from_numpy(order_img)
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
now = 0
number = len(order_img)
with torch.no_grad():
while now < number:
if now + batch < number:
emb = model(order_img[now:now + batch])
else:
emb = model(order_img[now:])
now = now + batch
emb = emb.cpu().numpy()
for em in emb:
emb_list.append(em)
print("batch" + str(now))
for i, emb in enumerate(emb_list):
k_v[order_name[i]] = l2_normalize(emb)
np.save(database_path, k_v)
def create_database_from_img(order_name, order_img, model, database_path, cpu_or_cuda):
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
batch = 256
emb_list = []
print(order_img.shape)
order_img = torch.from_numpy(order_img)
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
now = 0
number = len(order_img)
with torch.no_grad():
while now < number:
if now + batch < number:
emb = model(order_img[now:now + batch])
else:
emb = model(order_img[now:])
now = now + batch
emb = emb.cpu().numpy()
for em in emb:
emb_list.append(em)
print("batch" + str(now))
for i, emb in enumerate(emb_list):
k_v[order_name[i]] = l2_normalize(emb)
np.save(database_path, k_v)
# add one person's name and face feature vector to the database; create the database if it does not exist
def add_one_to_database(img, model, name, database_path, cpu_or_cuda):
img = torch.from_numpy(img)
img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
with torch.no_grad():
pred = model(img)
pred = pred.cpu().numpy()
k_v = {}
if os.path.exists(database_path):
k_v = np.load(database_path, allow_pickle=True)
k_v = k_v.item()
k_v[name] = l2_normalize(pred)
np.save(database_path, k_v)
# find which face feature vector in the database is closest to this feature vector
def findmindistance(pred, threshold, k_v):
distance = 10
most_like = ""
for name in k_v.keys():
tmp = findEuclideanDistance(k_v[name], pred)
if distance > tmp:
distance = tmp
most_like = name
if distance < threshold:
return most_like, distance
else:
return -1, distance
def faiss_find_face(pred, index, database_name_list):
name_list = []
start_time = time.time()
D, I = index.search(pred, 1)
end_time = time.time()
# print("faiss cost %fs" % (end_time - start_time))
# print(D, I)
if len(pred) == 1:
if D[0][0] < threshold:
# print(database_name_list[I[0][0]])
return database_name_list[I[0][0]], D[0][0]
else:
return "unknown", D[0][0]
else:
for i,index in enumerate(I):
if D[i][0] < threshold:
#print(database_name_list[I[0][0]])
name_list.append(database_name_list[index[0]]+str(D[i][0]))
else:
name_list.append("unknown"+str(D[i][0]))
return name_list
# find a single face in the face database
def findOne(img, model, index, database_name_list, cpu_or_cuda):
img = torch.from_numpy(img)
img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
with torch.no_grad():
start_time = time.time()
pred = model(img)
end_time = time.time()
print("predOne time: " + str(end_time - start_time))
pred = pred.cpu().numpy()
# start_time = time.time()
# name, distance = findmindistance(l2_normalize(pred), threshold=threshold, k_v=k_v)
# end_time = time.time()
# print("baoli time: " + str(end_time - start_time))
name, distance = faiss_find_face(l2_normalize(pred), index, database_name_list)
print(pred.shape)
if name != -1:
mo = r'[\u4e00-\u9fa5_a-zA-Z0-9]*'
name = re.match(mo, name)
return name.group(0), distance
else:
return "unknown", distance
# find every face from the given face list in the face database
def findAll(imglist, model, index ,database_name_list, cpu_or_cuda):
imglist = torch.from_numpy(imglist)
imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
with torch.no_grad():
name_list =[]
start_time = time.time()
pred = model(imglist)
end_time = time.time()
print("predOne time: " + str(end_time - start_time))
pred = pred.cpu().numpy()
start_time = time.time()
#name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
for pr in pred:
pr = np.expand_dims(l2_normalize(pr), 0)
# #print(pr.shape)
name, distance = faiss_find_face(l2_normalize(pr), index, database_name_list)
#name_list.append(name+" "+str(distance))
name_list.append(name)
# for pr in pred:
# name, distance = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
# if name != -1:
# mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
# name = re.match(mo, name)
# name_list.append(name.group(0) + str(distance))
# else:
# name_list.append("unknown" + str(distance))
end_time = time.time()
print("searchALL time: " + str(end_time - start_time))
return name_list
# extract 512-dimensional feature vectors
def embedding(order_img, model, cpu_or_cuda):
number = len(order_img)
order_img = torch.from_numpy(order_img)
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
batch = 64
emb_list = []
now = 0
with torch.no_grad():
while now < number:
if now + batch < number:
emb = model(order_img[now:now + batch])
else:
emb = model(order_img[now:])
now = now + batch
emb = emb.cpu().numpy()
for em in emb:
emb_list.append(l2_normalize(em))
# print("batch" + str(now))
emb_list = np.array(emb_list)
return emb_list
# process a folder of faces to cluster; return the list of feature vectors and the list of file names
def get_claster_tmp_file_embedding(file_path, retinaface_model, retinaface_args, arcface_model, cpu_or_cuda):
img_name = os.listdir(file_path)
img_list = []
for name in img_name:
all_face, box_and_point = detect_one(os.path.join(file_path, name), retinaface_model, retinaface_args)
img_list.append(all_face[0])
img_list = np.array(img_list)
# print(img_list.shape)
emb_list = embedding(img_list, arcface_model, cpu_or_cuda)
return emb_list, img_name
# group faces of the same person into one cluster
def cluster(emb_list, name_list):
all_claster = []
cla = []
in_claster_name = []
img_number = len(emb_list)
for index, emb in enumerate(emb_list):
if name_list[index] in in_claster_name:
continue
for j in range(img_number - index - 1):
if findEuclideanDistance(emb, emb_list[index + 1 + j]) < threshold:
if name_list[index + 1 + j] not in in_claster_name:
cla.append(name_list[index + 1 + j])
in_claster_name.append(name_list[index + 1 + j])
cla.append(name_list[index])
in_claster_name.append(name_list[index])
all_claster.append(cla)
cla = []
return all_claster
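# tiny worked example (with the module-level threshold of 0.7): identical embeddings end up in one cluster
#   emb_list  = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], dtype=np.float32)
#   name_list = ['a.jpg', 'b.jpg', 'c.jpg']
#   cluster(emb_list, name_list)  ->  [['b.jpg', 'a.jpg'], ['c.jpg']]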
# load the face recognition model
def load_arcface_model(model_path, cpu_or_cuda):
if cpu_or_cuda == "trt":
model = TRTModule()
model.load_state_dict(torch.load('./model/arcface_trt.pth'))
elif cpu_or_cuda == "trt_new":
model = iresnet100()
model.load_state_dict(torch.load(model_path, map_location="cuda"))
model = model.eval()
model.to(torch.device("cuda"))
x = torch.ones((1, 3, 112, 112)).to(torch.device("cuda"))
model = torch2trt(model, [x], max_batch_size=4)
torch.save(model.state_dict(), './model/arcface_trt.pth')
else:
model = iresnet100()
model.load_state_dict(torch.load(model_path, map_location=cpu_or_cuda))
model = model.eval()
model.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
return model
# check whether two faces belong to the same person
def face_verification(img1, img2, model, cpu_or_cuda):
img_list = np.concatenate((img1, img2), axis=0)
img_list = torch.from_numpy(img_list)
img_list = img_list.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
with torch.no_grad():
pred = model(img_list)
pred = pred.cpu().numpy()
distance = findEuclideanDistance(l2_normalize(pred[0]), l2_normalize(pred[1]))
# print("EuclideanDistance is :" + str(distance))
if distance < threshold:
return 'same ',distance
else:
return 'different ', distance
if __name__ == '__main__':
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
# retinaface_model = load_retinaface_model(retinaface_args)
#
# anti_spoofing_model_path = "model/anti_spoof_models"
# anti_model = load_anti_model(anti_spoofing_model_path, 0)
#
# k_v = load_npy("./Database/student.npy")
# compare two faces
# img1, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0001.jpg", retinaface_model, retinaface_args)
# img2, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0002.jpg", retinaface_model, retinaface_args)
# print(face_verification(img1, img2, arcface_model))
# img3 = load_image(r"D:\Download\out\alig_students\student.jpg")
# img3 = torch.from_numpy(img3)
# liveness (anti-spoofing) check on a single face
# img3, b_p = detect_one(r"C:\Users\ASUS\Desktop\face\IMG_20210525_113950.jpg", retinaface_model, retinaface_args)
# b = b_p[0]
# w = b[2] - b[0]
# h = b[3] - b[1]
# b[2] = w
# b[3] = h
# label, value = anti_spoofing("./img/recognition/000_0.bmp", "model/anti_spoof_models", 0, np.array(b[:4], int), anti_model)
# print(label,value)
# name = findOne(img3, arcface_model, k_v, cpu_or_cuda)
# print(name)
# face clustering
# emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/face", retinaface_model,
# retinaface_args, arcface_model, cpu_or_cuda)
# print(cluster(emb_list, name_list))
# img3, box_and_point = detect_one("D:\Download\out\students\student.jpg", retinaface_model, retinaface_args)
# print(embedding(img3,arcface_model).shape)
# add a single face to the face database
# add_one_to_database(img1,arcface_model,"Aaron_Peirsol","./Database/student.npy")
# name = findOne(img1, arcface_model, k_v)
# print(name)
# add faces to the face database in batches
create_database_batch(r"D:\Download\out\alig_students_all", arcface_model, "./Database/sfz.npy")
# recognize faces in a video
# detect_video("software.mp4","out.avi",retinaface_model,arcface_model,k_v,retinaface_args)

BIN
font.ttf Normal file

Binary file not shown.

98
gender_age.py Normal file
View File

@ -0,0 +1,98 @@
import datetime
import mxnet as mx
import numpy as np
from retinaface_detect import detect_one, load_retinaface_model, set_retinaface_conf
# age/gender model configuration
class ConfGenderModel(object):
def __init__(self, image_size, image, model, gpu, det):
self.image_size = image_size
self.image = image
self.gpu = gpu
self.model = model
self.det = det
# instantiate a configuration
def set_gender_conf():
args = ConfGenderModel(image_size='112,112',
image=r'C:\Users\ASUS\Desktop\man.png',
gpu=-1,
model='model/model,0',
det=0)
return args
# load the gender/age model
def load_gender_model(args, layer):
if args.gpu >= 0:
ctx = mx.gpu(args.gpu)
else:
ctx = mx.cpu()
_vec = args.image_size.split(',')
assert len(_vec) == 2
image_size = (int(_vec[0]), int(_vec[1]))
_vec = args.model.split(',')
assert len(_vec) == 2
prefix = _vec[0]
epoch = int(_vec[1])
print('loading', prefix, epoch)
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()
sym = all_layers[layer + '_output']
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
model.set_params(arg_params, aux_params)
return model
# forward inference
def get_ga(model, img):
# print(data)
model.forward(img, is_train=False)
ret = model.get_outputs()[0].asnumpy()
g = ret[:, 0:2].flatten()
gender = np.argmax(g)
a = ret[:, 2:202].reshape((100, 2))
a = np.argmax(a, axis=1)
age = int(sum(a))
return gender, age
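# note on the output layout: ret has 202 columns -- ret[:, 0:2] is a two-way gender classifier,
# and ret[:, 2:202] reshapes to 100 two-way age bins whose argmaxes are summed to give the age in years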
# predict the gender and age of every face in the list
def gender_age(img_list, gender_model):
gender_list = []
age_list = []
if len(img_list) == 0:
print("find no face")
else:
time_now = datetime.datetime.now()
img_list *= 127.5
img_list += 127.5
for img in img_list:
img = np.expand_dims(img, axis=0)
img = mx.nd.array(img)
img = mx.io.DataBatch(data=(img,))
gender, age = get_ga(gender_model, img)
if gender == 1:
gender_list.append("man")
else:
gender_list.append('woman')
age_list.append(age)
time_now2 = datetime.datetime.now()
diff = time_now2 - time_now
print('time cost', diff.total_seconds())
return gender_list,age_list
if __name__ == "__main__":
args = set_gender_conf()
retinaface_args = set_retinaface_conf()
gender_model = load_gender_model(args, 'fc1')
retinaface_model = load_retinaface_model(retinaface_args)
img_list, box_and_point = detect_one(args.image, retinaface_model,retinaface_args)
gender_list, age_list = gender_age(img_list, gender_model)
print(gender_list)

49
gender_model.py Normal file
View File

@ -0,0 +1,49 @@
import numpy as np
import mxnet as mx
# load the gender/age model
def get_model(ctx, image_size, model_str, layer):
_vec = model_str.split(',')
assert len(_vec) == 2
prefix = _vec[0]
epoch = int(_vec[1])
print('loading', prefix, epoch)
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()
sym = all_layers[layer + '_output']
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
model.set_params(arg_params, aux_params)
return model
class GenderModel:
def __init__(self, args):
self.args = args
if args.gpu >= 0:
ctx = mx.gpu(args.gpu)
else:
ctx = mx.cpu()
_vec = args.image_size.split(',')
assert len(_vec) == 2
image_size = (int(_vec[0]), int(_vec[1]))
self.model = None
if len(args.model) > 0:
self.model = get_model(ctx, image_size, args.model, 'fc1')
self.det_minsize = 50
self.det_threshold = [0.6, 0.7, 0.8]
# self.det_factor = 0.9
self.image_size = image_size
def get_ga(self, data):
# print(data)
self.model.forward(data, is_train=False)
ret = self.model.get_outputs()[0].asnumpy()
g = ret[:, 0:2].flatten()
gender = np.argmax(g)
a = ret[:, 2:202].reshape((100, 2))
a = np.argmax(a, axis=1)
age = int(sum(a))
return gender, age

BIN
img/search/000_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/002_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/377_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/000_0.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/000_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/000_2.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/000_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/000_4.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/001_0.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/001_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/001_2.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/001_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/001_4.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/002_0.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/002_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/002_2.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/002_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/002_4.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/003_0.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/003_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/003_2.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/003_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/003_4.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/004_0.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/004_1.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/004_2.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/004_3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

BIN
img/search/face/004_4.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

2
layers/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .functions import *
from .modules import *

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,34 @@
import torch
from itertools import product as product
import numpy as np
from math import ceil
class PriorBox(object):
def __init__(self, cfg, image_size=None, phase='train'):
super(PriorBox, self).__init__()
self.min_sizes = cfg['min_sizes']
self.steps = cfg['steps']
self.clip = cfg['clip']
self.image_size = image_size
self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
self.name = "s"
def forward(self):
anchors = []
for k, f in enumerate(self.feature_maps):
min_sizes = self.min_sizes[k]
for i, j in product(range(f[0]), range(f[1])):
for min_size in min_sizes:
s_kx = min_size / self.image_size[1]
s_ky = min_size / self.image_size[0]
dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
for cy, cx in product(dense_cy, dense_cx):
anchors += [cx, cy, s_kx, s_ky]
# back to torch land
output = torch.Tensor(anchors).view(-1, 4)
if self.clip:
output.clamp_(max=1, min=0)
return output
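A minimal anchor-generation sketch with a hypothetical MobileNet-style config; the min_sizes/steps values mirror common RetinaFace settings but are assumptions here, not values read from this repo's data config:
cfg_example = {'min_sizes': [[16, 32], [64, 128], [256, 512]],
               'steps': [8, 16, 32],
               'clip': False}
priors = PriorBox(cfg_example, image_size=(640, 640)).forward()
print(priors.shape)    # torch.Size([16800, 4]) for this config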

View File

@ -0,0 +1,3 @@
from .multibox_loss import MultiBoxLoss
__all__ = ['MultiBoxLoss']

Binary file not shown.

View File

@ -0,0 +1,125 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.box_utils import match, log_sum_exp
from data import cfg_mnet
GPU = cfg_mnet['gpu_train']
class MultiBoxLoss(nn.Module):
"""SSD Weighted Loss Function
Compute Targets:
1) Produce Confidence Target Indices by matching ground truth boxes
with (default) 'priorboxes' that have jaccard index > threshold parameter
(default threshold: 0.5).
2) Produce localization target by 'encoding' variance into offsets of ground
truth boxes and their matched 'priorboxes'.
3) Hard negative mining to filter the excessive number of negative examples
that comes with using a large number of default bounding boxes.
(default negative:positive ratio 3:1)
Objective Loss:
L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
weighted by α which is set to 1 by cross val.
Args:
c: class confidences,
l: predicted boxes,
g: ground truth boxes
N: number of matched default boxes
See: https://arxiv.org/pdf/1512.02325.pdf for more details.
"""
def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
super(MultiBoxLoss, self).__init__()
self.num_classes = num_classes
self.threshold = overlap_thresh
self.background_label = bkg_label
self.encode_target = encode_target
self.use_prior_for_matching = prior_for_matching
self.do_neg_mining = neg_mining
self.negpos_ratio = neg_pos
self.neg_overlap = neg_overlap
self.variance = [0.1, 0.2]
def forward(self, predictions, priors, targets):
"""Multibox Loss
Args:
predictions (tuple): A tuple containing loc preds, conf preds,
and prior boxes from SSD net.
conf shape: torch.size(batch_size,num_priors,num_classes)
loc shape: torch.size(batch_size,num_priors,4)
priors shape: torch.size(num_priors,4)
ground_truth (tensor): Ground truth boxes and labels for a batch,
shape: [batch_size,num_objs,5] (last idx is the label).
"""
loc_data, conf_data, landm_data = predictions
priors = priors
num = loc_data.size(0)
num_priors = (priors.size(0))
# match priors (default boxes) and ground truth boxes
loc_t = torch.Tensor(num, num_priors, 4)
landm_t = torch.Tensor(num, num_priors, 10)
conf_t = torch.LongTensor(num, num_priors)
for idx in range(num):
truths = targets[idx][:, :4].data
labels = targets[idx][:, -1].data
landms = targets[idx][:, 4:14].data
defaults = priors.data
match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
if GPU:
loc_t = loc_t.cuda()
conf_t = conf_t.cuda()
landm_t = landm_t.cuda()
zeros = torch.tensor(0).cuda()
# landm Loss (Smooth L1)
# Shape: [batch,num_priors,10]
pos1 = conf_t > zeros
num_pos_landm = pos1.long().sum(1, keepdim=True)
N1 = max(num_pos_landm.data.sum().float(), 1)
pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
landm_p = landm_data[pos_idx1].view(-1, 10)
landm_t = landm_t[pos_idx1].view(-1, 10)
loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
pos = conf_t != zeros
conf_t[pos] = 1
# Localization Loss (Smooth L1)
# Shape: [batch,num_priors,4]
pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
loc_p = loc_data[pos_idx].view(-1, 4)
loc_t = loc_t[pos_idx].view(-1, 4)
loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
# Compute max conf across batch for hard negative mining
batch_conf = conf_data.view(-1, self.num_classes)
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
# Hard Negative Mining
loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
loss_c = loss_c.view(num, -1)
_, loss_idx = loss_c.sort(1, descending=True)
_, idx_rank = loss_idx.sort(1)
num_pos = pos.long().sum(1, keepdim=True)
num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
neg = idx_rank < num_neg.expand_as(idx_rank)
# Confidence Loss Including Positive and Negative Examples
pos_idx = pos.unsqueeze(2).expand_as(conf_data)
neg_idx = neg.unsqueeze(2).expand_as(conf_data)
conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
targets_weighted = conf_t[(pos+neg).gt(0)]
loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
# Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
N = max(num_pos.data.sum().float(), 1)
loss_l /= N
loss_c /= N
loss_landm /= N1
return loss_l, loss_c, loss_landm
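A construction sketch with RetinaFace-style hyper-parameters (these particular values are an assumption, not read from this repo's training script); forward additionally needs the priors from PriorBox and ground-truth targets:
criterion = MultiBoxLoss(num_classes=2, overlap_thresh=0.35, prior_for_matching=True,
                         bkg_label=0, neg_mining=True, neg_pos=7,
                         neg_overlap=0.35, encode_target=False)
# loss_l, loss_c, loss_landm = criterion((loc_preds, conf_preds, landm_preds), priors, targets)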

33
losses.py Normal file
View File

@ -0,0 +1,33 @@
import torch
from torch import nn
class CosFace(nn.Module):
def __init__(self, s=64.0, m=0.40):
super(CosFace, self).__init__()
self.s = s
self.m = m
def forward(self, cosine, label):
index = torch.where(label != -1)[0]
m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
m_hot.scatter_(1, label[index, None], self.m)
cosine[index] -= m_hot
ret = cosine * self.s
return ret
class ArcFace(nn.Module):
def __init__(self, s=64.0, m=0.5):
super(ArcFace, self).__init__()
self.s = s
self.m = m
def forward(self, cosine: torch.Tensor, label):
index = torch.where(label != -1)[0]
m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
m_hot.scatter_(1, label[index, None], self.m)
cosine.acos_()
cosine[index] += m_hot
cosine.cos_().mul_(self.s)
return cosine
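A small usage sketch for the margin heads above (toy sizes, assumed): cosine is the matrix of cosine similarities between L2-normalized embeddings and L2-normalized class weights (partial_fc.py later in this commit normalizes the weights; the embeddings are assumed normalized upstream), and label holds one class index per sample, with -1 marking a class not held locally.

import torch
from torch.nn.functional import normalize, linear

emb = normalize(torch.randn(4, 512))      # 4 embeddings, assumed L2-normalized upstream
weight = normalize(torch.randn(10, 512))  # 10 class centres, normalized as in partial_fc.py
cosine = linear(emb, weight)              # (4, 10) cosine similarities in [-1, 1]
label = torch.tensor([3, 7, -1, 0])       # -1 marks a class not held on this rank

logits = ArcFace(s=64.0, m=0.5)(cosine, label)  # additive angular margin, then scale by s
# logits feeds softmax cross-entropy; CosFace instead subtracts m from the cosine directly.
print(logits.shape)  # torch.Size([4, 10])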

Binary file not shown.

Binary file not shown.

BIN
model/backbone100.pth Normal file

Binary file not shown.

7094
model/log Normal file

File diff suppressed because it is too large Load Diff

BIN
model/model-0000.params Normal file

Binary file not shown.

2399
model/model-symbol.json Normal file

File diff suppressed because it is too large Load Diff

BIN
model/onnx/centerface.onnx Normal file

Binary file not shown.

Binary file not shown.

0
models/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

137
models/net.py Normal file
View File

@ -0,0 +1,137 @@
import time
import torch
import torch.nn as nn
import torchvision.models._utils as _utils
import torchvision.models as models
import torch.nn.functional as F
from torch.autograd import Variable
def conv_bn(inp, oup, stride = 1, leaky = 0):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.LeakyReLU(negative_slope=leaky, inplace=True)
)
def conv_bn_no_relu(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
)
def conv_bn1X1(inp, oup, stride, leaky=0):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
nn.BatchNorm2d(oup),
nn.LeakyReLU(negative_slope=leaky, inplace=True)
)
def conv_dw(inp, oup, stride, leaky=0.1):
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
nn.BatchNorm2d(inp),
nn.LeakyReLU(negative_slope= leaky,inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.LeakyReLU(negative_slope= leaky,inplace=True),
)
class SSH(nn.Module):
def __init__(self, in_channel, out_channel):
super(SSH, self).__init__()
assert out_channel % 4 == 0
leaky = 0
if (out_channel <= 64):
leaky = 0.1
self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)
self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky)
self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky)
self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
def forward(self, input):
conv3X3 = self.conv3X3(input)
conv5X5_1 = self.conv5X5_1(input)
conv5X5 = self.conv5X5_2(conv5X5_1)
conv7X7_2 = self.conv7X7_2(conv5X5_1)
conv7X7 = self.conv7x7_3(conv7X7_2)
out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
out = F.relu(out)
return out
class FPN(nn.Module):
def __init__(self,in_channels_list,out_channels):
super(FPN,self).__init__()
leaky = 0
if (out_channels <= 64):
leaky = 0.1
self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky)
self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky)
self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky)
self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky)
self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky)
def forward(self, input):
# names = list(input.keys())
input = list(input.values())
output1 = self.output1(input[0])
output2 = self.output2(input[1])
output3 = self.output3(input[2])
up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
output2 = output2 + up3
output2 = self.merge2(output2)
up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
output1 = output1 + up2
output1 = self.merge1(output1)
out = [output1, output2, output3]
return out
class MobileNetV1(nn.Module):
def __init__(self):
super(MobileNetV1, self).__init__()
self.stage1 = nn.Sequential(
conv_bn(3, 8, 2, leaky = 0.1), # 3
conv_dw(8, 16, 1), # 7
conv_dw(16, 32, 2), # 11
conv_dw(32, 32, 1), # 19
conv_dw(32, 64, 2), # 27
conv_dw(64, 64, 1), # 43
)
self.stage2 = nn.Sequential(
conv_dw(64, 128, 2), # 43 + 16 = 59
conv_dw(128, 128, 1), # 59 + 32 = 91
conv_dw(128, 128, 1), # 91 + 32 = 123
conv_dw(128, 128, 1), # 123 + 32 = 155
conv_dw(128, 128, 1), # 155 + 32 = 187
conv_dw(128, 128, 1), # 187 + 32 = 219
)
self.stage3 = nn.Sequential(
conv_dw(128, 256, 2), # 219 + 32 = 251
conv_dw(256, 256, 1), # 251 + 64 = 315
)
self.avg = nn.AdaptiveAvgPool2d((1,1))
self.fc = nn.Linear(256, 1000)
def forward(self, x):
x = self.stage1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.avg(x)
# x = self.model(x)
x = x.view(-1, 256)
x = self.fc(x)
return x
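A quick shape sketch (assumed 640x640 toy input) showing how the MobileNetV1 stages above feed the FPN and SSH modules: the three stages produce stride-8/16/32 maps with 64/128/256 channels.

import torch
from collections import OrderedDict

backbone = MobileNetV1()
x = torch.randn(1, 3, 640, 640)
c1 = backbone.stage1(x)    # (1,  64, 80, 80)
c2 = backbone.stage2(c1)   # (1, 128, 40, 40)
c3 = backbone.stage3(c2)   # (1, 256, 20, 20)

fpn = FPN(in_channels_list=[64, 128, 256], out_channels=64)
p1, p2, p3 = fpn(OrderedDict([("1", c1), ("2", c2), ("3", c3)]))  # all mapped to 64 channels
ssh = SSH(64, 64)
print(ssh(p1).shape)       # torch.Size([1, 64, 80, 80])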

127
models/retinaface.py Normal file
View File

@ -0,0 +1,127 @@
import torch
import torch.nn as nn
import torchvision.models.detection.backbone_utils as backbone_utils
import torchvision.models._utils as _utils
import torch.nn.functional as F
from collections import OrderedDict
from models.net import MobileNetV1 as MobileNetV1
from models.net import FPN as FPN
from models.net import SSH as SSH
class ClassHead(nn.Module):
def __init__(self,inchannels=512,num_anchors=3):
super(ClassHead,self).__init__()
self.num_anchors = num_anchors
self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)
def forward(self,x):
out = self.conv1x1(x)
out = out.permute(0,2,3,1).contiguous()
return out.view(out.shape[0], -1, 2)
class BboxHead(nn.Module):
def __init__(self,inchannels=512,num_anchors=3):
super(BboxHead,self).__init__()
self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)
def forward(self,x):
out = self.conv1x1(x)
out = out.permute(0,2,3,1).contiguous()
return out.view(out.shape[0], -1, 4)
class LandmarkHead(nn.Module):
def __init__(self,inchannels=512,num_anchors=3):
super(LandmarkHead,self).__init__()
self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0)
def forward(self,x):
out = self.conv1x1(x)
out = out.permute(0,2,3,1).contiguous()
return out.view(out.shape[0], -1, 10)
class RetinaFace(nn.Module):
def __init__(self, cfg = None, phase = 'train'):
"""
:param cfg: Network related settings.
:param phase: train or test.
"""
super(RetinaFace,self).__init__()
self.phase = phase
backbone = None
if cfg['name'] == 'mobilenet0.25':
backbone = MobileNetV1()
if cfg['pretrain']:
checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in checkpoint['state_dict'].items():
name = k[7:] # remove module.
new_state_dict[name] = v
# load params
backbone.load_state_dict(new_state_dict)
elif cfg['name'] == 'Resnet50':
import torchvision.models as models
backbone = models.resnet50(pretrained=cfg['pretrain'])
self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])
in_channels_stage2 = cfg['in_channel']
in_channels_list = [
in_channels_stage2 * 2,
in_channels_stage2 * 4,
in_channels_stage2 * 8,
]
out_channels = cfg['out_channel']
self.fpn = FPN(in_channels_list,out_channels)
self.ssh1 = SSH(out_channels, out_channels)
self.ssh2 = SSH(out_channels, out_channels)
self.ssh3 = SSH(out_channels, out_channels)
self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])
def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2):
classhead = nn.ModuleList()
for i in range(fpn_num):
classhead.append(ClassHead(inchannels,anchor_num))
return classhead
def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2):
bboxhead = nn.ModuleList()
for i in range(fpn_num):
bboxhead.append(BboxHead(inchannels,anchor_num))
return bboxhead
def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2):
landmarkhead = nn.ModuleList()
for i in range(fpn_num):
landmarkhead.append(LandmarkHead(inchannels,anchor_num))
return landmarkhead
def forward(self,inputs):
out = self.body(inputs)
# FPN
fpn = self.fpn(out)
# SSH
feature1 = self.ssh1(fpn[0])
feature2 = self.ssh2(fpn[1])
feature3 = self.ssh3(fpn[2])
features = [feature1, feature2, feature3]
bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1)
ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)
if self.phase == 'train':
output = (bbox_regressions, classifications, ldm_regressions)
else:
output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
return output
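A minimal construction sketch for RetinaFace. The cfg dict below contains only the keys this class reads; the values mirror the mobilenet0.25 configuration but the project's real settings live in data/config.py (cfg_mnet), which is not part of this listing, so treat them as illustrative assumptions.

import torch

cfg = {
    'name': 'mobilenet0.25',
    'pretrain': False,  # skip ./weights/mobilenetV1X0.25_pretrain.tar for this sketch
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64,
}
net = RetinaFace(cfg=cfg, phase='test')
net.eval()
with torch.no_grad():
    boxes, scores, landms = net(torch.randn(1, 3, 640, 640))
print(boxes.shape, scores.shape, landms.shape)
# torch.Size([1, 16800, 4]) torch.Size([1, 16800, 2]) torch.Size([1, 16800, 10])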

161
partial_fc.py Normal file
View File

@ -0,0 +1,161 @@
import logging
import os
import torch
import torch.distributed as dist
from torch.nn import Module
from torch.nn.functional import normalize, linear
from torch.nn.parameter import Parameter
class PartialFC(Module):
"""
Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint,
Partial FC: Training 10 Million Identities on a Single Machine
See the original paper:
https://arxiv.org/abs/2010.05222
"""
@torch.no_grad()
def __init__(self, rank, local_rank, world_size, batch_size, resume,
margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"):
super(PartialFC, self).__init__()
#
self.num_classes: int = num_classes
self.rank: int = rank
self.local_rank: int = local_rank
self.device: torch.device = torch.device("cuda:{}".format(self.local_rank))
self.world_size: int = world_size
self.batch_size: int = batch_size
self.margin_softmax: callable = margin_softmax
self.sample_rate: float = sample_rate
self.embedding_size: int = embedding_size
self.prefix: str = prefix
self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size)
self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size)
self.num_sample: int = int(self.sample_rate * self.num_local)
self.weight_name = os.path.join(self.prefix, "rank:{}_softmax_weight.pt".format(self.rank))
self.weight_mom_name = os.path.join(self.prefix, "rank:{}_softmax_weight_mom.pt".format(self.rank))
if resume:
try:
self.weight: torch.Tensor = torch.load(self.weight_name)
logging.info("softmax weight resume successfully!")
except (FileNotFoundError, KeyError, IndexError):
self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
logging.info("softmax weight resume fail!")
try:
self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name)
logging.info("softmax weight mom resume successfully!")
except (FileNotFoundError, KeyError, IndexError):
self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
logging.info("softmax weight mom resume fail!")
else:
self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
logging.info("softmax weight init successfully!")
logging.info("softmax weight mom init successfully!")
self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank)
self.index = None
if int(self.sample_rate) == 1:
self.update = lambda: 0
self.sub_weight = Parameter(self.weight)
self.sub_weight_mom = self.weight_mom
else:
self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank))
def save_params(self):
torch.save(self.weight.data, self.weight_name)
torch.save(self.weight_mom, self.weight_mom_name)
@torch.no_grad()
def sample(self, total_label):
index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local)
total_label[~index_positive] = -1
total_label[index_positive] -= self.class_start
if int(self.sample_rate) != 1:
positive = torch.unique(total_label[index_positive], sorted=True)
if self.num_sample - positive.size(0) >= 0:
perm = torch.rand(size=[self.num_local], device=self.device)
perm[positive] = 2.0
index = torch.topk(perm, k=self.num_sample)[1]
index = index.sort()[0]
else:
index = positive
self.index = index
total_label[index_positive] = torch.searchsorted(index, total_label[index_positive])
self.sub_weight = Parameter(self.weight[index])
self.sub_weight_mom = self.weight_mom[index]
def forward(self, total_features, norm_weight):
torch.cuda.current_stream().wait_stream(self.stream)
logits = linear(total_features, norm_weight)
return logits
@torch.no_grad()
def update(self):
self.weight_mom[self.index] = self.sub_weight_mom
self.weight[self.index] = self.sub_weight
def prepare(self, label, optimizer):
with torch.cuda.stream(self.stream):
total_label = torch.zeros(
size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long)
dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label)
self.sample(total_label)
optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None)
optimizer.param_groups[-1]['params'][0] = self.sub_weight
optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom
norm_weight = normalize(self.sub_weight)
return total_label, norm_weight
def forward_backward(self, label, features, optimizer):
total_label, norm_weight = self.prepare(label, optimizer)
total_features = torch.zeros(
size=[self.batch_size * self.world_size, self.embedding_size], device=self.device)
dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data)
total_features.requires_grad = True
logits = self.forward(total_features, norm_weight)
logits = self.margin_softmax(logits, total_label)
with torch.no_grad():
max_fc = torch.max(logits, dim=1, keepdim=True)[0]
dist.all_reduce(max_fc, dist.ReduceOp.MAX)
# calculate exp(logits) and all-reduce
logits_exp = torch.exp(logits - max_fc)
logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)
# calculate prob
logits_exp.div_(logits_sum_exp)
# get one-hot
grad = logits_exp
index = torch.where(total_label != -1)[0]
one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device)
one_hot.scatter_(1, total_label[index, None], 1)
# calculate loss
loss = torch.zeros(grad.size()[0], 1, device=grad.device)
loss[index] = grad[index].gather(1, total_label[index, None])
dist.all_reduce(loss, dist.ReduceOp.SUM)
loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)
# calculate grad
grad[index] -= one_hot
grad.div_(self.batch_size * self.world_size)
logits.backward(grad)
if total_features.grad is not None:
total_features.grad.detach_()
x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True)
# feature gradient all-reduce
dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0)))
x_grad = x_grad * self.world_size
# backward backbone
return x_grad, loss_v
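The manual gradient block in forward_backward implements the softmax cross-entropy identity d(loss)/d(logits) = softmax(logits) - one_hot(label), written out by hand so the max and the normalizer can be all-reduced across ranks. A single-process sanity sketch of that identity (toy shapes, no distributed setup):

import torch
import torch.nn.functional as F

logits = torch.randn(3, 5, requires_grad=True)
label = torch.tensor([2, 0, 4])

with torch.no_grad():
    prob = torch.softmax(logits, dim=1)
    one_hot = F.one_hot(label, num_classes=5).float()
    manual_grad = (prob - one_hot) / logits.size(0)   # grad of the mean cross-entropy
    manual_loss = -prob.gather(1, label[:, None]).clamp_min(1e-30).log().mean()

auto_loss = F.cross_entropy(logits, label)
auto_loss.backward()
print(torch.allclose(manual_loss, auto_loss))    # True
print(torch.allclose(manual_grad, logits.grad))  # True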

14
play.py Normal file
View File

@ -0,0 +1,14 @@
import cv2
cap = cv2.VideoCapture("rtsp://admin:2020@uestc@192.168.30.83:554/h264")
ret, frame = cap.read()
if not ret:
    raise RuntimeError("failed to read a frame from the RTSP stream")
h, w = frame.shape[:2]
print("height: " + str(h) + " width: " + str(w))
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)
# while ret:
# cv2.imshow('out', frame)
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
# ret, frame = cap.read()
cap.release()
cv2.destroyAllWindows()

282
realtime_detect.py Normal file
View File

@ -0,0 +1,282 @@
import argparse
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss
ppi = 1280
ppi2 = 1100
step = 3
def detect_rtsp(rtsp, out_rtsp, net, arcface_model, index ,database_name_list, k_v, args):
tic_total = time.time()
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
device = torch.device("cpu" if args.cpu else "cuda")
resize = 1
# testing begin
cap = cv2.VideoCapture(rtsp)
ret, frame = cap.read()
h, w = frame.shape[:2]
factor = 0
if (w > ppi):
factor = h / w
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
h, w = frame.shape[:2]
arf = 1
detect_h, detect_w = frame.shape[:2]
frame_detect = frame
factor2 = 0
if (w > ppi2):
factor2 = h / w
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
detect_h, detect_w = frame_detect.shape[:2]
arf = w/detect_w
print(w,h)
print(detect_w,detect_h)
fps = cap.get(cv2.CAP_PROP_FPS)
#print(fps)
size = (w, h)
sizeStr = str(size[0]) + 'x' + str(size[1])
if(out_rtsp.startswith("rtsp")):
command = ['ffmpeg',
'-y', '-an',
'-f', 'rawvideo',
'-vcodec', 'rawvideo',
'-pix_fmt', 'bgr24',
'-s', sizeStr,
'-r', "25",
'-i', '-',
'-c:v', 'libx265',
'-b:v', '3000k',
'-pix_fmt', 'yuv420p',
'-preset', 'ultrafast',
'-f', 'rtsp',
out_rtsp]
pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
#out = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'XVID'), fps, size)
number = step
dets = []
name_list = []
font = ImageFont.truetype("font.ttf", 22)
priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
scale = scale.to(device)
scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
detect_w, detect_h, detect_w, detect_h,
detect_w, detect_h])
scale1 = scale1.to(device)
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
tform = trans.SimilarityTransform()
while ret:
tic_all = time.time()
if number == step:
tic = time.time()
img = np.float32(frame_detect)
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
loc, conf, landms = net(img) # forward pass
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
face_list = []
name_list = []
print('net forward time: {:.4f}'.format(time.time() - tic))
start_time_findall = time.time()
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
#boxes, score = det[:4], det[4]
dst = np.reshape(landms[i], (5, 2))
dst = dst * arf
tform.estimate(dst, src1)
M = tform.params[0:2, :]
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame2[0:112, 0:112, :]
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0, 3, 1, 2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
print("warpALL time: " + str(time.time() - start_time_findall ))
#start_time = time.time()
name_list = findAll(face_list, arcface_model, index ,database_name_list, k_v, "cpu" if args.cpu else "cuda")
#print(name_list)
#print("findOneframe time: " + str(time.time() - start_time_findall))
# start_time = time.time()
# if (len(dets) != 0):
# for i, det in enumerate(dets[:]):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# boxes = boxes * arf
# name = name_list[i]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
# cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
start_time = time.time()
if(len(dets) != 0):
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
boxes = boxes * arf
name = name_list[i]
if not isinstance(name, np.unicode):
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
pipe.stdin.write(frame.tostring())
#out.write(frame)
print("drawOneframe time: " + str(time.time() - start_time))
start_time = time.time()
ret, frame = cap.read()
frame_detect = frame
number = 0
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
if (ret != 0 and factor2 != 0):
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
print("readframe time: " + str(time.time() - start_time))
else:
number += 1
# if (len(dets) != 0):
# for i, det in enumerate(dets[:4]):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
if (len(dets) != 0):
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
boxes = boxes * arf
name = name_list[i]
if not isinstance(name, np.unicode):
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
width=3)
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
start_time = time.time()
pipe.stdin.write(frame.tostring())
#out.write(frame)
print("writeframe time: " + str(time.time() - start_time))
start_time = time.time()
ret, frame = cap.read()
frame_detect = frame
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
if (ret != 0 and factor2 != 0):
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
print("readframe time: " + str(time.time() - start_time))
print('all time: {:.4f}'.format(time.time() - tic_all))
cap.release()
#out.release()
pipe.terminate()
print('total time: {:.4f}'.format(time.time() - tic_total))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--rtsp",
type=str,
default="",
dest="rtsp_path"
)
args = parser.parse_args()
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# Load the face recognition (ArcFace) model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# Load the face detection (RetinaFace) model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
k_v = load_npy("./Database/student.npy")
#print(list(k_v.keys()))
database_name_list = list(k_v.keys())
vector_list = np.array(list(k_v.values()))
print(vector_list.shape)
nlist = 10
quantizer = faiss.IndexFlatL2(512) # the other index
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
index.train(vector_list)
#index = faiss.IndexFlatL2(512)
index.add(vector_list)
index.nprobe=10
detect_rtsp(args.rtsp_path, 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
#detect_rtsp("rtsp://admin:2020@uestc@192.168.14.32:8557/h264", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
#detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)

283
recognition_video.py Normal file
View File

@ -0,0 +1,283 @@
import time
from centerface import CenterFace
from skimage import transform as trans
import numpy as np
import torch
import cv2
from backbones import iresnet100, iresnet18
from create_database import findOne, load_npy,findAll
from PIL import Image, ImageDraw,ImageFont
def show():
cap = cv2.VideoCapture("test.mp4")
ret, frame = cap.read()
h, w = frame.shape[:2]
centerface = CenterFace()
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter('ccvt6.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, size)
while ret:
start_time = time.time()
dets, lms = centerface(frame, h, w, threshold=0.35)
end_time = time.time()
print("findOne time: " + str(end_time - start_time))
for det in dets:
boxes, score = det[:4], det[4]
cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
for lm in lms:
for i in range(0, 5):
cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
cv2.imshow('out', frame)
out.write(frame)
# Press Q on keyboard to stop recording
if cv2.waitKey(1) & 0xFF == ord('q'):
break
ret, frame = cap.read()
cap.release()
out.release()
cv2.destroyAllWindows()
def video():
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
k_v = load_npy("student.npy")
count = 0
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
cap = cv2.VideoCapture("software.mp4")
ret, frame = cap.read()
h, w = frame.shape[:2]
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
centerface = CenterFace()
while ret:
start_time = time.time()
dets, lms = centerface(frame, h, w, threshold=0.35)
end_time = time.time()
print("detectOneframe time: " + str(end_time - start_time))
face_list = []
name_list = []
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
img_w = int(boxes[2] - boxes[0])
img_h = int(boxes[3] - boxes[1])
distance = int(abs(img_w - img_h) / 2)
img_w1 = int(boxes[0]) - distance
img_w2 = int(boxes[2]) + distance
# print(img_w,img_h,distace,max_hw)
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
img112 = cv2.resize(img112, (112, 112))
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
# count += 1
face_list.append(img112)
else:
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
img112 = cv2.resize(img112, (112, 112))
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0,3,1,2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
face_list = torch.from_numpy(face_list)
start_time = time.time()
for face in face_list:
face = face[np.newaxis, :, :, :]
name_list.append(findOne(face,model,k_v))
end_time = time.time()
print("findOneframe time: "+str(end_time-start_time))
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
font = ImageFont.truetype("font.ttf",12)
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
# (0, 225, 255), 1)
name = name_list[i][:3]
if not isinstance(name, np.unicode):
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(0, 225, 255),font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=1)
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
cv2.imshow('out', frame)
out.write(frame)
# Press Q on keyboard to stop recording
if cv2.waitKey(1) & 0xFF == ord('q'):
break
ret, frame = cap.read()
cap.release()
out.release()
cv2.destroyAllWindows()
def video_GPU():
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
k_v = load_npy("student.npy")
count = 0
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
cap = cv2.VideoCapture("software.mp4")
ret, frame = cap.read()
h, w = frame.shape[:2]
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
centerface = CenterFace()
while ret:
start_time = time.time()
dets, lms = centerface(frame, h, w, threshold=0.35)
end_time = time.time()
print("detectOneframe time: " + str(end_time - start_time))
face_list = []
name_list = []
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
img_w = int(boxes[2] - boxes[0])
img_h = int(boxes[3] - boxes[1])
distance = int(abs(img_w - img_h) / 2)
img_w1 = int(boxes[0]) - distance
img_w2 = int(boxes[2]) + distance
# print(img_w,img_h,distace,max_hw)
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
img112 = cv2.resize(img112, (112, 112))
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
# count += 1
face_list.append(img112)
else:
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
img112 = cv2.resize(img112, (112, 112))
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0,3,1,2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
face_list = torch.from_numpy(face_list)
start_time = time.time()
name_list = findAll(face_list, model, k_v)
# for face in face_list:
# face = face[np.newaxis, :, :, :]
#
# name_list.append(findOne(face,model,k_v))
end_time = time.time()
print("findOneframe time: "+str(end_time-start_time))
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
font = ImageFont.truetype("font.ttf",18)
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
# (0, 225, 255), 1)
name = name_list[i][:3]
if not isinstance(name, np.unicode):
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(255, 0, 0),font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=2)
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
cv2.imshow('out', frame)
out.write(frame)
# Press Q on keyboard to stop recording
if cv2.waitKey(1) & 0xFF == ord('q'):
break
ret, frame = cap.read()
cap.release()
out.release()
cv2.destroyAllWindows()
def video_GPU_retinaface():
model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
k_v = load_npy("student.npy")
count = 0
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
cap = cv2.VideoCapture("software.mp4")
ret, frame = cap.read()
h, w = frame.shape[:2]
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
centerface = CenterFace()
while ret:
start_time = time.time()
dets, lms = centerface(frame, h, w, threshold=0.35)
end_time = time.time()
print("detectOneframe time: " + str(end_time - start_time))
face_list = []
name_list = []
print(dets.shape)
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
img_w = int(boxes[2] - boxes[0])
img_h = int(boxes[3] - boxes[1])
distance = int(abs(img_w - img_h) / 2)
img_w1 = int(boxes[0]) - distance
img_w2 = int(boxes[2]) + distance
# print(img_w,img_h,distace,max_hw)
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
img112 = cv2.resize(img112, (112, 112))
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
# count += 1
face_list.append(img112)
else:
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
img112 = cv2.resize(img112, (112, 112))
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0,3,1,2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
face_list = torch.from_numpy(face_list)
start_time = time.time()
name_list = findAll(face_list, model, k_v)
# for face in face_list:
# face = face[np.newaxis, :, :, :]
#
# name_list.append(findOne(face,model,k_v))
end_time = time.time()
print("findOneframe time: "+str(end_time-start_time))
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
font = ImageFont.truetype("font.ttf",18)
for i,det in enumerate(dets):
boxes, score = det[:4], det[4]
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
# (0, 225, 255), 1)
name = name_list[i][:3]
if not isinstance(name, np.unicode):
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(255, 0, 0),font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=2)
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
cv2.imshow('out', frame)
out.write(frame)
# Press Q on keyboard to stop recording
if cv2.waitKey(1) & 0xFF == ord('q'):
break
ret, frame = cap.read()
cap.release()
out.release()
cv2.destroyAllWindows()
video_GPU_retinaface()
#video_GPU()
#show()

111
requirements.txt Normal file
View File

@ -0,0 +1,111 @@
Package Version
---------------------- -----------
appdirs 1.4.4
attrs 21.2.0
backcall 0.2.0
beautifulsoup4 4.9.3
certifi 2021.5.30
cffi 1.14.0
chardet 4.0.0
click 8.0.1
conda 4.9.1
conda-build 3.20.5
conda-package-handling 1.7.0
cryptography 2.9.2
cycler 0.10.0
dataclasses 0.6
decorator 4.4.2
dnspython 2.0.0
faiss-cpu 1.7.1
filelock 3.0.12
fire 0.4.0
Flask 1.1.2
future 0.18.2
glob2 0.7
graphsurgeon 0.4.5
graphviz 0.8.4
h5py 3.3.0
idna 2.10
imageio 2.9.0
iniconfig 1.1.1
ipython 7.18.1
ipython-genutils 0.2.0
itsdangerous 2.0.1
jedi 0.17.2
Jinja2 3.0.1
joblib 1.0.1
kiwisolver 1.3.1
libarchive-c 2.9
Mako 1.1.4
MarkupSafe 2.0.1
matplotlib 3.4.1
mkl-fft 1.2.0
mkl-random 1.1.1
mkl-service 2.3.0
mxnet 1.8.0.post0
networkx 2.5.1
nltk 3.6
numpy 1.20.3
olefile 0.46
opencv-python 4.5.1.48
packaging 21.0
pandas 1.2.4
parso 0.7.0
pexpect 4.8.0
pickleshare 0.7.5
Pillow 8.0.0
pip 20.0.2
pkginfo 1.6.0
pluggy 1.0.0
prefetch-generator 1.0.1
prompt-toolkit 3.0.8
protobuf 3.15.8
psutil 5.7.2
ptyprocess 0.6.0
py 1.9.0
pycosat 0.6.3
pycparser 2.20
pycuda 2021.1
Pygments 2.7.1
pyOpenSSL 19.1.0
pyparsing 2.4.7
PySocks 1.7.1
pytest 6.2.5
python-dateutil 2.8.1
python-etcd 0.4.5
pytools 2021.2.6
pytz 2020.1
PyWavelets 1.1.1
PyYAML 5.3.1
pyzmq 22.1.0
regex 2021.8.3
requests 2.25.1
ruamel-yaml 0.15.87
scikit-image 0.18.1
scipy 1.6.3
seaborn 0.11.1
setuptools 57.1.0
six 1.14.0
soupsieve 2.0.1
tensorboard-logger 0.1.0
tensorrt 7.2.3.4
termcolor 1.1.0
tifffile 2021.4.8
toml 0.10.2
torch 1.7.1
torch2trt 0.2.0
torchelastic 0.2.1
torchfile 0.1.0
torchtext 0.8.0
torchvision 0.8.2
tornado 6.1
tqdm 4.46.0
traitlets 5.0.5
typing-extensions 3.7.4.3
uff 0.6.9
urllib3 1.26.5
visdom 0.1.8
wcwidth 0.2.5
websocket-client 1.1.0
Werkzeug 2.0.1
wheel 0.34.2

762
retinaface_arcface.py Normal file
View File

@ -0,0 +1,762 @@
from __future__ import print_function
import os
import argparse
import re
import faiss
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from data import cfg_mnet, cfg_re50
from face_api import create_database_from_img, load_arcface_model, findAll
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
import time
from face_api import load_arcface_model, load_npy
from skimage import transform as trans
from backbones import iresnet100, iresnet18
#from create_database import findOne, load_npy,findAll
from PIL import Image, ImageDraw,ImageFont
parser = argparse.ArgumentParser(description='Retinaface')
parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth',
type=str, help='Trained state_dict file path to open')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--cpu', action="store_true", default=False if torch.cuda.is_available() else True, help='Use cpu inference')
parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
parser.add_argument('--top_k', default=5000, type=int, help='top_k')
parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
args = parser.parse_args()
def check_keys(model, pretrained_state_dict):
ckpt_keys = set(pretrained_state_dict.keys())
model_keys = set(model.state_dict().keys())
used_pretrained_keys = model_keys & ckpt_keys
unused_pretrained_keys = ckpt_keys - model_keys
missing_keys = model_keys - ckpt_keys
print('Missing keys:{}'.format(len(missing_keys)))
print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
print('Used keys:{}'.format(len(used_pretrained_keys)))
assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
return True
def remove_prefix(state_dict, prefix):
''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
print('remove prefix \'{}\''.format(prefix))
f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
return {f(key): value for key, value in state_dict.items()}
def load_model(model, pretrained_path, load_to_cpu):
print('Loading pretrained model from {}'.format(pretrained_path))
if load_to_cpu:
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
else:
device = torch.cuda.current_device()
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
if "state_dict" in pretrained_dict.keys():
pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
else:
pretrained_dict = remove_prefix(pretrained_dict, 'module.')
check_keys(model, pretrained_dict)
model.load_state_dict(pretrained_dict, strict=False)
return model
def image_to112x112_retinaface():
torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
# net and model
net = RetinaFace(cfg=cfg, phase = 'test')
net = load_model(net, args.trained_model, args.cpu)
net.eval()
print('Finished loading model!')
#print(net)
cudnn.benchmark = True
device = torch.device("cpu" if args.cpu else "cuda")
net = net.to(device)
resize = 1
input_path = r"D:\Download\out\cfp"
output_path = "D:\Download\out\cfp_align"
folder1 = os.listdir(input_path)
count = 0
count2 =0
for f in folder1:
output_name_path = os.path.join(output_path, f)
if os.path.exists(output_name_path) == 0:
os.makedirs(output_name_path)
img_name_path = os.path.join(input_path, f)
img_list = os.listdir(img_name_path)
for img in img_list:
count2 +=1
print(count2)
path = os.path.join(img_name_path, img)
align_img_path = os.path.join(output_name_path, img)
# print(path)
frame = cv2.imread(path)
h, w = frame.shape[:2]
img = np.float32(frame)
im_height, im_width, _ = img.shape
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
scale = scale.to(device)
tic = time.time()
loc, conf, landms = net(img) # forward pass
print('net forward time: {:.4f}'.format(time.time() - tic))
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2]])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
score = 500
# show image
if args.save_image:
dst = []
for i, det in enumerate(dets):
if det[4] < args.vis_thres:
continue
center_x = (det[2] + det[0]) / 2
center_y = (det[3] + det[1]) / 2
if abs(center_x - 125) + abs(center_y - 125) < score:
score = abs(center_x - 125) + abs(center_y - 125)
dst = np.reshape(landms[i], (5, 2))
if len(dst) > 0:
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
tform = trans.SimilarityTransform()
tform.estimate(dst, src1)
M = tform.params[0:2, :]
if w < 112 or h < 112:
count += 1
#print(align_img_path)
continue
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame[0:112, 0:112, :]
cv2.imwrite(align_img_path, img112)
print(">112 number"+str(count))
def sfz_to112x112_retinaface(arcface_model,cpu_or_cuda):
torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
# net and model
net = RetinaFace(cfg=cfg, phase = 'test')
net = load_model(net, args.trained_model, args.cpu)
net.eval()
print('Finished loading model!')
#print(net)
cudnn.benchmark = True
device = torch.device("cpu" if args.cpu else "cuda")
net = net.to(device)
resize = 1
input_path = r"D:\Download\out\alig_students_all"
output_path = r"D:\Download\out\alig_students_all"
folder1 = os.listdir(input_path)
count = 0
count2 =0
print(len(folder1))
# print(folder1[0][:-4])
# return 0
order_img = []
order_name = []
tic = time.time()
for img_name in folder1[:2500]:
# output_name_path = os.path.join(output_path, img_name)
# if os.path.exists(output_name_path) == 0:
# os.makedirs(output_name_path)
img_name_path = os.path.join(input_path, img_name)
#img_list = os.listdir(img_name_path)
count2 += 1
if (count2 % 1000 == 0):
print('net forward time: {:.4f}'.format(time.time() - tic))
print(count2)
if len(order_img) > 0:
order_img = np.array(order_img)
order_img = order_img.transpose((0, 3, 1, 2))
order_img = np.array(order_img, dtype=np.float32)
order_img -= 127.5
order_img /= 127.5
# order_img = np.array(order_img)
# print(order_img.shape)
# print(len(order_name))
create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
order_img = []
order_name = []
tic = time.time()
# if img_name[19] != "1":
# continue
#path = os.path.join(img_name_path, img)
align_img_path = os.path.join(output_path, img_name)
# print(path)
#frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
try:
frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
h, w, d = frame.shape
except AttributeError:
print(img_name)
continue
if d == 1:
continue
factor = h / w
if (w > 1000):
frame = cv2.resize(frame, (600, int(600 * factor)))
h, w = frame.shape[:2]
img = np.float32(frame)
im_height, im_width, _ = img.shape
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
scale = scale.to(device)
#tic = time.time()
loc, conf, landms = net(img) # forward pass
#print('net forward time: {:.4f}'.format(time.time() - tic))
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2]])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
score = 500
# show image
if args.save_image:
dst = []
for i, det in enumerate(dets):
if det[4] < args.vis_thres:
continue
# center_x = (det[2] + det[0]) / 2
# center_y = (det[3] + det[1]) / 2
# if abs(center_x - 125) + abs(center_y - 125) < score:
# score = abs(center_x - 125) + abs(center_y - 125)
dst = np.reshape(landms[i], (5, 2))
if len(dst) > 0:
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
tform = trans.SimilarityTransform()
tform.estimate(dst, src1)
M = tform.params[0:2, :]
if w < 112 or h < 112:
count += 1
print(img_name_path)
continue
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame[0:112, 0:112, :]
order_img.append(img112)
order_name.append(img_name[:-6])
#cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
#cv2.imwrite(align_img_path, img112)
print(">112 number"+str(count))
if len(order_img) > 0:
order_img = np.array(order_img)
order_img = order_img.transpose((0, 3, 1, 2))
order_img = np.array(order_img, dtype=np.float32)
order_img -= 127.5
order_img /= 127.5
#order_img = np.array(order_img)
# print(order_img.shape)
# print(len(order_name))
create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
def count_accuracy(arcface_model,cpu_or_cuda,index ,database_name_list):
torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
# net and model
net = RetinaFace(cfg=cfg, phase = 'test')
net = load_model(net, args.trained_model, args.cpu)
net.eval()
print('Finished loading model!')
#print(net)
cudnn.benchmark = True
device = torch.device("cpu" if args.cpu else "cuda")
net = net.to(device)
resize = 1
input_path = r"../face/czrkzp2"
folder1 = os.listdir(input_path)
count = 0
count2 =0
print(len(folder1))
# print(folder1[0][:-4])
# return 0
order_img = []
order_name = []
tic = time.time()
for img_name in folder1[:15000]:
# output_name_path = os.path.join(output_path, img_name)
# if os.path.exists(output_name_path) == 0:
# os.makedirs(output_name_path)
img_name_path = os.path.join(input_path, img_name)
#img_list = os.listdir(img_name_path)
count2 += 1
if (count2 % 5000 == 0):
print('net forward time: {:.4f}'.format(time.time() - tic))
print(count2)
# if len(order_img) > 0:
# order_img = np.array(order_img)
# order_img = order_img.transpose((0, 3, 1, 2))
# order_img = np.array(order_img, dtype=np.float32)
# order_img -= 127.5
# order_img /= 127.5
# # order_img = np.array(order_img)
# # print(order_img.shape)
# # print(len(order_name))
# create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
# order_img = []
# order_name = []
# tic = time.time()
if img_name[19] == "1":
continue
#path = os.path.join(img_name_path, img)
#align_img_path = os.path.join(output_path, img_name)
# print(path)
#frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
try:
frame = cv2.imread(img_name_path)
h, w, d = frame.shape
except AttributeError:
print(img_name)
continue
if d == 1:
continue
factor = h / w
if (w > 1000):
frame = cv2.resize(frame, (600, int(600 * factor)))
h, w = frame.shape[:2]
img = np.float32(frame)
im_height, im_width, _ = img.shape
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
scale = scale.to(device)
#tic = time.time()
loc, conf, landms = net(img) # forward pass
#print('net forward time: {:.4f}'.format(time.time() - tic))
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2]])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
score = 500
# show image
if args.save_image:
dst = []
for i, det in enumerate(dets):
if det[4] < args.vis_thres:
continue
# center_x = (det[2] + det[0]) / 2
# center_y = (det[3] + det[1]) / 2
# if abs(center_x - 125) + abs(center_y - 125) < score:
# score = abs(center_x - 125) + abs(center_y - 125)
dst = np.reshape(landms[i], (5, 2))
if len(dst) > 0:
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
tform = trans.SimilarityTransform()
tform.estimate(dst, src1)
M = tform.params[0:2, :]
if w < 112 or h < 112:
count += 1
print(img_name_path)
continue
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame[0:112, 0:112, :]
order_img.append(img112)
order_name.append(img_name)
#cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
#cv2.imwrite(align_img_path, img112)
print(">112 number"+str(count))
if len(order_img) > 0:
order_img = np.array(order_img)
order_img = order_img.transpose((0, 3, 1, 2))
order_img = np.array(order_img, dtype=np.float32)
order_img -= 127.5
order_img /= 127.5
#order_img = np.array(order_img)
# print(order_img.shape)
# print(len(order_name))
count_acc(order_name,order_img,arcface_model,index ,database_name_list,cpu_or_cuda)
def count_acc(order_name,order_img,model,index ,database_name_list,cpu_or_cuda):
pred_name = []
unknown = []
print(order_img.shape)
start_time = time.time()
# order_img = torch.from_numpy(order_img)
# order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
batch = 256
now = 0
number = len(order_img)
# number = 1400
for i in range(number):
unknown.append("unknown")
while now < number:
if now + batch < number:
name = findAll(order_img[now:now + batch], model, index ,database_name_list, cpu_or_cuda)
else:
name = findAll(order_img[now:number], model, index ,database_name_list, cpu_or_cuda)
now = now + batch
for na in name:
pred_name.append(na)
print("batch" + str(now))
end_time = time.time()
print("findAll time: " + str(end_time - start_time))
# print(len(pred_name))
right = 0
for i, name in enumerate(pred_name):
if pred_name[i] == order_name[i][:-6]:
right += 1
filed = 0
for i, name in enumerate(pred_name):
if pred_name[i] == unknown[i]:
filed += 1
#print(order_name[i])
error = 0
print("----------------")
for i, name in enumerate(pred_name):
if pred_name[i] != order_name[i][:-6]:
error += 1
#print(order_name[i] + " " + pred_name[i] + " ")
#print(order_name)
#print(pred_name)
print("total:" + str(number))
print("right:" + str(right+filed) + " rate:" + str((filed+right) / number))
#print("filed:" + str(filed) + " rate:" + str(filed / number))
print("error:" + str(error - filed) + " rate:" + str((error - filed) / number))
# if __name__ == '__main__':
# torch.set_grad_enabled(False)
# cfg = None
# if args.network == "mobile0.25":
# cfg = cfg_mnet
# elif args.network == "resnet50":
# cfg = cfg_re50
# # net and model
# net = RetinaFace(cfg=cfg, phase = 'test')
# net = load_model(net, args.trained_model, args.cpu)
# net.eval()
# print('Finished loading model!')
# #print(net)
# cudnn.benchmark = True
# device = torch.device("cpu" if args.cpu else "cuda")
# net = net.to(device)
#
# resize = 1
#
# # testing begin
# cap = cv2.VideoCapture("rtsp://47.108.74.82:8557/h264")
# ret, frame = cap.read()
# h, w = frame.shape[:2]
# fps = cap.get(cv2.CAP_PROP_FPS)
# size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
# int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# #out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
# out = cv2.VideoWriter('ttttttt.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size)
# number = 0
#
# model = iresnet100()
# model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
# model.eval()
# k_v = load_npy("./Database/student.npy")
#
# while ret:
# tic = time.time()
# img = np.float32(frame)
# im_height, im_width, _ = img.shape
# scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
# img -= (104, 117, 123)
# img = img.transpose(2, 0, 1)
# img = torch.from_numpy(img).unsqueeze(0)
# img = img.to(device)
# scale = scale.to(device)
#
# loc, conf, landms = net(img) # forward pass
#
#
# priorbox = PriorBox(cfg, image_size=(im_height, im_width))
# priors = priorbox.forward()
# priors = priors.to(device)
# prior_data = priors.data
# boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
# boxes = boxes * scale / resize
# boxes = boxes.cpu().numpy()
# scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
# landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
# scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
# img.shape[3], img.shape[2], img.shape[3], img.shape[2],
# img.shape[3], img.shape[2]])
# scale1 = scale1.to(device)
# landms = landms * scale1 / resize
# landms = landms.cpu().numpy()
#
# # ignore low scores
# inds = np.where(scores > args.confidence_threshold)[0]
# boxes = boxes[inds]
# landms = landms[inds]
# scores = scores[inds]
#
# # keep top-K before NMS
# order = scores.argsort()[::-1][:args.top_k]
# boxes = boxes[order]
# landms = landms[order]
# scores = scores[order]
#
# # do NMS
# dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
# keep = py_cpu_nms(dets, args.nms_threshold)
# # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
# dets = dets[keep, :]
# landms = landms[keep]
#
# # keep top-K faster NMS
# dets = dets[:args.keep_top_k, :]
# landms = landms[:args.keep_top_k, :]
#
# dets = np.concatenate((dets, landms), axis=1)
# face_list = []
# name_list = []
# #print(dets[:4])
# print('net forward time: {:.4f}'.format(time.time() - tic))
# start_time = time.time()
# for i, det in enumerate(dets):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# dst = np.reshape(landms[i],(5,2))
# #print(dst.shape)
# src1 = np.array([
# [38.3814, 51.6963],
# [73.6186, 51.5014],
# [56.1120, 71.7366],
# [41.6361, 92.3655],
# [70.8167, 92.2041]], dtype=np.float32)
# #print(src1.shape)
# tform = trans.SimilarityTransform()
# tform.estimate(dst, src1)
# M = tform.params[0:2, :]
# frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
# img112 = frame2[0:112, 0:112, :]
# # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
# # count += 1
# face_list.append(img112)
#
# if len(face_list) != 0:
# face_list = np.array(face_list)
# face_list = face_list.transpose((0, 3, 1, 2))
# face_list = np.array(face_list, dtype=np.float32)
# face_list -= 127.5
# face_list /= 127.5
# print(face_list.shape)
# face_list = torch.from_numpy(face_list)
#
# name_list = findAll(face_list, model, k_v)
# end_time = time.time()
# print("findOneframe time: " + str(end_time - start_time))
# start_time = time.time()
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# draw = ImageDraw.Draw(img_PIL)
# font = ImageFont.truetype("font.ttf", 22)
# for i, det in enumerate(dets):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# #print(name_list)
# name = name_list[i]
# mo = r'[\u4e00-\u9fa5]*'
# name = re.match(mo, name).group(0)
# if not isinstance(name, np.unicode):
# name = name.decode('utf8')
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
# cv2.imshow('out', frame)
# out.write(frame)
# end_time = time.time()
# print("drawOneframe time: " + str(end_time - start_time))
# # Press Q on keyboard to stop recording
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
# ret, frame = cap.read()
# cap.release()
# out.release()
# cv2.destroyAllWindows()
if __name__ == '__main__':
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
k_v = load_npy("./Database/sfz_test.npy")
database_name_list = list(k_v.keys())
vector_list = np.array(list(k_v.values()))
print(vector_list.shape)
# print(database_name_list)
    nlist = 500  # number of coarse (IVF) clusters
    quantizer = faiss.IndexFlatL2(512)  # flat L2 quantizer over the 512-d ArcFace embeddings
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)  # an IVF index must be trained on representative vectors before add()
    # index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 50  # clusters probed per query: higher is more accurate but slower
count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list)
# sfz_to112x112_retinaface(arcface_model,cpu_or_cuda)
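    # Illustrative sketch (not part of the original script): querying the trained IVF index for one
    # hypothetical (1, 512) float32, L2-normalized ArcFace embedding `query`.
    # D, I = index.search(query, 1)   # faiss returns squared L2 distances for METRIC_L2
    # name = database_name_list[I[0][0]] if D[0][0] < 1.05 else "unknown"  # 1.05 mirrors the threshold in retinaface_detect.py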

483
retinaface_detect.py Normal file
View File

@ -0,0 +1,483 @@
from __future__ import print_function
import re
import time
import cv2
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
threshold = 1.05  # maximum distance between embeddings for a match to be accepted
ppi = 1280  # frames wider than this are downscaled to this width before detection
step = 3  # run detection on one frame, then reuse its boxes for the next `step` frames
class ConfRetinaface(object):
def __init__(self, trained_model, network, cpu, confidence_threshold, top_k, nms_threshold, keep_top_k, vis_thres):
self.trained_model = trained_model
self.network = network
self.cpu = cpu
self.confidence_threshold = confidence_threshold
self.top_k = top_k
self.nms_threshold = nms_threshold
self.keep_top_k = keep_top_k
self.vis_thres = vis_thres
def set_retinaface_conf(cpu_or_cuda):
args = ConfRetinaface(trained_model='./weights/mobilenet0.25_Final.pth',
network='mobile0.25',
cpu=True if cpu_or_cuda == 'cpu' else False,
confidence_threshold=0.02,
top_k=5000,
nms_threshold=0.4,
keep_top_k=750,
vis_thres=0.6)
return args
def check_keys(model, pretrained_state_dict):
ckpt_keys = set(pretrained_state_dict.keys())
model_keys = set(model.state_dict().keys())
used_pretrained_keys = model_keys & ckpt_keys
unused_pretrained_keys = ckpt_keys - model_keys
missing_keys = model_keys - ckpt_keys
print('Missing keys:{}'.format(len(missing_keys)))
print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
print('Used keys:{}'.format(len(used_pretrained_keys)))
assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
return True
def remove_prefix(state_dict, prefix):
''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
print('remove prefix \'{}\''.format(prefix))
f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
return {f(key): value for key, value in state_dict.items()}
def load_model(model, pretrained_path, load_to_cpu):
print('Loading pretrained model from {}'.format(pretrained_path))
if load_to_cpu:
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
else:
device = torch.cuda.current_device()
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
if "state_dict" in pretrained_dict.keys():
pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
else:
pretrained_dict = remove_prefix(pretrained_dict, 'module.')
check_keys(model, pretrained_dict)
model.load_state_dict(pretrained_dict, strict=False)
return model
# Load the RetinaFace detection model
def load_retinaface_model(args):
torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
# net and model
net = RetinaFace(cfg=cfg, phase='test')
net = load_model(net, args.trained_model, args.cpu)
net.eval()
cudnn.benchmark = True
device = torch.device("cpu" if args.cpu else "cuda")
net = net.to(device)
print('Finished loading model!')
return net
# Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
euclidean_distance = source_representation - test_representation
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
euclidean_distance = np.sqrt(euclidean_distance)
return euclidean_distance
# L2-normalize a feature vector
def l2_normalize(x):
return x / np.sqrt(np.sum(np.multiply(x, x)))
# Find the database face whose feature vector is closest to this one
def findmindistance(pred, threshold, k_v):
distance = 10
most_like = ""
for name in k_v.keys():
tmp = findEuclideanDistance(k_v[name], pred)
if distance > tmp:
distance = tmp
most_like = name
if distance < threshold:
return most_like
else:
return "unknown"
# Look up the nearest database entry for each query embedding via the faiss index
def faiss_find_face(pred,index ,database_name_list):
#print(len(database_name_list))
start_time = time.time()
D, I = index.search(pred, 1)
name_list = []
end_time = time.time()
print("faiss cost %fs" % (end_time - start_time))
print(D, I)
# if D[0][0] < threshold:
# print(database_name_list[I[0][0]])
# return database_name_list[I[0][0]]
# else:
# return "unknown"
    for i, row in enumerate(I):  # renamed loop variable so it no longer shadows the `index` argument
        if D[i][0] < threshold:
            #print(database_name_list[I[0][0]])
            name_list.append(database_name_list[row[0]])
else:
name_list.append("unknown")
return name_list
# Identify every face in the given batch against the face database
def findAll(imglist, model, index ,database_name_list, k_v, cpu_or_cuda):
start_time = time.time()
imglist = torch.from_numpy(imglist)
imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
with torch.no_grad():
name_list = []
pred = model(imglist)
pred = pred.cpu().numpy()
print("predOne time: " + str(time.time() - start_time))
#print(pred.shape)
start_time = time.time()
#name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
for pr in pred:
name = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
print(name)
# print(l2_normalize(pr).shape)
#pr = np.expand_dims(l2_normalize(pr), 0)
#print(pr.shape)
#name = faiss_find_face(pr,index ,database_name_list)
if name != "unknown":
mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
name = re.match(mo, name)
name_list.append(name.group(0))
else:
name_list.append("unknown")
#name_list.append(name)
print("findOne time: " + str(time.time() - start_time))
return name_list
# Detect the faces in a single image and return an Nx3x112x112 array of aligned crops plus their boxes/landmarks
def detect_one(path, net, args):
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
device = torch.device("cpu" if args.cpu else "cuda")
resize = 1
# testing begin
frame = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_COLOR)
h, w = frame.shape[:2]
factor = h / w
if (w > 1000):
frame = cv2.resize(frame, (600, int(600 * factor)))
h, w = frame.shape[:2]
tic = time.time()
img = np.float32(frame)
im_height, im_width, _ = img.shape
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
scale = scale.to(device)
loc, conf, landms = net(img) # forward pass
#print(loc.shape,landms.shape,conf.shape)
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
img.shape[3], img.shape[2]])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
face_list = []
box_and_point = []
# print(dets[:4])
# print('net forward time: {:.4f}'.format(time.time() - tic))
print(len(dets))
for i, det in enumerate(dets):
if det[4] < args.vis_thres:
continue
box_and_point.append(det)
dst = np.reshape(landms[i], (5, 2))
# print(dst.shape)
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
# print(src1.shape)
tform = trans.SimilarityTransform()
tform.estimate(dst, src1)
M = tform.params[0:2, :]
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame2[0:112, 0:112, :]
# cv2.imshow('out', img112)
# cv2.waitKey(0)
face_list.append(img112)
if len(face_list) > 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0, 3, 1, 2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
box_and_point = np.array(box_and_point)
# face_list = torch.from_numpy(face_list)
# cv2.imshow('out', img112)
# cv2.waitKey(0)
return face_list, box_and_point
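# Illustrative usage sketch (assumed file name and preloaded models): detect the faces in one
# image with detect_one() and identify them with findAll(). `arcface_model` and `k_v` are assumed
# to come from the ArcFace loader and load_npy() used elsewhere in this repository; None is passed
# for the faiss index arguments because the faiss path inside findAll() is disabled.
# args = set_retinaface_conf(cpu_or_cuda="cuda")
# retinaface_net = load_retinaface_model(args)
# faces, boxes_and_points = detect_one("test.jpg", retinaface_net, args)
# if len(faces) > 0:
#     names = findAll(faces, arcface_model, None, None, k_v, "cuda")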
# Detect faces in a video and run face recognition on them
def detect_video(video_path, output_path, net, arcface_model, k_v, args):
tic_total = time.time()
cfg = None
if args.network == "mobile0.25":
cfg = cfg_mnet
elif args.network == "resnet50":
cfg = cfg_re50
device = torch.device("cpu" if args.cpu else "cuda")
resize = 1
# testing begin
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
h, w = frame.shape[:2]
factor = 0
if (w > ppi):
factor = h / w
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
h, w = frame.shape[:2]
fps = cap.get(cv2.CAP_PROP_FPS)
size = (w, h)
# size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
# int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
number = step
dets = []
name_list = []
font = ImageFont.truetype("font.ttf", 22)
priorbox = PriorBox(cfg, image_size=(h, w))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
scale = torch.Tensor([w, h, w, h])
scale = scale.to(device)
scale1 = torch.Tensor([w, h, w, h,
w, h, w, h,
w, h])
scale1 = scale1.to(device)
src1 = np.array([
[38.3814, 51.6963],
[73.6186, 51.5014],
[56.1120, 71.7366],
[41.6361, 92.3655],
[70.8167, 92.2041]], dtype=np.float32)
# print(src1.shape)
tform = trans.SimilarityTransform()
while ret:
tic_all = time.time()
if number == step:
tic = time.time()
img = np.float32(frame)
img -= (104, 117, 123)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(device)
loc, conf, landms = net(img) # forward pass
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
landms = landms * scale1 / resize
landms = landms.cpu().numpy()
# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]
# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]
# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]
# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]
dets = np.concatenate((dets, landms), axis=1)
face_list = []
name_list = []
# print(dets[:4])
print('net forward time: {:.4f}'.format(time.time() - tic))
start_time = time.time()
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
dst = np.reshape(landms[i], (5, 2))
# print(dst.shape)
tform.estimate(dst, src1)
M = tform.params[0:2, :]
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
img112 = frame2[0:112, 0:112, :]
face_list.append(img112)
if len(face_list) != 0:
face_list = np.array(face_list)
face_list = face_list.transpose((0, 3, 1, 2))
face_list = np.array(face_list, dtype=np.float32)
face_list -= 127.5
face_list /= 127.5
print(face_list.shape)
# face_list = torch.from_numpy(face_list)
                name_list = findAll(face_list, arcface_model, None, None, k_v, "cpu" if args.cpu else "cuda")  # no faiss index here, so the brute-force search over k_v is used
end_time = time.time()
print("findOneframe time: " + str(end_time - start_time))
start_time = time.time()
if (len(dets) != 0):
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
# if (len(dets) != 0):
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
# draw = ImageDraw.Draw(img_PIL)
#
# for i, det in enumerate(dets[:4]):
# if det[4] < args.vis_thres:
# continue
# boxes, score = det[:4], det[4]
# # print(name_list)
# name = name_list[i]
# if not isinstance(name, np.unicode):
# name = name.decode('utf8')
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
#cv2.imshow('out', frame)
#cv2.waitKey(0)
out.write(frame)
end_time = time.time()
print("drawOneframe time: " + str(end_time - start_time))
# Press Q on keyboard to stop recording
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break
ret, frame = cap.read()
number = 0
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
else:
number += 1
if (len(dets) != 0):
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_PIL)
for i, det in enumerate(dets[:4]):
if det[4] < args.vis_thres:
continue
boxes, score = det[:4], det[4]
# print(name_list)
name = name_list[i]
                    if not isinstance(name, str):  # np.unicode was removed in newer NumPy; findAll already returns str
name = name.decode('utf8')
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
width=3)
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
out.write(frame)
start_time = time.time()
ret, frame = cap.read()
if (ret != 0 and factor != 0):
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
print("readframe time: " + str(time.time() - start_time))
print('all time: {:.4f}'.format(time.time() - tic_all))
cap.release()
out.release()
print('total time: {:.4f}'.format(time.time() - tic_total))
#cv2.destroyAllWindows()
if __name__ == "__main__":
args = set_retinaface_conf()
print(args.cpu)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

65
src/data_io/dataset_folder.py Normal file
View File

@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:04 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_folder.py
# @Software : PyCharm
import cv2
import torch
from torchvision import datasets
import numpy as np
def opencv_loader(path):
img = cv2.imread(path)
return img
class DatasetFolderFT(datasets.ImageFolder):
def __init__(self, root, transform=None, target_transform=None,
ft_width=10, ft_height=10, loader=opencv_loader):
super(DatasetFolderFT, self).__init__(root, transform, target_transform, loader)
self.root = root
self.ft_width = ft_width
self.ft_height = ft_height
def __getitem__(self, index):
path, target = self.samples[index]
sample = self.loader(path)
        if sample is None:
            print('image is None --> ', path)
        assert sample is not None
        # generate the FT picture of the sample
        ft_sample = generate_FT(sample)
        if ft_sample is None:
            print('FT image is None -->', path)
ft_sample = cv2.resize(ft_sample, (self.ft_width, self.ft_height))
ft_sample = torch.from_numpy(ft_sample).float()
ft_sample = torch.unsqueeze(ft_sample, 0)
if self.transform is not None:
try:
sample = self.transform(sample)
except Exception as err:
print('Error Occured: %s' % err, path)
if self.target_transform is not None:
target = self.target_transform(target)
return sample, ft_sample, target
def generate_FT(image):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
f = np.fft.fft2(image)
fshift = np.fft.fftshift(f)
fimg = np.log(np.abs(fshift)+1)
    # min-max normalize the log-magnitude spectrum (vectorized form of the original per-row scan)
    maxx = fimg.max()
    minn = fimg.min()
    fimg = (fimg - minn + 1) / (maxx - minn + 1)
return fimg
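# Illustrative example (hypothetical image path): this is how DatasetFolderFT builds its Fourier
# target — the normalized log-magnitude spectrum, resized to the configured ft_width/ft_height.
# img = cv2.imread("sample_face.jpg")
# ft = generate_FT(img)            # float map in roughly (0, 1]
# ft = cv2.resize(ft, (10, 10))    # matches the DatasetFolderFT defaults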

33
src/data_io/dataset_loader.py Normal file
View File

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 3:40 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_loader.py
# @Software : PyCharm
from torch.utils.data import DataLoader
from src.data_io.dataset_folder import DatasetFolderFT
from src.data_io import transform as trans
def get_train_loader(conf):
train_transform = trans.Compose([
trans.ToPILImage(),
trans.RandomResizedCrop(size=tuple(conf.input_size),
scale=(0.9, 1.1)),
trans.ColorJitter(brightness=0.4,
contrast=0.4, saturation=0.4, hue=0.1),
trans.RandomRotation(10),
trans.RandomHorizontalFlip(),
trans.ToTensor()
])
root_path = '{}/{}'.format(conf.train_root_path, conf.patch_info)
trainset = DatasetFolderFT(root_path, train_transform,
None, conf.ft_width, conf.ft_height)
train_loader = DataLoader(
trainset,
batch_size=conf.batch_size,
shuffle=True,
pin_memory=True,
num_workers=16)
return train_loader
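# Illustrative usage sketch: `conf` is expected to be the EasyDict produced by
# src.default_config.get_default_config() plus update_config(), which fill in train_root_path,
# patch_info, input_size, ft_width/ft_height and batch_size.
# train_loader = get_train_loader(conf)
# for sample, ft_sample, target in train_loader:
#     pass  # one training step per batch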

589
src/data_io/functional.py Normal file
View File

@ -0,0 +1,589 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 6:18 PM
# @Author : zhuying
# @Company : Minivision
# @File : functional.py
# @Software : PyCharm
from __future__ import division
import torch
from PIL import Image, ImageOps, ImageEnhance
try:
import accimage
except ImportError:
accimage = None
import numpy as np
import numbers
import types
import collections.abc
import warnings
def _is_pil_image(img):
if accimage is not None:
return isinstance(img, (Image.Image, accimage.Image))
else:
return isinstance(img, Image.Image)
def _is_tensor_image(img):
return torch.is_tensor(img) and img.ndimension() == 3
def _is_numpy_image(img):
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
def to_tensor(pic):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
See ``ToTensor`` for more details.
Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
Returns:
Tensor: Converted image.
"""
if not(_is_pil_image(pic) or _is_numpy_image(pic)):
raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
if isinstance(pic, np.ndarray):
# handle numpy array
# IR image channel=1: modify by lzc --> 20190730
if pic.ndim == 2:
pic = pic.reshape((pic.shape[0], pic.shape[1], 1))
img = torch.from_numpy(pic.transpose((2, 0, 1)))
# backward compatibility
# return img.float().div(255) modify by zkx
return img.float()
if accimage is not None and isinstance(pic, accimage.Image):
nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
pic.copyto(nppic)
return torch.from_numpy(nppic)
# handle PIL Image
if pic.mode == 'I':
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
elif pic.mode == 'I;16':
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
else:
img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
# PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
if pic.mode == 'YCbCr':
nchannel = 3
elif pic.mode == 'I;16':
nchannel = 1
else:
nchannel = len(pic.mode)
img = img.view(pic.size[1], pic.size[0], nchannel)
# put it from HWC to CHW format
# yikes, this transpose takes 80% of the loading time/CPU
img = img.transpose(0, 1).transpose(0, 2).contiguous()
if isinstance(img, torch.ByteTensor):
# return img.float().div(255) #modified by zkx
return img.float()
else:
return img
def to_pil_image(pic, mode=None):
"""Convert a tensor or an ndarray to PIL Image.
    See :class:`~torchvision.transforms.ToPILImage` for more details.
Args:
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
.. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
Returns:
PIL Image: Image converted to PIL Image.
"""
if not(_is_numpy_image(pic) or _is_tensor_image(pic)):
raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
npimg = pic
if isinstance(pic, torch.FloatTensor):
pic = pic.mul(255).byte()
if torch.is_tensor(pic):
npimg = np.transpose(pic.numpy(), (1, 2, 0))
if not isinstance(npimg, np.ndarray):
raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
'not {}'.format(type(npimg)))
if npimg.shape[2] == 1:
expected_mode = None
npimg = npimg[:, :, 0]
if npimg.dtype == np.uint8:
expected_mode = 'L'
if npimg.dtype == np.int16:
expected_mode = 'I;16'
if npimg.dtype == np.int32:
expected_mode = 'I'
elif npimg.dtype == np.float32:
expected_mode = 'F'
if mode is not None and mode != expected_mode:
raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                                 .format(mode, npimg.dtype, expected_mode))
mode = expected_mode
elif npimg.shape[2] == 4:
permitted_4_channel_modes = ['RGBA', 'CMYK']
if mode is not None and mode not in permitted_4_channel_modes:
raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
if mode is None and npimg.dtype == np.uint8:
mode = 'RGBA'
else:
permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
if mode is not None and mode not in permitted_3_channel_modes:
raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
if mode is None and npimg.dtype == np.uint8:
mode = 'RGB'
if mode is None:
raise TypeError('Input type {} is not supported'.format(npimg.dtype))
return Image.fromarray(npimg, mode=mode)
def normalize(tensor, mean, std):
"""Normalize a tensor image with mean and standard deviation.
See ``Normalize`` for more details.
Args:
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
Returns:
Tensor: Normalized Tensor image.
"""
if not _is_tensor_image(tensor):
raise TypeError('tensor is not a torch image.')
for t, m, s in zip(tensor, mean, std):
t.sub_(m).div_(s)
return tensor
def resize(img, size, interpolation=Image.BILINEAR):
"""Resize the input PIL Image to the given size.
Args:
img (PIL Image): Image to be resized.
size (sequence or int): Desired output size. If size is a sequence like
(h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
the aspect ratio. i.e, if height > width, then image will be rescaled to
(size * height / width, size)
interpolation (int, optional): Desired interpolation. Default is
``PIL.Image.BILINEAR``
Returns:
PIL Image: Resized image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
raise TypeError('Got inappropriate size arg: {}'.format(size))
if isinstance(size, int):
w, h = img.size
if (w <= h and w == size) or (h <= w and h == size):
return img
if w < h:
ow = size
oh = int(size * h / w)
return img.resize((ow, oh), interpolation)
else:
oh = size
ow = int(size * w / h)
return img.resize((ow, oh), interpolation)
else:
return img.resize(size[::-1], interpolation)
def scale(*args, **kwargs):
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
"please use transforms.Resize instead.")
return resize(*args, **kwargs)
def pad(img, padding, fill=0):
"""Pad the given PIL Image on all sides with the given "pad" value.
Args:
img (PIL Image): Image to be padded.
padding (int or tuple): Padding on each border. If a single int is provided this
is used to pad all borders. If tuple of length 2 is provided this is the padding
on left/right and top/bottom respectively. If a tuple of length 4 is provided
this is the padding for the left, top, right and bottom borders
respectively.
fill: Pixel fill value. Default is 0. If a tuple of
length 3, it is used to fill R, G, B channels respectively.
Returns:
PIL Image: Padded image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
if not isinstance(padding, (numbers.Number, tuple)):
raise TypeError('Got inappropriate padding arg')
if not isinstance(fill, (numbers.Number, str, tuple)):
raise TypeError('Got inappropriate fill arg')
    if isinstance(padding, collections.abc.Sequence) and len(padding) not in [2, 4]:
raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
"{} element tuple".format(len(padding)))
return ImageOps.expand(img, border=padding, fill=fill)
def crop(img, i, j, h, w):
"""Crop the given PIL Image.
Args:
img (PIL Image): Image to be cropped.
i: Upper pixel coordinate.
j: Left pixel coordinate.
h: Height of the cropped image.
w: Width of the cropped image.
Returns:
PIL Image: Cropped image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
return img.crop((j, i, j + w, i + h))
def center_crop(img, output_size):
if isinstance(output_size, numbers.Number):
output_size = (int(output_size), int(output_size))
w, h = img.size
th, tw = output_size
i = int(round((h - th) / 2.))
j = int(round((w - tw) / 2.))
return crop(img, i, j, th, tw)
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
"""Crop the given PIL Image and resize it to desired size.
Notably used in RandomResizedCrop.
Args:
img (PIL Image): Image to be cropped.
i: Upper pixel coordinate.
j: Left pixel coordinate.
h: Height of the cropped image.
w: Width of the cropped image.
size (sequence or int): Desired output size. Same semantics as ``scale``.
interpolation (int, optional): Desired interpolation. Default is
``PIL.Image.BILINEAR``.
Returns:
PIL Image: Cropped image.
"""
assert _is_pil_image(img), 'img should be PIL Image'
img = crop(img, i, j, h, w)
img = resize(img, size, interpolation)
return img
def hflip(img):
"""Horizontally flip the given PIL Image.
Args:
img (PIL Image): Image to be flipped.
Returns:
        PIL Image: Horizontally flipped image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
return img.transpose(Image.FLIP_LEFT_RIGHT)
def vflip(img):
"""Vertically flip the given PIL Image.
Args:
img (PIL Image): Image to be flipped.
Returns:
PIL Image: Vertically flipped image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
return img.transpose(Image.FLIP_TOP_BOTTOM)
def five_crop(img, size):
"""Crop the given PIL Image into four corners and the central crop.
.. Note::
This transform returns a tuple of images and there may be a
mismatch in the number of inputs and targets your ``Dataset`` returns.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
Returns:
tuple: tuple (tl, tr, bl, br, center) corresponding top left,
top right, bottom left, bottom right and center crop.
"""
if isinstance(size, numbers.Number):
size = (int(size), int(size))
else:
assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
w, h = img.size
crop_h, crop_w = size
if crop_w > w or crop_h > h:
raise ValueError("Requested crop size {} is bigger than input size {}".format(size,
(h, w)))
tl = img.crop((0, 0, crop_w, crop_h))
tr = img.crop((w - crop_w, 0, w, crop_h))
bl = img.crop((0, h - crop_h, crop_w, h))
br = img.crop((w - crop_w, h - crop_h, w, h))
center = center_crop(img, (crop_h, crop_w))
return (tl, tr, bl, br, center)
def ten_crop(img, size, vertical_flip=False):
"""Crop the given PIL Image into four corners and the central crop plus the
flipped version of these (horizontal flipping is used by default).
.. Note::
This transform returns a tuple of images and there may be a
mismatch in the number of inputs and targets your ``Dataset`` returns.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
vertical_flip (bool): Use vertical flipping instead of horizontal
Returns:
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
br_flip, center_flip) corresponding top left, top right,
bottom left, bottom right and center crop and same for the
flipped image.
"""
if isinstance(size, numbers.Number):
size = (int(size), int(size))
else:
assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
first_five = five_crop(img, size)
if vertical_flip:
img = vflip(img)
else:
img = hflip(img)
second_five = five_crop(img, size)
return first_five + second_five
def adjust_brightness(img, brightness_factor):
"""Adjust brightness of an Image.
Args:
img (PIL Image): PIL Image to be adjusted.
brightness_factor (float): How much to adjust the brightness. Can be
any non negative number. 0 gives a black image, 1 gives the
original image while 2 increases the brightness by a factor of 2.
Returns:
PIL Image: Brightness adjusted image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
enhancer = ImageEnhance.Brightness(img)
img = enhancer.enhance(brightness_factor)
return img
def adjust_contrast(img, contrast_factor):
"""Adjust contrast of an Image.
Args:
img (PIL Image): PIL Image to be adjusted.
contrast_factor (float): How much to adjust the contrast. Can be any
non negative number. 0 gives a solid gray image, 1 gives the
original image while 2 increases the contrast by a factor of 2.
Returns:
PIL Image: Contrast adjusted image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
enhancer = ImageEnhance.Contrast(img)
img = enhancer.enhance(contrast_factor)
return img
def adjust_saturation(img, saturation_factor):
"""Adjust color saturation of an image.
Args:
img (PIL Image): PIL Image to be adjusted.
saturation_factor (float): How much to adjust the saturation. 0 will
give a black and white image, 1 will give the original image while
2 will enhance the saturation by a factor of 2.
Returns:
PIL Image: Saturation adjusted image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
enhancer = ImageEnhance.Color(img)
img = enhancer.enhance(saturation_factor)
return img
def adjust_hue(img, hue_factor):
"""Adjust hue of an image.
The image hue is adjusted by converting the image to HSV and
cyclically shifting the intensities in the hue channel (H).
The image is then converted back to original image mode.
`hue_factor` is the amount of shift in H channel and must be in the
interval `[-0.5, 0.5]`.
See https://en.wikipedia.org/wiki/Hue for more details on Hue.
Args:
img (PIL Image): PIL Image to be adjusted.
hue_factor (float): How much to shift the hue channel. Should be in
[-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
HSV space in positive and negative direction respectively.
0 means no shift. Therefore, both -0.5 and 0.5 will give an image
with complementary colors while 0 gives the original image.
Returns:
PIL Image: Hue adjusted image.
"""
if not(-0.5 <= hue_factor <= 0.5):
        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
input_mode = img.mode
if input_mode in {'L', '1', 'I', 'F'}:
return img
h, s, v = img.convert('HSV').split()
np_h = np.array(h, dtype=np.uint8)
# uint8 addition take cares of rotation across boundaries
with np.errstate(over='ignore'):
np_h += np.uint8(hue_factor * 255)
h = Image.fromarray(np_h, 'L')
img = Image.merge('HSV', (h, s, v)).convert(input_mode)
return img
def adjust_gamma(img, gamma, gain=1):
"""Perform gamma correction on an image.
Also known as Power Law Transform. Intensities in RGB mode are adjusted
based on the following equation:
I_out = 255 * gain * ((I_in / 255) ** gamma)
See https://en.wikipedia.org/wiki/Gamma_correction for more details.
Args:
img (PIL Image): PIL Image to be adjusted.
gamma (float): Non negative real number. gamma larger than 1 make the
shadows darker, while gamma smaller than 1 make dark regions
lighter.
gain (float): The constant multiplier.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
if gamma < 0:
raise ValueError('Gamma should be a non-negative real number')
input_mode = img.mode
img = img.convert('RGB')
np_img = np.array(img, dtype=np.float32)
np_img = 255 * gain * ((np_img / 255) ** gamma)
np_img = np.uint8(np.clip(np_img, 0, 255))
img = Image.fromarray(np_img, 'RGB').convert(input_mode)
return img
def rotate(img, angle, resample=False, expand=False, center=None):
"""Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows)
Args:
img (PIL Image): PIL Image to be rotated.
        angle ({float, int}): Rotation angle in degrees, counter-clockwise.
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
An optional resampling filter.
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
expand (bool, optional): Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (2-tuple, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
return img.rotate(angle, resample, expand, center)
def to_grayscale(img, num_output_channels=1):
"""Convert image to grayscale version of image.
Args:
img (PIL Image): Image to be converted to grayscale.
Returns:
PIL Image: Grayscale version of the image.
if num_output_channels == 1 : returned image is single channel
if num_output_channels == 3 : returned image is 3 channel with r == g == b
"""
if not _is_pil_image(img):
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
if num_output_channels == 1:
img = img.convert('L')
elif num_output_channels == 3:
img = img.convert('L')
np_img = np.array(img, dtype=np.uint8)
np_img = np.dstack([np_img, np_img, np_img])
img = Image.fromarray(np_img, 'RGB')
else:
raise ValueError('num_output_channels should be either 1 or 3')
return img

347
src/data_io/transform.py Normal file
View File

@ -0,0 +1,347 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:19 PM
# @Author : zhuying
# @Company : Minivision
# @File : transform.py
# @Software : PyCharm
from __future__ import division
import math
import random
from PIL import Image
try:
import accimage
except ImportError:
accimage = None
import numpy as np
import numbers
import types
from src.data_io import functional as F
__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "RandomHorizontalFlip",
"Lambda", "RandomResizedCrop", "ColorJitter", "RandomRotation"]
class Compose(object):
"""Composes several transforms together.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
Example:
>>> transforms.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.ToTensor(),
>>> ])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
for t in self.transforms:
img = t(img)
return img
class ToTensor(object):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
[0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
"""
def __call__(self, pic):
"""
Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
Returns:
Tensor: Converted image.
"""
return F.to_tensor(pic)
class Lambda(object):
"""Apply a user-defined lambda as a transform.
Args:
lambd (function): Lambda/function to be used for transform.
"""
def __init__(self, lambd):
assert isinstance(lambd, types.LambdaType)
self.lambd = lambd
def __call__(self, img):
return self.lambd(img)
class ToPILImage(object):
"""Convert a tensor or an ndarray to PIL Image.
Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
H x W x C to a PIL Image while preserving the value range.
Args:
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
3. If the input has 1 channel, the ``mode`` is determined by the data type (i,e,
``int``, ``float``, ``short``).
.. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
"""
def __init__(self, mode=None):
self.mode = mode
def __call__(self, pic):
"""
Args:
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
Returns:
PIL Image: Image converted to PIL Image.
"""
return F.to_pil_image(pic, self.mode)
class Normalize(object):
"""Normalize an tensor image with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
will normalize each channel of the input ``torch.*Tensor`` i.e.
``input[channel] = (input[channel] - mean[channel]) / std[channel]``
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
"""
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, tensor):
"""
Args:
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
Returns:
Tensor: Normalized Tensor image.
"""
return F.normalize(tensor, self.mean, self.std)
class RandomHorizontalFlip(object):
"""Horizontally flip the given PIL Image randomly with a probability of 0.5."""
def __call__(self, img):
"""
Args:
img (PIL Image): Image to be flipped.
Returns:
PIL Image: Randomly flipped image.
"""
if random.random() < 0.5:
return F.hflip(img)
return img
class RandomResizedCrop(object):
"""Crop the given PIL Image to random size and aspect ratio.
A crop of random size (default: of 0.08 to 1.0) of the original size and a random
aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
is finally resized to given size.
This is popularly used to train the Inception networks.
Args:
size: expected output size of each edge
scale: range of size of the origin size cropped
ratio: range of aspect ratio of the origin aspect ratio cropped
interpolation: Default: PIL.Image.BILINEAR
"""
def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
if isinstance(size, tuple):
self.size = size
else:
self.size = (size, size)
self.interpolation = interpolation
self.scale = scale
self.ratio = ratio
@staticmethod
def get_params(img, scale, ratio):
"""Get parameters for ``crop`` for a random sized crop.
Args:
img (PIL Image): Image to be cropped.
scale (tuple): range of size of the origin size cropped
ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
Returns:
tuple: params (i, j, h, w) to be passed to ``crop`` for a random
sized crop.
"""
for attempt in range(10):
area = img.size[0] * img.size[1]
target_area = random.uniform(*scale) * area
aspect_ratio = random.uniform(*ratio)
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
if random.random() < 0.5:
w, h = h, w
if w <= img.size[0] and h <= img.size[1]:
i = random.randint(0, img.size[1] - h)
j = random.randint(0, img.size[0] - w)
return i, j, h, w
# Fallback
w = min(img.size[0], img.size[1])
i = (img.size[1] - w) // 2
j = (img.size[0] - w) // 2
return i, j, w, w
def __call__(self, img):
"""
Args:
img (PIL Image): Image to be flipped.
Returns:
PIL Image: Randomly cropped and resize image.
"""
i, j, h, w = self.get_params(img, self.scale, self.ratio)
return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
class ColorJitter(object):
"""Randomly change the brightness, contrast and saturation of an image.
Args:
brightness (float): How much to jitter brightness. brightness_factor
is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
contrast (float): How much to jitter contrast. contrast_factor
is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
saturation (float): How much to jitter saturation. saturation_factor
is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
hue(float): How much to jitter hue. hue_factor is chosen uniformly from
[-hue, hue]. Should be >=0 and <= 0.5.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
self.brightness = brightness
self.contrast = contrast
self.saturation = saturation
self.hue = hue
@staticmethod
def get_params(brightness, contrast, saturation, hue):
"""Get a randomized transform to be applied on image.
Arguments are same as that of __init__.
Returns:
Transform which randomly adjusts brightness, contrast and
saturation in a random order.
"""
transforms = []
if brightness > 0:
brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness)
transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))
if contrast > 0:
contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))
if saturation > 0:
saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation)
transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))
if hue > 0:
hue_factor = np.random.uniform(-hue, hue)
transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
np.random.shuffle(transforms)
transform = Compose(transforms)
return transform
def __call__(self, img):
"""
Args:
img (PIL Image): Input image.
Returns:
PIL Image: Color jittered image.
"""
transform = self.get_params(self.brightness, self.contrast,
self.saturation, self.hue)
return transform(img)
class RandomRotation(object):
"""Rotate the image by angle.
Args:
degrees (sequence or float or int): Range of degrees to select from.
If degrees is a number instead of sequence like (min, max), the range of degrees
will be (-degrees, +degrees).
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
An optional resampling filter.
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
expand (bool, optional): Optional expansion flag.
If true, expands the output to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
center (2-tuple, optional): Optional center of rotation.
Origin is the upper left corner.
Default is the center of the image.
"""
def __init__(self, degrees, resample=False, expand=False, center=None):
if isinstance(degrees, numbers.Number):
if degrees < 0:
raise ValueError("If degrees is a single number, it must be positive.")
self.degrees = (-degrees, degrees)
else:
if len(degrees) != 2:
raise ValueError("If degrees is a sequence, it must be of len 2.")
self.degrees = degrees
self.resample = resample
self.expand = expand
self.center = center
@staticmethod
def get_params(degrees):
"""Get parameters for ``rotate`` for a random rotation.
Returns:
sequence: params to be passed to ``rotate`` for random rotation.
"""
angle = np.random.uniform(degrees[0], degrees[1])
return angle
def __call__(self, img):
"""
img (PIL Image): Image to be rotated.
Returns:
PIL Image: Rotated image.
"""
angle = self.get_params(self.degrees)
return F.rotate(img, angle, self.resample, self.expand, self.center)

73
src/default_config.py Normal file
View File

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 9:12 AM
# @Author : zhuying
# @Company : Minivision
# @File : default_config.py
# @Software : PyCharm
# --*-- coding: utf-8 --*--
"""
default config for training
"""
import torch
from datetime import datetime
from easydict import EasyDict
from src.utility import make_if_not_exist, get_width_height, get_kernel
def get_default_config():
conf = EasyDict()
# ----------------------training---------------
conf.lr = 1e-1
# [9, 13, 15]
conf.milestones = [10, 15, 22] # down learing rate
conf.gamma = 0.1
conf.epochs = 25
conf.momentum = 0.9
conf.batch_size = 1024
# model
conf.num_classes = 3
conf.input_channel = 3
conf.embedding_size = 128
# dataset
conf.train_root_path = './datasets/rgb_image'
# save file path
conf.snapshot_dir_path = './saved_logs/snapshot'
# log path
conf.log_path = './saved_logs/jobs'
# tensorboard
conf.board_loss_every = 10
# save model/iter
conf.save_every = 30
return conf
def update_config(args, conf):
conf.devices = args.devices
conf.patch_info = args.patch_info
w_input, h_input = get_width_height(args.patch_info)
conf.input_size = [h_input, w_input]
conf.kernel_size = get_kernel(h_input, w_input)
conf.device = "cuda:{}".format(conf.devices[0]) if torch.cuda.is_available() else "cpu"
# resize fourier image size
conf.ft_height = 2*conf.kernel_size[0]
conf.ft_width = 2*conf.kernel_size[1]
current_time = datetime.now().strftime('%b%d_%H-%M-%S')
job_name = 'Anti_Spoofing_{}'.format(args.patch_info)
log_path = '{}/{}/{} '.format(conf.log_path, job_name, current_time)
snapshot_dir = '{}/{}'.format(conf.snapshot_dir_path, job_name)
make_if_not_exist(snapshot_dir)
make_if_not_exist(log_path)
conf.model_path = snapshot_dir
conf.log_path = log_path
conf.job_name = job_name
return conf
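# Illustrative sketch: `args` is assumed to be an argparse-style object with `devices` and a
# `patch_info` string understood by src.utility.get_width_height / get_kernel.
# conf = get_default_config()
# conf = update_config(args, conf)
# print(conf.input_size, conf.kernel_size, conf.device)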

65
src/generate_patches.py Normal file
View File

@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-9 3:06 PM
# @Author : zhuying
# @Company : Minivision
# @File : test.py
# @Software : PyCharm
"""
Create patch from original input image by using bbox coordinate
"""
import cv2
import numpy as np
class CropImage:
@staticmethod
def _get_new_box(src_w, src_h, bbox, scale):
x = bbox[0]
y = bbox[1]
box_w = bbox[2]
box_h = bbox[3]
scale = min((src_h-1)/box_h, min((src_w-1)/box_w, scale))
new_width = box_w * scale
new_height = box_h * scale
center_x, center_y = box_w/2+x, box_h/2+y
left_top_x = center_x-new_width/2
left_top_y = center_y-new_height/2
right_bottom_x = center_x+new_width/2
right_bottom_y = center_y+new_height/2
if left_top_x < 0:
right_bottom_x -= left_top_x
left_top_x = 0
if left_top_y < 0:
right_bottom_y -= left_top_y
left_top_y = 0
if right_bottom_x > src_w-1:
left_top_x -= right_bottom_x-src_w+1
right_bottom_x = src_w-1
if right_bottom_y > src_h-1:
left_top_y -= right_bottom_y-src_h+1
right_bottom_y = src_h-1
return int(left_top_x), int(left_top_y),\
int(right_bottom_x), int(right_bottom_y)
def crop(self, org_img, bbox, scale, out_w, out_h, crop=True):
if not crop:
dst_img = cv2.resize(org_img, (out_w, out_h))
else:
src_h, src_w, _ = np.shape(org_img)
left_top_x, left_top_y, \
right_bottom_x, right_bottom_y = self._get_new_box(src_w, src_h, bbox, scale)
img = org_img[left_top_y: right_bottom_y+1,
left_top_x: right_bottom_x+1]
dst_img = cv2.resize(img, (out_w, out_h))
return dst_img
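# Illustrative usage sketch (bbox values and scale are hypothetical): expand a detector box
# [x, y, w, h] by `scale` and crop a fixed-size patch for the anti-spoofing model.
# cropper = CropImage()
# patch = cropper.crop(org_img=img, bbox=[60, 40, 120, 150], scale=2.7, out_w=80, out_h=80)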

296
src/model_lib/MiniFASNet.py Normal file
View File

@ -0,0 +1,296 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-3 4:45 PM
# @Author : zhuying
# @Company : Minivision
# @File : MiniFASNet.py
# @Software : PyCharm
import torch
import torch.nn.functional as F
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, \
AdaptiveAvgPool2d, Sequential, Module
class L2Norm(Module):
def forward(self, input):
return F.normalize(input)
class Flatten(Module):
def forward(self, input):
return input.view(input.size(0), -1)
class Conv_block(Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
super(Conv_block, self).__init__()
self.conv = Conv2d(in_c, out_c, kernel_size=kernel, groups=groups,
stride=stride, padding=padding, bias=False)
self.bn = BatchNorm2d(out_c)
self.prelu = PReLU(out_c)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.prelu(x)
return x
class Linear_block(Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
super(Linear_block, self).__init__()
self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel,
groups=groups, stride=stride, padding=padding, bias=False)
self.bn = BatchNorm2d(out_c)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Depth_Wise(Module):
def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
super(Depth_Wise, self).__init__()
c1_in, c1_out = c1
c2_in, c2_out = c2
c3_in, c3_out = c3
self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
self.residual = residual
def forward(self, x):
if self.residual:
short_cut = x
x = self.conv(x)
x = self.conv_dw(x)
x = self.project(x)
if self.residual:
output = short_cut + x
else:
output = x
return output
class Residual(Module):
def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
super(Residual, self).__init__()
modules = []
for i in range(num_block):
c1_tuple = c1[i]
c2_tuple = c2[i]
c3_tuple = c3[i]
modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True,
kernel=kernel, padding=padding, stride=stride, groups=groups))
self.model = Sequential(*modules)
def forward(self, x):
return self.model(x)
class SEModule(Module):
def __init__(self, channels, reduction):
super(SEModule, self).__init__()
self.avg_pool = AdaptiveAvgPool2d(1)
self.fc1 = Conv2d(
channels, channels // reduction, kernel_size=1, padding=0, bias=False)
self.bn1 = BatchNorm2d(channels // reduction)
self.relu = ReLU(inplace=True)
self.fc2 = Conv2d(
channels // reduction, channels, kernel_size=1, padding=0, bias=False)
self.bn2 = BatchNorm2d(channels)
self.sigmoid = Sigmoid()
def forward(self, x):
module_input = x
x = self.avg_pool(x)
x = self.fc1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.bn2(x)
x = self.sigmoid(x)
return module_input * x
class ResidualSE(Module):
def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1), se_reduct=4):
super(ResidualSE, self).__init__()
modules = []
for i in range(num_block):
c1_tuple = c1[i]
c2_tuple = c2[i]
c3_tuple = c3[i]
if i == num_block-1:
modules.append(
Depth_Wise_SE(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding, stride=stride,
groups=groups, se_reduct=se_reduct))
else:
modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
stride=stride, groups=groups))
self.model = Sequential(*modules)
def forward(self, x):
return self.model(x)
class Depth_Wise_SE(Module):
def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1, se_reduct=8):
super(Depth_Wise_SE, self).__init__()
c1_in, c1_out = c1
c2_in, c2_out = c2
c3_in, c3_out = c3
self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
self.residual = residual
self.se_module = SEModule(c3_out, se_reduct)
def forward(self, x):
if self.residual:
short_cut = x
x = self.conv(x)
x = self.conv_dw(x)
x = self.project(x)
if self.residual:
x = self.se_module(x)
output = short_cut + x
else:
output = x
return output
class MiniFASNet(Module):
def __init__(self, keep, embedding_size, conv6_kernel=(7, 7),
drop_p=0.0, num_classes=3, img_channel=3):
super(MiniFASNet, self).__init__()
self.embedding_size = embedding_size
self.conv1 = Conv_block(img_channel, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))
self.conv2_dw = Conv_block(keep[0], keep[1], kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=keep[1])
c1 = [(keep[1], keep[2])]
c2 = [(keep[2], keep[3])]
c3 = [(keep[3], keep[4])]
self.conv_23 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[3])
c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]
self.conv_3 = Residual(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
c1 = [(keep[16], keep[17])]
c2 = [(keep[17], keep[18])]
c3 = [(keep[18], keep[19])]
self.conv_34 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[19])
c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
(keep[31], keep[32]), (keep[34], keep[35])]
c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
(keep[32], keep[33]), (keep[35], keep[36])]
c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
(keep[33], keep[34]), (keep[36], keep[37])]
self.conv_4 = Residual(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
c1 = [(keep[37], keep[38])]
c2 = [(keep[38], keep[39])]
c3 = [(keep[39], keep[40])]
self.conv_45 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[40])
c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
self.conv_5 = Residual(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
self.conv_6_sep = Conv_block(keep[46], keep[47], kernel=(1, 1), stride=(1, 1), padding=(0, 0))
self.conv_6_dw = Linear_block(keep[47], keep[48], groups=keep[48], kernel=conv6_kernel, stride=(1, 1), padding=(0, 0))
self.conv_6_flatten = Flatten()
self.linear = Linear(512, embedding_size, bias=False)
self.bn = BatchNorm1d(embedding_size)
self.drop = torch.nn.Dropout(p=drop_p)
self.prob = Linear(embedding_size, num_classes, bias=False)
def forward(self, x):
out = self.conv1(x)
out = self.conv2_dw(out)
out = self.conv_23(out)
out = self.conv_3(out)
out = self.conv_34(out)
out = self.conv_4(out)
out = self.conv_45(out)
out = self.conv_5(out)
out = self.conv_6_sep(out)
out = self.conv_6_dw(out)
out = self.conv_6_flatten(out)
if self.embedding_size != 512:
out = self.linear(out)
out = self.bn(out)
out = self.drop(out)
out = self.prob(out)
return out
class MiniFASNetSE(MiniFASNet):
def __init__(self, keep, embedding_size, conv6_kernel=(7, 7),drop_p=0.75, num_classes=4, img_channel=3):
super(MiniFASNetSE, self).__init__(keep=keep, embedding_size=embedding_size, conv6_kernel=conv6_kernel,
drop_p=drop_p, num_classes=num_classes, img_channel=img_channel)
c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]
self.conv_3 = ResidualSE(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
(keep[31], keep[32]), (keep[34], keep[35])]
c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
(keep[32], keep[33]), (keep[35], keep[36])]
c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
(keep[33], keep[34]), (keep[36], keep[37])]
self.conv_4 = ResidualSE(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
self.conv_5 = ResidualSE(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
keep_dict = {'1.8M': [32, 32, 103, 103, 64, 13, 13, 64, 26, 26,
64, 13, 13, 64, 52, 52, 64, 231, 231, 128,
154, 154, 128, 52, 52, 128, 26, 26, 128, 52,
52, 128, 26, 26, 128, 26, 26, 128, 308, 308,
128, 26, 26, 128, 26, 26, 128, 512, 512],
'1.8M_': [32, 32, 103, 103, 64, 13, 13, 64, 13, 13, 64, 13,
13, 64, 13, 13, 64, 231, 231, 128, 231, 231, 128, 52,
52, 128, 26, 26, 128, 77, 77, 128, 26, 26, 128, 26, 26,
128, 308, 308, 128, 26, 26, 128, 26, 26, 128, 512, 512]
}
# (80x80) flops: 0.044, params: 0.41
def MiniFASNetV1(embedding_size=128, conv6_kernel=(7, 7),
drop_p=0.2, num_classes=3, img_channel=3):
return MiniFASNet(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2(embedding_size=128, conv6_kernel=(7, 7),
drop_p=0.2, num_classes=3, img_channel=3):
return MiniFASNet(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
def MiniFASNetV1SE(embedding_size=128, conv6_kernel=(7, 7),
drop_p=0.75, num_classes=3, img_channel=3):
return MiniFASNetSE(keep_dict['1.8M'], embedding_size, conv6_kernel,drop_p, num_classes, img_channel)
# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2SE(embedding_size=128, conv6_kernel=(7, 7),
drop_p=0.75, num_classes=4, img_channel=3):
return MiniFASNetSE(keep_dict['1.8M_'], embedding_size, conv6_kernel,drop_p, num_classes, img_channel)
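# Illustrative sketch: for an 80x80 input the four stride-2 stages leave a 5x5 map before
# conv_6_dw, so conv6_kernel=(5, 5) is used instead of the 112x112 default of (7, 7).
# model = MiniFASNetV2(embedding_size=128, conv6_kernel=(5, 5), num_classes=3)
# model.eval()                                  # BatchNorm1d needs eval mode for a batch of 1
# logits = model(torch.randn(1, 3, 80, 80))     # -> shape (1, 3)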

Some files were not shown because too many files have changed in this diff.