first
BIN
Database/new.npy
Normal file
BIN
Database/student.npy
Normal file
132
accuracy.py
Normal file
@@ -0,0 +1,132 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    # imdecode handles paths with non-ASCII characters that cv2.imread cannot read
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    # normalize pixel values from [0, 255] to [-1, 1]
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    # raw string keeps the Windows path's backslashes from being read as escapes
    test_path = r"D:\Download\out\cfp_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(os.path.join(test_path, name), img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("cfp.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    # "failed" counts probes rejected as unknown
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    # "error" above also counts the unknowns, so subtract them to get misidentifications
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
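A side note on the threshold=1.20 used by findmindistance above: both the probe and every database entry pass through l2_normalize, and for unit vectors squared Euclidean distance and cosine similarity are tied by ||a - b||^2 = 2 - 2*cos(a, b), so the 1.20 distance cutoff is equivalent to roughly cos > 0.28 (a derived figure, not one stated in the scripts). A minimal NumPy sketch with random vectors:

import numpy as np

a = np.random.randn(512)
b = np.random.randn(512)
a /= np.linalg.norm(a)  # same effect as l2_normalize() above
b /= np.linalg.norm(b)
dist = np.linalg.norm(a - b)
cos = float(np.dot(a, b))
assert np.isclose(dist ** 2, 2 - 2 * cos)  # identity for unit vectors
print(1 - 1.20 ** 2 / 2)  # 0.28: the cosine cutoff implied by dist < 1.20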
134
accuracy_GPU.py
Normal file
@@ -0,0 +1,134 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        # move the prediction back to the CPU before converting to numpy
        pred = pred.cpu().numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        imglist = imglist.to(torch.device("cuda"))
        pred = model(imglist)
        pred = pred.cpu().numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth"))
    model.to(torch.device("cuda"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    test_path = "./retinaface_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(os.path.join(test_path, name), img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("retinaface_lfw_myalign.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
150
anti.py
Normal file
@@ -0,0 +1,150 @@
import os
import cv2
import numpy as np
import argparse
import warnings
import time
import torch
import torch.nn.functional as F

from src.generate_patches import CropImage
from src.model_lib.MiniFASNet import MiniFASNetV1, MiniFASNetV2, MiniFASNetV1SE, MiniFASNetV2SE
from src.data_io import transform as trans
from src.utility import get_kernel, parse_model_name
warnings.filterwarnings('ignore')

MODEL_MAPPING = {
    'MiniFASNetV1': MiniFASNetV1,
    'MiniFASNetV2': MiniFASNetV2,
    'MiniFASNetV1SE': MiniFASNetV1SE,
    'MiniFASNetV2SE': MiniFASNetV2SE
}


class AntiSpoofPredict():
    def __init__(self, cpu_or_cuda):
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")

    def predict(self, img, model):
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        with torch.no_grad():
            result = model.forward(img)
            result = F.softmax(result, dim=1).cpu().numpy()
        return result


def load_anti_model(model_dir, cpu_or_cuda):
    model_list = []
    for model_path in os.listdir(model_dir):
        model_list.append(_load_model(os.path.join(model_dir, model_path), cpu_or_cuda))
    return model_list


def _load_model(model_path, cpu_or_cuda):
    # define model
    device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")
    model_name = os.path.basename(model_path)
    h_input, w_input, model_type, _ = parse_model_name(model_name)
    kernel_size = get_kernel(h_input, w_input)
    model = MODEL_MAPPING[model_type](conv6_kernel=kernel_size).to(device)

    # load model weight
    state_dict = torch.load(model_path, map_location=device)
    first_layer_name = next(iter(state_dict))
    # strip the "module." prefix left over from DataParallel training
    if first_layer_name.find('module.') >= 0:
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, value in state_dict.items():
            name_key = key[7:]
            new_state_dict[name_key] = value
        model.load_state_dict(new_state_dict)
    else:
        model.load_state_dict(state_dict)
    model.eval()
    return model


# the Android APK delivers its video stream at a 3:4 aspect ratio, so the
# width/height ratio is restricted to 3:4 for consistency
def check_image(image):
    height, width, channel = image.shape
    if width / height != 3 / 4:
        print("Image is not appropriate!!!\nHeight/Width should be 4/3.")
        return False
    else:
        return True


# face liveness (anti-spoofing) detection
def anti_spoofing(image_name, model_dir, cpu_or_cuda, bbox, model_list):
    model_test = AntiSpoofPredict(cpu_or_cuda)
    image_cropper = CropImage()
    image = cv2.imdecode(np.fromfile(image_name, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = image.shape[:2]
    factor = h / w
    if (w > 1000):
        image = cv2.resize(image, (600, int(600 * factor)))
    # result = check_image(image)
    # if result is False:
    #     return
    # image_bbox = model_test.get_bbox(image)
    image_bbox = bbox
    prediction = np.zeros((1, 3))
    test_speed = 0
    # sum the prediction from single model's result
    for index, model_name in enumerate(os.listdir(model_dir)):
        h_input, w_input, model_type, scale = parse_model_name(model_name)
        param = {
            "org_img": image,
            "bbox": image_bbox,
            "scale": scale,
            "out_w": w_input,
            "out_h": h_input,
            "crop": True,
        }
        if scale is None:
            param["crop"] = False
        img = image_cropper.crop(**param)

        start = time.time()
        prediction += model_test.predict(img, model_list[index])
        test_speed += time.time() - start

    label = np.argmax(prediction)
    # print(prediction)
    # cv2.rectangle(
    #     image,
    #     (image_bbox[0], image_bbox[1]),
    #     (image_bbox[0] + image_bbox[2], image_bbox[1] + image_bbox[3]),
    #     (225,0,0), 2)
    # cv2.imshow("out",image)
    # cv2.waitKey(0)
    # the ensembled scores were summed, so halve for an average "real" score
    value = prediction[0][1] / 2
    if value > 0.915:
        return "real face", '{:.10f}'.format(value)
    else:
        return "fake face", '{:.10f}'.format(value)


if __name__ == "__main__":
    desc = "test"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="which gpu id, [0/1/2/3]")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="./resources/anti_spoof_models",
        help="model_lib used to test")
    parser.add_argument(
        "--image_name",
        type=str,
        default="000_0.bmp",
        help="image used to test")
    args = parser.parse_args()
    # anti_spoofing(args.image_name, args.model_dir, args.device_id)
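A minimal usage sketch for the two entry points above, assuming the MiniFASNet weights are present under ./resources/anti_spoof_models and that bbox is given as [x, y, w, h] (app.py below converts RetinaFace's corner boxes into this form before calling anti_spoofing); the image filename here is hypothetical:

from anti import load_anti_model, anti_spoofing

model_dir = "./resources/anti_spoof_models"  # assumed weight location
models = load_anti_model(model_dir, "cpu")
bbox = [50, 40, 120, 160]  # hypothetical face box: x, y, w, h
label, score = anti_spoofing("000_0.bmp", model_dir, "cpu", bbox, models)
print(label, score)  # "real face" or "fake face" plus the score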
449
app.py
Normal file
@@ -0,0 +1,449 @@
import time

import faiss
from flask import Flask, render_template, request, jsonify, send_from_directory
from markupsafe import escape, escape_silent
from werkzeug.utils import secure_filename

from anti import anti_spoofing, load_anti_model
from face_api import load_arcface_model, load_npy, findOne, load_image, face_verification, findAll, add_one_to_database, \
    get_claster_tmp_file_embedding, cluster, detect_video
from gender_age import set_gender_conf, gender_age, load_gender_model
from retinaface_detect import load_retinaface_model, detect_one, set_retinaface_conf
from werkzeug.exceptions import RequestEntityTooLarge
import zipfile
import os
import shutil
import re
import numpy as np
import torch

ALLOWED_IMG = set(['png', 'jpg', 'jpeg', 'bmp', 'PNG', 'JPG', 'JPEG'])
# uploaded images are limited to 10M
ALLOWED_IMG_SIZE = 10 * 1024 * 1024
ALLOWED_FILE = set(['zip'])
ALLOWED_VIDEO = set(['mp4'])
app = Flask(__name__)

# uploaded files are limited to 100M
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
# keep jsonify from escaping Chinese characters into garbled ASCII
app.config['JSON_AS_ASCII'] = False

# choose CPU or GPU (pass "cuda")
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# load the face recognition model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# load the face detection model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
# load the gender/age recognition model
gender_args = set_gender_conf()
gender_model = load_gender_model(gender_args, 'fc1')
anti_spoofing_model_path = "model/anti_spoof_models"
anti_model = load_anti_model(anti_spoofing_model_path, cpu_or_cuda)


# the face database itself is loaded in __main__ below


@app.route('/')
def index():
    return "model"


@app.route('/hello')
@app.route('/hello/<name>')
def hello(name=None):
    return render_template('hello.html', name=name)


@app.route('/user', methods=['GET'])
def show_user_name():
    return request.args.get('username', '')


# build the returned json data
# function arguments are tested against None; the data/result dicts defined
# inside the function are tested for truthiness
def create_response(status, name=None, distance=None, verification=None, gender=None, age=None, num=None, anti=None,
                    score=None, box_and_point=None, addfile_names=None, fail_names=None, database_name=None, msg=None,
                    delete_names=None, not_exist_names=None):
    # res is the top-level json structure
    res = {}
    res['status'] = status

    data = {}
    try:
        # box_and_point is an ndarray when present
        data["box_and_point"] = box_and_point.tolist()
    except AttributeError:
        pass
    if anti is not None and score is not None:
        liveness = {}
        liveness["spoofing"] = anti
        liveness['score'] = score
        data['liveness'] = liveness
    if distance is not None:
        data['distance'] = float(distance)
    if verification is not None:
        data['verification'] = verification
    if num is not None:
        data['number'] = num
    if gender is not None:
        data['gender'] = gender
    if age is not None:
        data['age'] = age
    if name is not None:
        data['name'] = name
    if data:
        res['data'] = data

    # data returned by the database add/delete endpoints
    result = {}
    if msg is not None:
        res['msg'] = msg
    if database_name is not None:
        result['database_name'] = database_name
    # added faces
    if addfile_names is not None or fail_names is not None:
        result['success_names'] = addfile_names
        result['fail_names'] = fail_names
    # deleted faces
    if delete_names is not None or not_exist_names is not None:
        result['delete_names'] = delete_names
        result['not_exist_names'] = not_exist_names
    if result:
        res['result'] = result

    return jsonify(res)


# build the json data returned by the cluster endpoint
def create_cluster_response(status, all_cluster):
    res = {}
    data = {}
    for index, cluster in enumerate(all_cluster):
        data['cluster' + str(index)] = cluster
    res['data'] = data
    res['status'] = status
    return res


# check the format of an uploaded file
def check_file_format(file_name, format):
    if '.' in file_name:
        # split once from the right so names with several dots keep their real extension
        file_format = file_name.rsplit('.', 1)[1]
        if file_format in format:
            return True
    return False


# check the image size; raise an exception above 10M
def check_img_size(img_path):
    fsize = os.path.getsize(img_path)
    if fsize > ALLOWED_IMG_SIZE:
        raise RequestEntityTooLarge


# extract a zip file to the given path
def unzip(zip_src, dst_dir):
    f = zipfile.is_zipfile(zip_src)
    if f:
        fz = zipfile.ZipFile(zip_src, 'r')
        for file in fz.namelist():
            fz.extract(file, dst_dir)
        return True
    else:
        return False


# extract a zip file
def un_zip(file_path, output_path):
    zip_file = zipfile.ZipFile(file_path)
    if os.path.isdir(output_path):
        pass
    else:
        os.mkdir(output_path)
    zip_file.extractall(output_path)
    # for names in zip_file.namelist():
    #     zip_file.extract(names,output_path)
    zip_file.close()


# face recognition plus gender/age recognition
@app.route('/recognition', methods=['POST'])
def recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_IMG):
            img_path = './img/recognition/' + secure_filename(f.filename)
            f.save(img_path)
            check_img_size(img_path)
            # img3 = load_image('./file/'+secure_filename(f.filename))
            # img3 = torch.from_numpy(img3)
            tic = time.time()
            img3, box_and_point = detect_one(img_path, retinaface_model, retinaface_args)
            print('detect time: {:.4f}'.format(time.time() - tic))
            if len(img3) == 0:
                return create_response('no face')
            elif len(img3) > 1:
                namelist = findAll(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', namelist, gender=gender_list, age=age_list,
                                      box_and_point=box_and_point)
            else:
                # convert the [x1, y1, x2, y2] corners into [x, y, w, h]
                b = box_and_point[0]
                w = b[2] - b[0]
                h = b[3] - b[1]
                b[2] = w
                b[3] = h
                label, value = anti_spoofing(img_path, anti_spoofing_model_path, cpu_or_cuda, np.array(b[:4], int),
                                             anti_model)
                # print(index,database_name_list)
                name, distance = findOne(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', name, gender=gender_list, age=age_list, distance=distance,
                                      anti=label, score=value, box_and_point=box_and_point)
            return res
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# compare two images
@app.route('/compare', methods=['POST'])
def compare_file():
    try:
        file1 = request.files['file1_name']
        file2 = request.files['file2_name']
        if file1 and check_file_format(file1.filename, ALLOWED_IMG) and file2 and check_file_format(file2.filename,
                                                                                                    ALLOWED_IMG):
            img1_path = './img/compare/' + secure_filename(file1.filename)
            img2_path = './img/compare/' + secure_filename(file2.filename)
            file1.save(img1_path)
            file2.save(img2_path)
            check_img_size(img1_path)
            check_img_size(img2_path)
            img1, box_and_point1 = detect_one(img1_path, retinaface_model,
                                              retinaface_args)
            img2, box_and_point2 = detect_one(img2_path, retinaface_model, retinaface_args)
            if len(img1) == 1 and len(img2) == 1:
                result, distance = face_verification(img1, img2, arcface_model, cpu_or_cuda)
                print(result, distance)
                return create_response('success', verification=result, distance=distance)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# add faces to a database: one or many faces, into an existing or a new database
# create and update
@app.route('/databaseAdd', methods=['POST'])
def DB_add_face():
    try:
        # upload face images (>= 1)
        # all parts share the key file_list; multiple values give batch upload
        upload_files = request.files.getlist("file_list")
        # '', [], {} and 0 are all falsy
        if not upload_files:
            msg = "uploaded file list is empty"
            return create_response(0, msg=msg)
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "database does not exist"
            return create_response(0, msg=msg)
        # names already stored in the database
        names = load_npy(database_path).keys()
        # print(names)

        # temporary server-side folder for uploaded images, recreated before each
        # upload and removed afterwards
        # could be switched to periodic cleanup later
        file_temp_path = './img/uploadNew/'
        if not os.path.exists(file_temp_path):
            os.makedirs(file_temp_path)

        # regular expression extracting the Chinese characters of a filename,
        # used as the keys of the .npy database
        r = re.compile('[\u4e00-\u9fa5]+')
        # collect names that were added successfully or failed
        success_names = []
        fail_names = {}
        # two failure cases: wrong format or already existing
        format_wrong = []
        alreadyExist = []
        # handle each image: check the format first, then whether the name exists
        for file in upload_files:
            filename = file.filename
            matches = r.findall(filename)
            # fall back to the raw filename if it contains no Chinese characters
            name = matches[0] if matches else filename
            if file and check_file_format(filename, ALLOWED_IMG):
                if name in names:
                    alreadyExist.append(name)
                    continue
                save_path = file_temp_path + filename
                file.save(save_path)
                check_img_size(save_path)
                img_file, box_and_point = detect_one(save_path, retinaface_model, retinaface_args)
                add_one_to_database(img=img_file, model=arcface_model, name=name, database_path=database_path,
                                    cpu_or_cuda=cpu_or_cuda)
                success_names.append(name)
            else:
                format_wrong.append(name)
                continue
        shutil.rmtree(file_temp_path)
        # any failure downgrades the status
        if format_wrong or alreadyExist:
            status = 0
        else:
            status = 1
        fail_names['formatWrong'] = format_wrong
        fail_names['alreadyExist'] = alreadyExist

        return create_response(status=status, addfile_names=success_names, fail_names=fail_names,
                               database_name=database_name, msg="face addition finished")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# delete one or many faces from an existing database
@app.route('/databaseDelete', methods=['POST'])
def DB_delete_face():
    try:
        delete_names = request.form.getlist("delete_names")
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "database does not exist"
            return create_response(0, msg=msg)
        if not delete_names:
            msg = "delete_names parameter is empty"
            return create_response(0, msg=msg)
        k_v = load_npy(database_path)
        print(k_v.keys())
        success_list = []
        fail_list = []
        for name in delete_names:
            if name in k_v.keys():
                del k_v[name]
                success_list.append(name)
            else:
                fail_list.append(name)
                continue
        np.save(database_path, k_v)
        status = 1
        if fail_list:
            status = 0
        return create_response(status=status, delete_names=success_list, not_exist_names=fail_list,
                               database_name=database_name, msg="face deletion finished")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# image-search-image endpoints:
# upload a zip archive of images to build the search library
@app.route('/uploadZip', methods=['POST'])
def upload_Zip():
    try:
        zip = request.files['zip_name']
        dst_dir = './img/search/'
        if unzip(zip, dst_dir):
            return create_response('upload zip success')
        else:
            return create_response('upload zip file please')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# search the library by image
@app.route('/imgSearchImg', methods=['POST'])
def img_search_img():
    searchfile = './img/search/face'
    try:
        file = request.files['img_name']
        if file and check_file_format(file.filename, ALLOWED_IMG):
            img_path = './img/search/' + secure_filename(file.filename)
            file.save(img_path)
            check_img_size(img_path)
            img, box_and_point = detect_one(img_path, retinaface_model,
                                            retinaface_args)
            if len(img) == 1:
                Onename = []
                num = 0
                for filenames in os.listdir(searchfile):
                    imgpath = os.path.join(searchfile, filenames)
                    imgdata, box_and_point = detect_one(imgpath, retinaface_model, retinaface_args)
                    result = face_verification(img, imgdata, arcface_model, cpu_or_cuda)
                    isOne, distance = result.split(' ', -1)[0], result.split(' ', -1)[1]
                    if isOne == 'same':
                        Onename.append(filenames)
                        num += 1
                return create_response('success', name=Onename, num=num)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# face clustering endpoint
@app.route('/cluster', methods=['POST'])
def zip_cluster():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_FILE):
            zip_name = secure_filename(f.filename)
            f.save('./img/cluster_tmp_file/' + zip_name)
            un_zip('./img/cluster_tmp_file/' + zip_name, './img/cluster_tmp_file/')
            emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/" + zip_name.rsplit('.')[0],
                                                                 retinaface_model,
                                                                 retinaface_args, arcface_model, cpu_or_cuda)
            return create_cluster_response("success", cluster(emb_list, name_list))
        else:
            return create_response('zip are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


# video recognition endpoint
@app.route('/videorecognition', methods=['POST'])
def video_recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_VIDEO):
            video_name = secure_filename(f.filename)
            f.save('./video/' + video_name)
            detect_video('./video/' + video_name, './videoout/' + video_name, retinaface_model, arcface_model, k_v,
                         retinaface_args)
            return create_response("success")
        else:
            return create_response('mp4 are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


@app.route('/download/<string:filename>', methods=['GET'])
def download(filename):
    if os.path.isfile(os.path.join('./videoout/', filename)):
        return send_from_directory('./videoout/', filename, as_attachment=True)
    else:
        return create_response("Download failed")


if __name__ == '__main__':
    k_v = load_npy("./Database/student.npy")
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    # print(database_name_list)
    nlist = 50
    quantizer = faiss.IndexFlatL2(512)  # the other index
    # rebinding the module-level name "index" here is what the routes above read;
    # Flask already registered the index() view, so routing is unaffected
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)
    # index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 50
    app.run(host="0.0.0.0", port=5000)
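For reference, a client-side sketch against the endpoints defined above, using the multipart field names from the code (file_name, file1_name, file2_name) and assuming the server is reachable on localhost:5000; the image filenames are hypothetical and the response layout follows create_response:

import requests

base = "http://localhost:5000"

# single-image recognition
with open("test.jpg", "rb") as f:
    r = requests.post(base + "/recognition", files={"file_name": f})
print(r.json())  # roughly {"status": "success", "data": {"name": ..., "distance": ..., "liveness": ...}}

# 1:1 verification of two images
with open("a.jpg", "rb") as f1, open("b.jpg", "rb") as f2:
    r = requests.post(base + "/compare", files={"file1_name": f1, "file2_name": f2})
print(r.json())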
1
backbones/__init__.py
Normal file
@@ -0,0 +1 @@
from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
BIN
backbones/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
backbones/__pycache__/iresnet.cpython-38.pyc
Normal file
187
backbones/iresnet.py
Normal file
@@ -0,0 +1,187 @@
import torch
from torch import nn

__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class IBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1):
        super(IBasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
        self.conv1 = conv3x3(inplanes, planes)
        self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.prelu = nn.PReLU(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.prelu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return out


class IResNet(nn.Module):
    fc_scale = 7 * 7

    def __init__(self,
                 block, layers, dropout=0, num_features=512, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
        super(IResNet, self).__init__()
        self.fp16 = fp16
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
        self.prelu = nn.PReLU(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
        self.dropout = nn.Dropout(p=dropout, inplace=True)
        self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
        self.features = nn.BatchNorm1d(num_features, eps=1e-05)
        nn.init.constant_(self.features.weight, 1.0)
        self.features.weight.requires_grad = False

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, IBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
            )
        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(self.inplanes,
                      planes,
                      groups=self.groups,
                      base_width=self.base_width,
                      dilation=self.dilation))

        return nn.Sequential(*layers)

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.prelu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
            x = self.bn2(x)
            x = torch.flatten(x, 1)
            x = self.dropout(x)
        x = self.fc(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
    model = IResNet(block, layers, **kwargs)
    if pretrained:
        raise ValueError()
    return model


def iresnet18(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
                    progress, **kwargs)


def iresnet34(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
                    progress, **kwargs)


def iresnet50(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
                    progress, **kwargs)


def iresnet100(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
                    progress, **kwargs)


def iresnet200(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
                    progress, **kwargs)
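A quick sanity check for the backbone: with the 112x112 crops used throughout this repo, the four stride-2 stages reduce the feature map to 7x7 (matching fc_scale = 7 * 7) and the network emits a 512-d embedding. A minimal sketch:

import torch
from backbones import iresnet100

model = iresnet100()
model.eval()
with torch.no_grad():
    emb = model(torch.randn(1, 3, 112, 112))  # dummy input, normalized like load_image()
print(emb.shape)  # torch.Size([1, 512])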
135
centerface.py
Normal file
@@ -0,0 +1,135 @@
import time

import numpy as np
import cv2
import datetime


class CenterFace(object):
    def __init__(self, landmarks=True):
        self.landmarks = landmarks
        if self.landmarks:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/centerface.onnx')
        else:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/cface.1k.onnx')
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
        self.net.setInput(blob)
        begin = datetime.datetime.now()
        start_time = time.time()

        lms = None  # only produced by the landmark model
        if self.landmarks:
            heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540'])
        else:
            heatmap, scale, offset = self.net.forward(["535", "536", "537"])
        end = datetime.datetime.now()
        end_time = time.time()
        # print("cpuOne time: " + str(end_time - start_time))
        # print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        # round the input size up to a multiple of 32, as the network requires
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
        if len(dets) > 0:
            dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
            if self.landmarks:
                lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)
        if self.landmarks:
            return dets, lms
        else:
            return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        c0, c1 = np.where(heatmap > threshold)
        if self.landmarks:
            boxes, lms = [], []
        else:
            boxes = []
        if len(c0) > 0:
            for i in range(len(c0)):
                s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
                o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
                s = heatmap[c0[i], c1[i]]
                x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
                x1, y1 = min(x1, size[1]), min(y1, size[0])
                boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
                if self.landmarks:
                    lm = []
                    for j in range(5):
                        lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
                        lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
                    lms.append(lm)
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]
        if self.landmarks:
            return boxes, lms
        else:
            return boxes

    def nms(self, boxes, scores, nms_thresh):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]
        num_detections = boxes.shape[0]
        # np.bool was removed in recent numpy; the builtin bool dtype is equivalent
        suppressed = np.zeros((num_detections,), dtype=bool)

        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)

            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]

            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue

                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0, xx2 - xx1 + 1)
                h = max(0, yy2 - yy1 + 1)

                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True

        return keep
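A usage sketch for CenterFace, assuming the ONNX weights exist at ./model/onnx/centerface.onnx and using a hypothetical test image; __call__ takes a frame plus its height and width and returns detections as [x1, y1, x2, y2, score] rows together with 5-point landmarks:

import cv2
from centerface import CenterFace

frame = cv2.imread("test.jpg")  # any BGR image
h, w = frame.shape[:2]
centerface = CenterFace(landmarks=True)
dets, lms = centerface(frame, h, w, threshold=0.5)
for x1, y1, x2, y2, score in dets:
    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)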
67
config.py
Normal file
@@ -0,0 +1,67 @@
from easydict import EasyDict as edict

config = edict()
config.dataset = "ms1m-retinaface-t2"
config.embedding_size = 512
config.sample_rate = 1
config.fp16 = False
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 64
config.lr = 0.1  # batch size is 512
config.output = "ms1mv3_arcface_r50"

if config.dataset == "emore":
    config.rec = "/train_tmp/faces_emore"
    config.num_classes = 85742
    config.num_image = 5822653
    config.num_epoch = 16
    config.warmup_epoch = -1
    config.val_targets = ["lfw", ]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [8, 14] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "ms1m-retinaface-t2":
    config.rec = "/train_tmp/ms1m-retinaface-t2"
    config.num_classes = 91180
    config.num_epoch = 25
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [11, 17, 22] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "glint360k":
    # make training faster
    # our RAM is 256G
    # mount -t tmpfs -o size=140G tmpfs /train_tmp
    config.rec = "/train_tmp/glint360k"
    config.num_classes = 360232
    config.num_image = 17091657
    config.num_epoch = 20
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [8, 12, 15, 18] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "webface":
    config.rec = "/train_tmp/faces_webface_112x112"
    config.num_classes = 10572
    config.num_image = "forget"
    config.num_epoch = 34
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [20, 28, 32] if m - 1 <= epoch])
    config.lr_func = lr_step_func
168
create_database.py
Normal file
@@ -0,0 +1,168 @@
import os
import time
import re
import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def findCosineDistance(source_representation, test_representation):
    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def create_database(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        # only the first image of each person is enrolled
        for img_name in img_path[:1]:
            img = load_image(os.path.join(path, name, img_name))
            img = torch.from_numpy(img)
            with torch.no_grad():
                pred = model(img)
            pred = pred.numpy()
            k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def create_database_batch(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    order_name = []
    order_path = []
    emb_list = []
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        for img_name in img_path[:1]:
            order_name.append(name)
            order_path.append(os.path.join(path, name, img_name))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))

    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb.numpy())
    np.save(database_path, k_v)


def add_one(img, model, name, database_path):
    img = torch.from_numpy(img)
    with torch.no_grad():
        pred = model(img)
    pred = pred.numpy()
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):

    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"

def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    # img = load_image(r"D:\Download\out\facedatabase\man.jpg")
    # img = load_image(r"D:\Download\out\facedatabase\man6.jpg")
    # img = load_image(r"D:\Download\out\alig_students\student.jpg")
    # print(img.shape)
    #
    # k_v = load_npy("./Database/student.npy")
    # start_time = time.time()
    # img = torch.from_numpy(img)
    # name = findOne(img,model,k_v)
    # mo = r'[\u4e00-\u9fa5]*'
    # name = re.match(mo,name)
    # print(name.group(0))
    # end_time = time.time()
    # print("findOne time: " + str(end_time - start_time))

    # create_database_batch(r"D:\Download\out\alig_students",model,"./Database/student.npy")
    create_database_batch(r"D:\Download\out\cfp_database", model, "cfp.npy")
    # add_one(img,model,"Arminio_Fraga","centerface_lfw.npy")
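The .npy databases written above are plain pickled dicts mapping a person's name to an L2-normalized embedding ((1, 512) from create_database/add_one, (512,) from create_database_batch), so they can be inspected directly; a small sketch assuming cfp.npy was produced by the batch helper:

import numpy as np

k_v = np.load("cfp.npy", allow_pickle=True).item()  # dict: name -> embedding
for name, emb in list(k_v.items())[:3]:
    print(name, emb.shape, np.linalg.norm(emb))  # norms should be ~1.0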
2845
data/FDDB/img_list.txt
Normal file
3
data/__init__.py
Normal file
@@ -0,0 +1,3 @@
from .wider_face import WiderFaceDetection, detection_collate
from .data_augment import *
from .config import *
BIN
data/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
data/__pycache__/config.cpython-38.pyc
Normal file
BIN
data/__pycache__/data_augment.cpython-38.pyc
Normal file
BIN
data/__pycache__/wider_face.cpython-38.pyc
Normal file
42
data/config.py
Normal file
@ -0,0 +1,42 @@
# config.py

cfg_mnet = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,
    'ngpu': 1,
    'epoch': 250,
    'decay1': 190,
    'decay2': 220,
    'image_size': 640,
    'pretrain': True,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64
}

cfg_re50 = {
    'name': 'Resnet50',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 24,
    'ngpu': 4,
    'epoch': 100,
    'decay1': 70,
    'decay2': 90,
    'image_size': 840,
    'pretrain': True,
    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
    'in_channel': 256,
    'out_channel': 256
}
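
Elsewhere in this repo these dicts are selected by a --network flag; a minimal sketch of that pattern (the args object is hypothetical here), mirroring what detect_rtsp does below:

# sketch: pick a detector config by network name
cfg = cfg_mnet if args.network == "mobile0.25" else cfg_re50
print(cfg['name'], cfg['image_size'])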
237
data/data_augment.py
Normal file
@ -0,0 +1,237 @@
import cv2
import numpy as np
import random
from utils.box_utils import matrix_iof


def _crop(image, boxes, labels, landm, img_dim):
    height, width, _ = image.shape
    pad_image_flag = True

    for _ in range(250):
        """
        if random.uniform(0, 1) <= 0.2:
            scale = 1.0
        else:
            scale = random.uniform(0.3, 1.0)
        """
        PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
        scale = random.choice(PRE_SCALES)
        short_side = min(width, height)
        w = int(scale * short_side)
        h = w

        if width == w:
            l = 0
        else:
            l = random.randrange(width - w)
        if height == h:
            t = 0
        else:
            t = random.randrange(height - h)
        roi = np.array((l, t, l + w, t + h))

        value = matrix_iof(boxes, roi[np.newaxis])
        flag = (value >= 1)
        if not flag.any():
            continue

        centers = (boxes[:, :2] + boxes[:, 2:]) / 2
        mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[mask_a].copy()
        labels_t = labels[mask_a].copy()
        landms_t = landm[mask_a].copy()
        landms_t = landms_t.reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # landm
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # make sure that the cropped image contains at least one face > 16 pixel at training image scale
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        pad_image_flag = False

        return image_t, boxes_t, labels_t, landms_t, pad_image_flag
    return image, boxes, labels, landm, pad_image_flag


def _distort(image):

    def _convert(image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    image = image.copy()

    if random.randrange(2):

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

    else:

        #brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        #saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        #hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        #contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

    return image


def _expand(image, boxes, fill, p):
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape

    scale = random.uniform(1, p)
    w = int(scale * width)
    h = int(scale * height)

    left = random.randint(0, w - width)
    top = random.randint(0, h - height)

    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)
    expand_image = np.empty(
        (h, w, depth),
        dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image
    image = expand_image

    return image, boxes_t


def _mirror(image, boxes, landms):
    _, width, _ = image.shape
    if random.randrange(2):
        image = image[:, ::-1]
        boxes = boxes.copy()
        boxes[:, 0::2] = width - boxes[:, 2::-2]  # the reversed slice swaps x1/x2 so the mirrored box stays valid

        # landm
        landms = landms.copy()
        landms = landms.reshape([-1, 5, 2])
        landms[:, :, 0] = width - landms[:, :, 0]
        tmp = landms[:, 1, :].copy()  # swap left/right eyes and left/right mouth corners after mirroring
        landms[:, 1, :] = landms[:, 0, :]
        landms[:, 0, :] = tmp
        tmp1 = landms[:, 4, :].copy()
        landms[:, 4, :] = landms[:, 3, :]
        landms[:, 3, :] = tmp1
        landms = landms.reshape([-1, 10])

    return image, boxes, landms


def _pad_to_square(image, rgb_mean, pad_image_flag):
    if not pad_image_flag:
        return image
    height, width, _ = image.shape
    long_side = max(width, height)
    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
    image_t[:, :] = rgb_mean
    image_t[0:0 + height, 0:0 + width] = image
    return image_t


def _resize_subtract_mean(image, insize, rgb_mean):
    interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    interp_method = interp_methods[random.randrange(5)]
    image = cv2.resize(image, (insize, insize), interpolation=interp_method)
    image = image.astype(np.float32)
    image -= rgb_mean
    return image.transpose(2, 0, 1)


class preproc(object):

    def __init__(self, img_dim, rgb_means):
        self.img_dim = img_dim
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        assert targets.shape[0] > 0, "this image does not have gt"

        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
        image_t = _distort(image_t)
        image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, labels_t))

        return image_t, targets_t
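
A quick sketch of feeding preproc one image with a single annotated face; the box and landmark values below are made up, and each targets row packs [x1, y1, x2, y2, ten landmark coords, label] as __call__ expects:

# sketch: run the augmentation pipeline on a dummy sample
import numpy as np

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
targets = np.array([[100, 100, 200, 200,            # box
                     120, 130, 180, 130, 150, 160,  # five landmarks (x, y)
                     130, 180, 170, 180,
                     1]], dtype=np.float32)          # label
p = preproc(img_dim=640, rgb_means=(104, 117, 123))
img_t, targets_t = p(image, targets)
print(img_t.shape, targets_t.shape)                  # (3, 640, 640), (N, 15)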
258
data/realtime_detect.py
Normal file
@ -0,0 +1,258 @@
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss

ppi = 1280
ppi2 = 640
step = 3

def detect_rtsp(rtsp, out_rtsp, net, arcface_model, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(rtsp)
    ret, frame = cap.read()
    h, w = frame.shape[:2]

    factor = 0
    if (w > ppi):
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]
    arf = 1
    detect_h, detect_w = frame.shape[:2]
    frame_detect = frame
    factor2 = 0
    if (w > ppi2):
        factor2 = h / w
        frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
        detect_h, detect_w = frame_detect.shape[:2]
        arf = w/detect_w
    print(w,h)
    print(detect_w,detect_h)

    #fps = cap.get(cv2.CAP_PROP_FPS)
    #print(fps)
    size = (w, h)
    sizeStr = str(size[0]) + 'x' + str(size[1])
    if(out_rtsp.startswith("rtsp")):
        command = ['ffmpeg',
                   '-y', '-an',
                   '-f', 'rawvideo',
                   '-vcodec', 'rawvideo',
                   '-pix_fmt', 'bgr24',
                   '-s', sizeStr,
                   '-r', "25",
                   '-i', '-',
                   '-c:v', 'libx265',
                   '-b:v', '3000k',
                   '-pix_fmt', 'yuv420p',
                   '-preset', 'ultrafast',
                   '-f', 'rtsp',
                   out_rtsp]
        pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
    number = step
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
    scale = scale.to(device)
    scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h])
    scale1 = scale1.to(device)

    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            tic = time.time()
            img = np.float32(frame_detect)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K faster NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time_findall = time.time()
            for i, det in enumerate(dets[:1]):
                if det[4] < args.vis_thres:
                    continue
                #boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                dst = dst * arf

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                print("warpALL time: " + str(time.time() - start_time_findall ))
                #start_time = time.time()
                name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
                #print(name_list)

            #print("findOneframe time: " + str(time.time() - start_time_findall))
            #start_time = time.time()
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         boxes = boxes * arf
            #         name = name_list[i]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
            #         cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
            start_time = time.time()
            if(len(dets) != 0):
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                for i, det in enumerate(dets[:1]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[i]
                    if not isinstance(name, str):  # np.unicode was removed in NumPy 1.20+; str is equivalent here
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            pipe.stdin.write(frame.tobytes())  # ndarray.tostring() is deprecated; tobytes() returns the same raw buffer
            print("drawOneframe time: " + str(time.time() - start_time))
            #start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            number = step
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if (ret != 0 and factor2 != 0):
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            #print("readframe time: " + str(time.time() - start_time))
        else:
            number += 1
            if (len(dets) != 0):
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # if (len(dets) != 0):
            #     img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            #     draw = ImageDraw.Draw(img_PIL)
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         name = name_list[i]
            #         if not isinstance(name, np.unicode):
            #             name = name.decode('utf8')
            #         draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            #         draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
            #                        width=3)
            #     frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            start_time = time.time()
            pipe.stdin.write(frame.tobytes())  # see note above on tostring() vs tobytes()
            print("writeframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    pipe.terminate()
    print('total time: {:.4f}'.format(time.time() - tic_total))

if __name__ == "__main__":
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # load the face recognition model
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
    # load the face detection model
    retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    retinaface_model = load_retinaface_model(retinaface_args)
    k_v = load_npy("./Database/student.npy")
    #print(list(k_v.keys()))
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    index = faiss.IndexFlatL2(512)
    index.add(vector_list)

    #detect_rtsp("software.mp4", 'rtsp://localhost/test2', retinaface_model, arcface_model, index ,database_name_list, retinaface_args)
    detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
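
The __main__ block builds a faiss index that the commented-out call would consume; a minimal sketch of how a lookup against it maps back to names (the query below is a placeholder 512-d embedding):

# sketch: nearest-neighbour lookup in the faiss index built above
query = np.random.rand(1, 512).astype(np.float32)   # stand-in for an l2-normalized ArcFace embedding
distances, ids = index.search(query, 1)             # squared-L2 distance to the closest database vector
print(database_name_list[ids[0][0]], distances[0][0])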
101
data/wider_face.py
Normal file
@ -0,0 +1,101 @@
import os
import os.path
import sys
import torch
import torch.utils.data as data
import cv2
import numpy as np

class WiderFaceDetection(data.Dataset):
    def __init__(self, txt_path, preproc=None):
        self.preproc = preproc
        self.imgs_path = []
        self.words = []
        f = open(txt_path,'r')
        lines = f.readlines()
        isFirst = True
        labels = []
        for line in lines:
            line = line.rstrip()
            if line.startswith('#'):
                if isFirst is True:
                    isFirst = False
                else:
                    labels_copy = labels.copy()
                    self.words.append(labels_copy)
                    labels.clear()
                path = line[2:]
                path = txt_path.replace('label.txt','images/') + path
                self.imgs_path.append(path)
            else:
                line = line.split(' ')
                label = [float(x) for x in line]
                labels.append(label)

        self.words.append(labels)

    def __len__(self):
        return len(self.imgs_path)

    def __getitem__(self, index):
        img = cv2.imread(self.imgs_path[index])
        height, width, _ = img.shape

        labels = self.words[index]
        annotations = np.zeros((0, 15))
        if len(labels) == 0:
            return annotations
        for idx, label in enumerate(labels):
            annotation = np.zeros((1, 15))
            # bbox
            annotation[0, 0] = label[0]  # x1
            annotation[0, 1] = label[1]  # y1
            annotation[0, 2] = label[0] + label[2]  # x2
            annotation[0, 3] = label[1] + label[3]  # y2

            # landmarks
            annotation[0, 4] = label[4]    # l0_x
            annotation[0, 5] = label[5]    # l0_y
            annotation[0, 6] = label[7]    # l1_x
            annotation[0, 7] = label[8]    # l1_y
            annotation[0, 8] = label[10]   # l2_x
            annotation[0, 9] = label[11]   # l2_y
            annotation[0, 10] = label[13]  # l3_x
            annotation[0, 11] = label[14]  # l3_y
            annotation[0, 12] = label[16]  # l4_x
            annotation[0, 13] = label[17]  # l4_y
            if (annotation[0, 4]<0):
                annotation[0, 14] = -1
            else:
                annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)
        target = np.array(annotations)
        if self.preproc is not None:
            img, target = self.preproc(img, target)

        return torch.from_numpy(img), target

def detection_collate(batch):
    """Custom collate fn for dealing with batches of images that have a different
    number of associated object annotations (bounding boxes).

    Arguments:
        batch: (tuple) A tuple of tensor images and lists of annotations

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (list of tensors) annotations for a given image are stacked on 0 dim
    """
    targets = []
    imgs = []
    for _, sample in enumerate(batch):
        for _, tup in enumerate(sample):
            if torch.is_tensor(tup):
                imgs.append(tup)
            elif isinstance(tup, type(np.empty(0))):
                annos = torch.from_numpy(tup).float()
                targets.append(annos)

    return (torch.stack(imgs, 0), targets)
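
A sketch of how this dataset is typically wired to a loader; the label path is a placeholder, and preproc/detection_collate come from the modules above:

# sketch: build a WIDER FACE training loader (label.txt path is hypothetical)
from torch.utils.data import DataLoader

dataset = WiderFaceDetection("./data/widerface/train/label.txt",
                             preproc(img_dim=640, rgb_means=(104, 117, 123)))
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    collate_fn=detection_collate)
images, targets = next(iter(loader))
print(images.shape, len(targets))   # (8, 3, 640, 640) and 8 per-image target tensors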
107
dataset.py
Normal file
@ -0,0 +1,107 @@
import numbers
import os
import queue as Queue
import threading

import mxnet as mx
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BackgroundGenerator(threading.Thread):
    def __init__(self, generator, local_rank, max_prefetch=6):
        super(BackgroundGenerator, self).__init__()
        self.queue = Queue.Queue(max_prefetch)
        self.generator = generator
        self.local_rank = local_rank
        self.daemon = True
        self.start()

    def run(self):
        torch.cuda.set_device(self.local_rank)
        for item in self.generator:
            self.queue.put(item)
        self.queue.put(None)

    def next(self):
        next_item = self.queue.get()
        if next_item is None:
            raise StopIteration
        return next_item

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self


class DataLoaderX(DataLoader):
    def __init__(self, local_rank, **kwargs):
        super(DataLoaderX, self).__init__(**kwargs)
        self.stream = torch.cuda.Stream(local_rank)
        self.local_rank = local_rank

    def __iter__(self):
        self.iter = super(DataLoaderX, self).__iter__()
        self.iter = BackgroundGenerator(self.iter, self.local_rank)
        self.preload()
        return self

    def preload(self):
        self.batch = next(self.iter, None)
        if self.batch is None:
            return None
        with torch.cuda.stream(self.stream):
            for k in range(len(self.batch)):
                self.batch[k] = self.batch[k].to(device=self.local_rank,
                                                 non_blocking=True)

    def __next__(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.batch
        if batch is None:
            raise StopIteration
        self.preload()
        return batch


class MXFaceDataset(Dataset):
    def __init__(self, root_dir, local_rank):
        super(MXFaceDataset, self).__init__()
        self.transform = transforms.Compose(
            [transforms.ToPILImage(),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
             ])
        self.root_dir = root_dir
        self.local_rank = local_rank
        path_imgrec = os.path.join(root_dir, 'train.rec')
        path_imgidx = os.path.join(root_dir, 'train.idx')
        self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
        s = self.imgrec.read_idx(0)
        header, _ = mx.recordio.unpack(s)
        if header.flag > 0:
            self.header0 = (int(header.label[0]), int(header.label[1]))
            self.imgidx = np.array(range(1, int(header.label[0])))
        else:
            self.imgidx = np.array(list(self.imgrec.keys))

    def __getitem__(self, index):
        idx = self.imgidx[index]
        s = self.imgrec.read_idx(idx)
        header, img = mx.recordio.unpack(s)
        label = header.label
        if not isinstance(label, numbers.Number):
            label = label[0]
        label = torch.tensor(label, dtype=torch.long)
        sample = mx.image.imdecode(img).asnumpy()
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label

    def __len__(self):
        return len(self.imgidx)
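
A sketch of consuming the record file through the prefetching loader; it assumes a CUDA device 0 and an MXNet train.rec/train.idx pair under the (hypothetical) root directory:

# sketch: iterate the MXNet record dataset via DataLoaderX (requires a GPU)
dataset = MXFaceDataset(root_dir="./faces_emore", local_rank=0)
loader = DataLoaderX(local_rank=0, dataset=dataset, batch_size=64, shuffle=True)
for imgs, labels in loader:
    print(imgs.shape, labels.shape)   # tensors already moved to cuda:0 by preload()
    break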
0
eval/__init__.py
Normal file
409
eval/verification.py
Normal file
@ -0,0 +1,409 @@
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""

# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import datetime
import os
import pickle

import mxnet as mx
import numpy as np
import sklearn
import torch
from mxnet import ndarray as nd
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold


class LFold:
    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        else:
            return [(indices, indices)]


def calculate_roc(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  nrof_folds=10,
                  pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(
                threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
                threshold, dist[test_set],
                actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best_threshold_index], dist[test_set],
            actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(
        np.logical_and(np.logical_not(predict_issame),
                       np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
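
A tiny worked example of calculate_accuracy with made-up distances, to make the TPR/FPR/accuracy bookkeeping concrete:

# sketch: 4 pairs at threshold 1.0 -> tp=1, fp=1, tn=1, fn=1
dist = np.array([0.5, 1.5, 0.8, 2.0])
actual_issame = np.array([True, True, False, False])
tpr, fpr, acc = calculate_accuracy(1.0, dist, actual_issame)
print(tpr, fpr, acc)   # 0.5 0.5 0.5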

def calculate_val(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  far_target,
                  nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(
                threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(
            threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(
        np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    # print(true_accept, false_accept)
    # print(n_same, n_diff)
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds,
                                       embeddings1,
                                       embeddings2,
                                       np.asarray(actual_issame),
                                       nrof_folds=nrof_folds,
                                       pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds,
                                      embeddings1,
                                      embeddings2,
                                      np.asarray(actual_issame),
                                      1e-3,
                                      nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


@torch.no_grad()
def load_bin(path, image_size):
    try:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f)  # py2
    except UnicodeDecodeError as e:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f, encoding='bytes')  # py3
    data_list = []
    for flip in [0, 1]:
        data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for idx in range(len(issame_list) * 2):
        _bin = bins[idx]
        img = mx.image.imdecode(_bin)
        if img.shape[1] != image_size[0]:
            img = mx.image.resize_short(img, image_size[0])
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
        if idx % 1000 == 0:
            print('loading bin', idx)
    print(data_list[0].shape)
    return data_list, issame_list


@torch.no_grad()
def test(data_set, backbone, batch_size, nfolds=10):
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = data[bb - batch_size: bb]
            time0 = datetime.datetime.now()
            img = ((_data / 255) - 0.5) / 0.5
            net_out: torch.Tensor = backbone(img)
            _embeddings = net_out.detach().cpu().numpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    acc1 = 0.0
    std1 = 0.0
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
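
A sketch of running the pair-verification test end to end; the .bin path is a placeholder, and the backbone import mirrors the one used elsewhere in this repo:

# sketch: evaluate a backbone on an LFW-style .bin pair file
from backbones import iresnet100

model = iresnet100()
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
model.eval()
data_set = load_bin("./eval/lfw.bin", image_size=[112, 112])
acc1, std1, acc2, std2, xnorm, _ = test(data_set, model, batch_size=64, nfolds=10)
print('Accuracy-Flip: %1.5f+-%1.5f' % (acc2, std2))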

def dumpR(data_set,
          backbone,
          batch_size,
          name='',
          data_extra=None,
          label_shape=None):
    print('dump verification embedding..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba

            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            time0 = datetime.datetime.now()
            # NOTE: `_label`, `_data_extra` and `model` below are undefined in this file;
            # this function is unported MXNet legacy code and raises NameError if called.
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data,), label=(_label,))
            else:
                db = mx.io.DataBatch(data=(_data, _data_extra),
                                     label=(_label,))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    actual_issame = np.asarray(issame_list)
    outname = os.path.join('temp.bin')
    with open(outname, 'wb') as f:
        pickle.dump((embeddings, issame_list),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)


# if __name__ == '__main__':
#
#     parser = argparse.ArgumentParser(description='do verification')
#     # general
#     parser.add_argument('--data-dir', default='', help='')
#     parser.add_argument('--model',
#                         default='../model/softmax,50',
#                         help='path to load model.')
#     parser.add_argument('--target',
#                         default='lfw,cfp_ff,cfp_fp,agedb_30',
#                         help='test targets.')
#     parser.add_argument('--gpu', default=0, type=int, help='gpu id')
#     parser.add_argument('--batch-size', default=32, type=int, help='')
#     parser.add_argument('--max', default='', type=str, help='')
#     parser.add_argument('--mode', default=0, type=int, help='')
#     parser.add_argument('--nfolds', default=10, type=int, help='')
#     args = parser.parse_args()
#     image_size = [112, 112]
#     print('image_size', image_size)
#     ctx = mx.gpu(args.gpu)
#     nets = []
#     vec = args.model.split(',')
#     prefix = args.model.split(',')[0]
#     epochs = []
#     if len(vec) == 1:
#         pdir = os.path.dirname(prefix)
#         for fname in os.listdir(pdir):
#             if not fname.endswith('.params'):
#                 continue
#             _file = os.path.join(pdir, fname)
#             if _file.startswith(prefix):
#                 epoch = int(fname.split('.')[0].split('-')[1])
#                 epochs.append(epoch)
#         epochs = sorted(epochs, reverse=True)
#         if len(args.max) > 0:
#             _max = [int(x) for x in args.max.split(',')]
#             assert len(_max) == 2
#             if len(epochs) > _max[1]:
#                 epochs = epochs[_max[0]:_max[1]]
#
#     else:
#         epochs = [int(x) for x in vec[1].split('|')]
#     print('model number', len(epochs))
#     time0 = datetime.datetime.now()
#     for epoch in epochs:
#         print('loading', prefix, epoch)
#         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
#         # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
#         all_layers = sym.get_internals()
#         sym = all_layers['fc1_output']
#         model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
#         # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
#         model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
#                                           image_size[1]))])
#         model.set_params(arg_params, aux_params)
#         nets.append(model)
#     time_now = datetime.datetime.now()
#     diff = time_now - time0
#     print('model loading time', diff.total_seconds())
#
#     ver_list = []
#     ver_name_list = []
#     for name in args.target.split(','):
#         path = os.path.join(args.data_dir, name + ".bin")
#         if os.path.exists(path):
#             print('loading.. ', name)
#             data_set = load_bin(path, image_size)
#             ver_list.append(data_set)
#             ver_name_list.append(name)
#
#     if args.mode == 0:
#         for i in range(len(ver_list)):
#             results = []
#             for model in nets:
#                 acc1, std1, acc2, std2, xnorm, embeddings_list = test(
#                     ver_list[i], model, args.batch_size, args.nfolds)
#                 print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
#                 print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
#                 print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
#                 results.append(acc2)
#             print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
#     elif args.mode == 1:
#         raise ValueError
#     else:
#         model = nets[0]
#         dumpR(ver_list[0], model, args.batch_size, args.target)
483
eval_ijbc.py
Normal file
@ -0,0 +1,483 @@
# coding: utf-8

import os
import pickle

import matplotlib
import pandas as pd

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import timeit
import sklearn
import argparse
from sklearn.metrics import roc_curve, auc

from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap
from prettytable import PrettyTable
from pathlib import Path
import sys
import warnings

sys.path.insert(0, "../")
warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description='do ijb test')
# general
parser.add_argument('--model-prefix', default='', help='path to load model.')
parser.add_argument('--image-path', default='', type=str, help='')
parser.add_argument('--result-dir', default='.', type=str, help='')
parser.add_argument('--batch-size', default=128, type=int, help='')
parser.add_argument('--network', default='iresnet50', type=str, help='')
parser.add_argument('--job', default='insightface', type=str, help='job name')
parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB')
args = parser.parse_args()

target = args.target
model_path = args.model_prefix
image_path = args.image_path
result_dir = args.result_dir
gpu_id = None
use_norm_score = True  # if True, TestMode(N1)
use_detector_score = True  # if True, TestMode(D1)
use_flip_test = True  # if True, TestMode(F1)
job = args.job
batch_size = args.batch_size

import cv2
import numpy as np
import torch
from skimage import transform as trans
import backbones


class Embedding(object):
    def __init__(self, prefix, data_shape, batch_size=1):
        image_size = (112, 112)
        self.image_size = image_size
        weight = torch.load(prefix)
        resnet = eval("backbones.{}".format(args.network))(False).cuda()
        resnet.load_state_dict(weight)
        model = torch.nn.DataParallel(resnet)
        self.model = model
        self.model.eval()
        src = np.array([
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041]], dtype=np.float32)
        src[:, 0] += 8.0
        self.src = src
        self.batch_size = batch_size
        self.data_shape = data_shape

    def get(self, rimg, landmark):

        assert landmark.shape[0] == 68 or landmark.shape[0] == 5
        assert landmark.shape[1] == 2
        if landmark.shape[0] == 68:
            landmark5 = np.zeros((5, 2), dtype=np.float32)
            landmark5[0] = (landmark[36] + landmark[39]) / 2
            landmark5[1] = (landmark[42] + landmark[45]) / 2
            landmark5[2] = landmark[30]
            landmark5[3] = landmark[48]
            landmark5[4] = landmark[54]
        else:
            landmark5 = landmark
        tform = trans.SimilarityTransform()
        tform.estimate(landmark5, self.src)
        M = tform.params[0:2, :]
        img = cv2.warpAffine(rimg,
                             M, (self.image_size[1], self.image_size[0]),
                             borderValue=0.0)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_flip = np.fliplr(img)
        img = np.transpose(img, (2, 0, 1))  # 3*112*112, RGB
        img_flip = np.transpose(img_flip, (2, 0, 1))
        input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img
        input_blob[1] = img_flip
        return input_blob

    @torch.no_grad()
    def forward_db(self, batch_data):
        imgs = torch.Tensor(batch_data).cuda()
        imgs.div_(255).sub_(0.5).div_(0.5)
        feat = self.model(imgs)
        feat = feat.reshape([self.batch_size, 2 * feat.shape[1]])
        return feat.cpu().numpy()


# Split a list into n parts as evenly as possible; len(result) == n, and if n
# exceeds the number of elements the surplus parts are empty lists.
def divideIntoNstrand(listTemp, n):
    twoList = [[] for i in range(n)]
    for i, e in enumerate(listTemp):
        twoList[i % n].append(e)
    return twoList


def read_template_media_list(path):
    # ijb_meta = np.loadtxt(path, dtype=str)
    ijb_meta = pd.read_csv(path, sep=' ', header=None).values
    templates = ijb_meta[:, 1].astype(int)  # np.int was removed in NumPy 1.24+; plain int is equivalent
    medias = ijb_meta[:, 2].astype(int)
    return templates, medias


# In[ ]:


def read_template_pair_list(path):
    # pairs = np.loadtxt(path, dtype=str)
    pairs = pd.read_csv(path, sep=' ', header=None).values
    # print(pairs.shape)
    # print(pairs[:, 0].astype(np.int))
    t1 = pairs[:, 0].astype(int)
    t2 = pairs[:, 1].astype(int)
    label = pairs[:, 2].astype(int)
    return t1, t2, label


# In[ ]:


def read_image_feature(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# In[ ]:


def get_image_feature(img_path, files_list, model_path, epoch, gpu_id):
    batch_size = args.batch_size
    data_shape = (3, 112, 112)

    files = files_list
    print('files:', len(files))
    rare_size = len(files) % batch_size
    faceness_scores = []
    batch = 0
    img_feats = np.empty((len(files), 1024), dtype=np.float32)

    batch_data = np.empty((2 * batch_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, batch_size)
    for img_index, each_line in enumerate(files[:len(files) - rare_size]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)

        batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0]
        batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1]
        if (img_index + 1) % batch_size == 0:
            print('batch', batch)
            img_feats[batch * batch_size:batch * batch_size +
                      batch_size][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])

    batch_data = np.empty((2 * rare_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, rare_size)
    for img_index, each_line in enumerate(files[len(files) - rare_size:]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)
        batch_data[2 * img_index][:] = input_blob[0]
        batch_data[2 * img_index + 1][:] = input_blob[1]
        if (img_index + 1) % rare_size == 0:
            print('batch', batch)
            img_feats[len(files) -
                      rare_size:][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])
    faceness_scores = np.array(faceness_scores).astype(np.float32)
    # img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01
    # faceness_scores = np.ones( (len(files), ), dtype=np.float32 )
    return img_feats, faceness_scores
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def image2template_feature(img_feats=None, templates=None, medias=None):
|
||||||
|
# ==========================================================
|
||||||
|
# 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]
|
||||||
|
# 2. compute media feature.
|
||||||
|
# 3. compute template feature.
|
||||||
|
# ==========================================================
|
||||||
|
unique_templates = np.unique(templates)
|
||||||
|
template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))
|
||||||
|
|
||||||
|
for count_template, uqt in enumerate(unique_templates):
|
||||||
|
|
||||||
|
(ind_t,) = np.where(templates == uqt)
|
||||||
|
face_norm_feats = img_feats[ind_t]
|
||||||
|
face_medias = medias[ind_t]
|
||||||
|
unique_medias, unique_media_counts = np.unique(face_medias,
|
||||||
|
return_counts=True)
|
||||||
|
media_norm_feats = []
|
||||||
|
for u, ct in zip(unique_medias, unique_media_counts):
|
||||||
|
(ind_m,) = np.where(face_medias == u)
|
||||||
|
if ct == 1:
|
||||||
|
media_norm_feats += [face_norm_feats[ind_m]]
|
||||||
|
else: # image features from the same video will be aggregated into one feature
|
||||||
|
media_norm_feats += [
|
||||||
|
np.mean(face_norm_feats[ind_m], axis=0, keepdims=True)
|
||||||
|
]
|
||||||
|
media_norm_feats = np.array(media_norm_feats)
|
||||||
|
# media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))
|
||||||
|
template_feats[count_template] = np.sum(media_norm_feats, axis=0)
|
||||||
|
if count_template % 2000 == 0:
|
||||||
|
print('Finish Calculating {} template features.'.format(
|
||||||
|
count_template))
|
||||||
|
# template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))
|
||||||
|
template_norm_feats = sklearn.preprocessing.normalize(template_feats)
|
||||||
|
# print(template_norm_feats.shape)
|
||||||
|
return template_norm_feats, unique_templates
|
||||||
|
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
def verification(template_norm_feats=None,
                 unique_templates=None,
                 p1=None,
                 p2=None):
    # ==========================================================
    # Compute set-to-set Similarity Score.
    # ==========================================================
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template

    score = np.zeros((len(p1),))  # save cosine distance between pairs

    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score
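
# Illustrative only: verification() on two hand-made, already-normalized
# templates and a single pair (the template ids 11 and 42 are arbitrary).
# tf = np.array([[1.0, 0.0], [0.0, 1.0]])
# uq = np.array([11, 42])
# print(verification(tf, uq, np.array([11]), np.array([42])))  # [0.] - orthogonal templates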

# In[ ]:

def verification2(template_norm_feats=None,
                  unique_templates=None,
                  p1=None,
                  p2=None):
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template
    score = np.zeros((len(p1),))  # save cosine distance between pairs
    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score

def read_score(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# # Step1: Load Meta Data

# In[ ]:

assert target == 'IJBC' or target == 'IJBB'

# =============================================================
# load image and template relationships for template feature embedding
# tid --> template id, mid --> media id
# format:
#   image_name tid mid
# =============================================================
start = timeit.default_timer()
templates, medias = read_template_media_list(
    os.path.join('%s/meta' % image_path,
                 '%s_face_tid_mid.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# In[ ]:

# =============================================================
# load template pairs for template-to-template verification
# tid : template id, label : 1/0
# format:
#   tid_1 tid_2 label
# =============================================================
start = timeit.default_timer()
p1, p2, label = read_template_pair_list(
    os.path.join('%s/meta' % image_path,
                 '%s_template_pair_label.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# # Step 2: Get Image Features

# In[ ]:

# =============================================================
# load image features
# format:
#   img_feats: [image_num x feats_dim] (227630, 512)
# =============================================================
start = timeit.default_timer()
img_path = '%s/loose_crop' % image_path
img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower())
img_list = open(img_list_path)
files = img_list.readlines()
# files_list = divideIntoNstrand(files, rank_size)
files_list = files

# img_feats
# for i in range(rank_size):
img_feats, faceness_scores = get_image_feature(img_path, files_list,
                                               model_path, 0, gpu_id)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0],
                                          img_feats.shape[1]))


# # Step3: Get Template Features

# In[ ]:

# =============================================================
# compute template features from image features.
# =============================================================
start = timeit.default_timer()
# ==========================================================
# Norm feature before aggregation into template feature?
# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).
# ==========================================================
# 1. FaceScore (Feature Norm)
# 2. FaceScore (Detector)

if use_flip_test:
    # concat --- F1
    # img_input_feats = img_feats
    # add --- F2
    img_input_feats = img_feats[:, 0:img_feats.shape[1] //
                                2] + img_feats[:, img_feats.shape[1] // 2:]
else:
    img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2]

if use_norm_score:
    img_input_feats = img_input_feats
else:
    # normalise features to remove norm information
    img_input_feats = img_input_feats / np.sqrt(
        np.sum(img_input_feats ** 2, -1, keepdims=True))

if use_detector_score:
    print(img_input_feats.shape, faceness_scores.shape)
    img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]
else:
    img_input_feats = img_input_feats
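
# Illustrative only: the flip-test "add" fusion above assumes each row stores
# the original embedding concatenated with the flipped-image embedding, so
# summing the two halves fuses them (toy values):
# row = np.array([[1., 2., 3., 4.]])   # [orig | flipped], feats_dim = 2
# fused = row[:, :2] + row[:, 2:]      # -> [[4., 6.]]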

template_norm_feats, unique_templates = image2template_feature(
    img_input_feats, templates, medias)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# # Step 4: Get Template Similarity Scores

# In[ ]:

# =============================================================
# compute verification scores between template pairs.
# =============================================================
start = timeit.default_timer()
score = verification(template_norm_feats, unique_templates, p1, p2)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))


# In[ ]:

save_path = os.path.join(result_dir, args.job)
# save_path = result_dir + '/%s_result' % target

if not os.path.exists(save_path):
    os.makedirs(save_path)

score_save_file = os.path.join(save_path, "%s.npy" % target.lower())
np.save(score_save_file, score)


# # Step 5: Get ROC Curves and TPR@FPR Table

# In[ ]:

files = [score_save_file]
methods = []
scores = []
for file in files:
    methods.append(Path(file).stem)
    scores.append(np.load(file))

methods = np.array(methods)
scores = dict(zip(methods, scores))
colours = dict(
    zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))
x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1]
tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels])
fig = plt.figure()
for method in methods:
    fpr, tpr, _ = roc_curve(label, scores[method])
    roc_auc = auc(fpr, tpr)
    fpr = np.flipud(fpr)
    tpr = np.flipud(tpr)  # select largest tpr at same fpr
    plt.plot(fpr,
             tpr,
             color=colours[method],
             lw=1,
             label=('[%s (AUC = %0.4f %%)]' %
                    (method.split('-')[-1], roc_auc * 100)))
    tpr_fpr_row = []
    tpr_fpr_row.append("%s-%s" % (method, target))
    for fpr_iter in np.arange(len(x_labels)):
        _, min_index = min(
            list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr)))))
        tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100))
    tpr_fpr_table.add_row(tpr_fpr_row)
plt.xlim([10 ** -6, 0.1])
plt.ylim([0.3, 1.0])
plt.grid(linestyle='--', linewidth=1)
plt.xticks(x_labels)
plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True))
plt.xscale('log')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC on IJB')
plt.legend(loc="lower right")
fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower()))
print(tpr_fpr_table)
377
face_api.py
Normal file
@ -0,0 +1,377 @@
import os
import time
import re
import torch
import cv2
import numpy as np

from anti import anti_spoofing, load_anti_model
from backbones import iresnet50, iresnet18, iresnet100
from retinaface_detect import load_retinaface_model, detect_one, detect_video, set_retinaface_conf
from torch2trt import torch2trt, TRTModule

threshold = 0.7


# Read a local 112x112 image, move channels first, and normalize to [-1, 1]
def load_image(img_path):
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


# Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


# Cosine distance between two feature vectors
def findCosineDistance(source_representation, test_representation):
    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


# l2-normalize a feature vector (used before Euclidean matching)
def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


# Cosine similarity of two vectors
def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
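
# Quick sanity note (illustrative, not called anywhere): on l2-normalized
# embeddings the two metrics are monotonically related,
# ||a - b||^2 = 2 - 2 * cos(a, b), so the Euclidean threshold of 0.7 above
# corresponds to a cosine similarity of about 1 - 0.7**2 / 2 = 0.755.
# a = l2_normalize(np.array([1.0, 2.0, 3.0]))
# b = l2_normalize(np.array([1.5, 1.8, 3.2]))
# assert abs(findEuclideanDistance(a, b) ** 2 - (2 - 2 * cosin_metric(a, b))) < 1e-6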

# Load the face database of saved names and face feature vectors
def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


# Generate face feature vectors in batches and save them to the face database
def create_database_batch(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    order_name = []
    order_path = []
    emb_list = []
    for name in name_list[:]:
        img_path = os.path.join(path, name)
        # for img_name in img_path[:1]:
        order_name.append(name[:-4])
        order_path.append(img_path)
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))

    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb)
    np.save(database_path, k_v)

def create_database_from_img(order_name, order_img, model, database_path, cpu_or_cuda):
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    emb_list = []

    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))
    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb)
    np.save(database_path, k_v)


# Add one person's name and face feature vector to the database; create the database if it does not exist
def add_one_to_database(img, model, name, database_path, cpu_or_cuda):
    img = torch.from_numpy(img)
    img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        pred = model(img)
        pred = pred.cpu().numpy()
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


# Find which face feature vector in the database is closest to this one
def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like, distance
    else:
        return -1, distance

def faiss_find_face(pred, index, database_name_list):
    name_list = []
    start_time = time.time()
    D, I = index.search(pred, 1)
    end_time = time.time()
    # print("faiss cost %fs" % (end_time - start_time))
    # print(D, I)
    if len(pred) == 1:
        if D[0][0] < threshold:
            # print(database_name_list[I[0][0]])
            return database_name_list[I[0][0]], D[0][0]
        else:
            return "unknown", D[0][0]
    else:
        for i, row in enumerate(I):  # loop variable renamed from `index` to avoid shadowing the faiss index argument
            if D[i][0] < threshold:
                # print(database_name_list[I[0][0]])
                name_list.append(database_name_list[row[0]] + str(D[i][0]))
            else:
                name_list.append("unknown" + str(D[i][0]))
        return name_list
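
# A minimal sketch (assumption: faiss is installed; this is not part of the
# file's original code) of building the `index` searched above from the saved
# database. Note that faiss.IndexFlatL2 returns squared L2 distances, so
# `threshold` is effectively compared against d**2 in faiss_find_face.
# import faiss
# k_v = load_npy("./Database/student.npy")
# database_name_list = list(k_v.keys())
# vectors = np.array(list(k_v.values()), dtype=np.float32).reshape(len(k_v), -1)
# index = faiss.IndexFlatL2(vectors.shape[1])  # exact (brute-force) L2 search
# index.add(vectors)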

# Find a single face in the face database
def findOne(img, model, index, database_name_list, cpu_or_cuda):
    img = torch.from_numpy(img)
    img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        print("predOne time: " + str(end_time - start_time))
        pred = pred.cpu().numpy()
        # start_time = time.time()
        # name, distance = findmindistance(l2_normalize(pred), threshold=threshold, k_v=k_v)
        # end_time = time.time()
        # print("baoli time: " + str(end_time - start_time))
        name, distance = faiss_find_face(l2_normalize(pred), index, database_name_list)
        print(pred.shape)
        if name != -1:
            mo = r'[\u4e00-\u9fa5_a-zA-Z0-9]*'
            name = re.match(mo, name)
            return name.group(0), distance
        else:
            return "unknown", distance

# Find every face from the input face list in the face database
def findAll(imglist, model, index, database_name_list, cpu_or_cuda):
    imglist = torch.from_numpy(imglist)
    imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        name_list = []
        start_time = time.time()
        pred = model(imglist)
        end_time = time.time()
        print("predOne time: " + str(end_time - start_time))
        pred = pred.cpu().numpy()
        start_time = time.time()
        # name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
        for pr in pred:
            pr = np.expand_dims(l2_normalize(pr), 0)
            # print(pr.shape)
            name, distance = faiss_find_face(l2_normalize(pr), index, database_name_list)
            # name_list.append(name + " " + str(distance))
            name_list.append(name)
        # for pr in pred:
        #     name, distance = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
        #     if name != -1:
        #         mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
        #         name = re.match(mo, name)
        #         name_list.append(name.group(0) + str(distance))
        #     else:
        #         name_list.append("unknown" + str(distance))
        end_time = time.time()
        print("searchALL time: " + str(end_time - start_time))
        return name_list

# Extract 512-dim feature vectors
def embedding(order_img, model, cpu_or_cuda):
    number = len(order_img)
    order_img = torch.from_numpy(order_img)
    order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    batch = 64
    emb_list = []
    now = 0
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            emb = emb.cpu().numpy()
            for em in emb:
                emb_list.append(l2_normalize(em))
            # print("batch" + str(now))
    emb_list = np.array(emb_list)
    return emb_list

# Process a folder of faces to cluster; return the feature-vector list and the file-name list
def get_claster_tmp_file_embedding(file_path, retinaface_model, retinaface_args, arcface_model, cpu_or_cuda):
    img_name = os.listdir(file_path)
    img_list = []
    for name in img_name:
        all_face, box_and_point = detect_one(os.path.join(file_path, name), retinaface_model, retinaface_args)
        img_list.append(all_face[0])
    img_list = np.array(img_list)
    # print(img_list.shape)
    emb_list = embedding(img_list, arcface_model, cpu_or_cuda)
    return emb_list, img_name

# Group faces of the same person into one cluster
def cluster(emb_list, name_list):
    all_claster = []
    cla = []
    in_claster_name = []
    img_number = len(emb_list)
    for index, emb in enumerate(emb_list):
        if name_list[index] in in_claster_name:
            continue
        for j in range(img_number - index - 1):
            if findEuclideanDistance(emb, emb_list[index + 1 + j]) < threshold:
                if name_list[index + 1 + j] not in in_claster_name:
                    cla.append(name_list[index + 1 + j])
                    in_claster_name.append(name_list[index + 1 + j])
        cla.append(name_list[index])
        in_claster_name.append(name_list[index])
        all_claster.append(cla)
        cla = []
    return all_claster
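
# Illustrative only: with threshold = 0.7, embeddings closer than the
# threshold land in one group (toy unit vectors; a.jpg and b.jpg are
# near-duplicates, c.jpg is orthogonal to both).
# e = np.array([[1.0, 0.0], [0.999, 0.045], [0.0, 1.0]], dtype=np.float32)
# print(cluster(e, ["a.jpg", "b.jpg", "c.jpg"]))  # [['b.jpg', 'a.jpg'], ['c.jpg']]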

# Load the face recognition model
def load_arcface_model(model_path, cpu_or_cuda):
    if cpu_or_cuda == "trt":
        model = TRTModule()
        model.load_state_dict(torch.load('./model/arcface_trt.pth'))
    elif cpu_or_cuda == "trt_new":
        model = iresnet100()
        model.load_state_dict(torch.load(model_path, map_location="cuda"))
        model = model.eval()
        model.to(torch.device("cuda"))
        x = torch.ones((1, 3, 112, 112)).to(torch.device("cuda"))
        model = torch2trt(model, [x], max_batch_size=4)
        torch.save(model.state_dict(), './model/arcface_trt.pth')
    else:
        model = iresnet100()
        model.load_state_dict(torch.load(model_path, map_location=cpu_or_cuda))
        model = model.eval()
        model.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    return model

# Compare whether two faces belong to the same person
def face_verification(img1, img2, model, cpu_or_cuda):
    img_list = np.concatenate((img1, img2), axis=0)
    img_list = torch.from_numpy(img_list)
    img_list = img_list.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        pred = model(img_list)
        pred = pred.cpu().numpy()
        distance = findEuclideanDistance(l2_normalize(pred[0]), l2_normalize(pred[1]))
        # print("EuclideanDistance is :" + str(distance))
        if distance < threshold:
            return 'same ', distance
        else:
            return 'different ', distance

if __name__ == '__main__':
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
    # retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    # retinaface_model = load_retinaface_model(retinaface_args)
    #
    # anti_spoofing_model_path = "model/anti_spoof_models"
    # anti_model = load_anti_model(anti_spoofing_model_path, 0)
    #
    # k_v = load_npy("./Database/student.npy")

    # Compare two faces
    # img1, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0001.jpg", retinaface_model, retinaface_args)
    # img2, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0002.jpg", retinaface_model, retinaface_args)
    # print(face_verification(img1, img2, arcface_model, cpu_or_cuda))

    # img3 = load_image(r"D:\Download\out\alig_students\student.jpg")
    # img3 = torch.from_numpy(img3)

    # Liveness (anti-spoofing) check on a single face
    # img3, b_p = detect_one(r"C:\Users\ASUS\Desktop\face\IMG_20210525_113950.jpg", retinaface_model, retinaface_args)
    # b = b_p[0]
    # w = b[2] - b[0]
    # h = b[3] - b[1]
    # b[2] = w
    # b[3] = h
    # label, value = anti_spoofing("./img/recognition/000_0.bmp", "model/anti_spoof_models", 0, np.array(b[:4], int), anti_model)
    # print(label, value)
    # name = findOne(img3, arcface_model, k_v, cpu_or_cuda)
    # print(name)

    # Face clustering
    # emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/face", retinaface_model,
    #                                                      retinaface_args, arcface_model, cpu_or_cuda)
    # print(cluster(emb_list, name_list))

    # img3, box_and_point = detect_one("D:\Download\out\students\student.jpg", retinaface_model, retinaface_args)
    # print(embedding(img3, arcface_model, cpu_or_cuda).shape)

    # Add a single face to the database
    # add_one_to_database(img1, arcface_model, "Aaron_Peirsol", "./Database/student.npy", cpu_or_cuda)
    # name = findOne(img1, arcface_model, k_v)
    # print(name)

    # Add faces to the database in batches
    create_database_batch(r"D:\Download\out\alig_students_all", arcface_model, "./Database/sfz.npy")

    # Recognize faces in a video
    # detect_video("software.mp4", "out.avi", retinaface_model, arcface_model, k_v, retinaface_args)
98
gender_age.py
Normal file
@ -0,0 +1,98 @@
import datetime
import mxnet as mx
import numpy as np
from retinaface_detect import detect_one, load_retinaface_model, set_retinaface_conf


# Gender/age model configuration
class ConfGenderModel(object):
    def __init__(self, image_size, image, model, gpu, det):
        self.image_size = image_size
        self.image = image
        self.gpu = gpu
        self.model = model
        self.det = det


# Instantiate a configuration
def set_gender_conf():
    args = ConfGenderModel(image_size='112,112',
                           image=r'C:\Users\ASUS\Desktop\man.png',
                           gpu=-1,
                           model='model/model,0',
                           det=0)
    return args


# Load the gender/age model
def load_gender_model(args, layer):
    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()
    _vec = args.image_size.split(',')
    assert len(_vec) == 2
    image_size = (int(_vec[0]), int(_vec[1]))

    _vec = args.model.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers[layer + '_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    return model

# Forward inference
def get_ga(model, img):
    # print(data)
    model.forward(img, is_train=False)
    ret = model.get_outputs()[0].asnumpy()
    g = ret[:, 0:2].flatten()
    gender = np.argmax(g)
    a = ret[:, 2:202].reshape((100, 2))
    a = np.argmax(a, axis=1)
    age = int(sum(a))
    return gender, age
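
# Illustrative only: the head emits 202 values per face - ret[:, 0:2] is a
# 2-way gender score and ret[:, 2:202] is 100 binary pairs whose per-pair
# argmax votes are summed into the age estimate. A fake output with 30 "yes"
# pairs therefore decodes to age 30:
# ret = np.zeros((1, 202), dtype=np.float32)
# ret[0, 2:62][1::2] = 1.0
# a = np.argmax(ret[:, 2:202].reshape((100, 2)), axis=1)
# assert int(sum(a)) == 30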

# Predict the gender and age of every person in a face list
def gender_age(img_list, gender_model):
    gender_list = []
    age_list = []
    if len(img_list) == 0:
        print("find no face")
    else:
        time_now = datetime.datetime.now()
        img_list *= 127.5
        img_list += 127.5

        for img in img_list:
            img = np.expand_dims(img, axis=0)
            img = mx.nd.array(img)
            img = mx.io.DataBatch(data=(img,))
            gender, age = get_ga(gender_model, img)
            if gender == 1:
                gender_list.append("man")
            else:
                gender_list.append('woman')
            age_list.append(age)
        time_now2 = datetime.datetime.now()
        diff = time_now2 - time_now
        print('time cost', diff.total_seconds())
    return gender_list, age_list

if __name__ == "__main__":
    args = set_gender_conf()
    retinaface_args = set_retinaface_conf()
    gender_model = load_gender_model(args, 'fc1')
    retinaface_model = load_retinaface_model(retinaface_args)
    img_list, box_and_point = detect_one(args.image, retinaface_model, retinaface_args)
    gender_list, age_list = gender_age(img_list, gender_model)
    print(gender_list)
49
gender_model.py
Normal file
@ -0,0 +1,49 @@
import numpy as np
import mxnet as mx


# Load the gender/age model
def get_model(ctx, image_size, model_str, layer):
    _vec = model_str.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers[layer + '_output']
    model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    return model


class GenderModel:
    def __init__(self, args):
        self.args = args
        if args.gpu >= 0:
            ctx = mx.gpu(args.gpu)
        else:
            ctx = mx.cpu()
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.model = None
        if len(args.model) > 0:
            self.model = get_model(ctx, image_size, args.model, 'fc1')

        self.det_minsize = 50
        self.det_threshold = [0.6, 0.7, 0.8]
        # self.det_factor = 0.9
        self.image_size = image_size

    def get_ga(self, data):
        # print(data)
        self.model.forward(data, is_train=False)
        ret = self.model.get_outputs()[0].asnumpy()
        g = ret[:, 0:2].flatten()
        gender = np.argmax(g)
        a = ret[:, 2:202].reshape((100, 2))
        a = np.argmax(a, axis=1)
        age = int(sum(a))
        return gender, age
BIN
img/search/000_1.bmp
Normal file
BIN
img/search/002_1.bmp
Normal file
BIN
img/search/377_3.bmp
Normal file
BIN
img/search/face/000_0.bmp
Normal file
BIN
img/search/face/000_1.bmp
Normal file
BIN
img/search/face/000_2.bmp
Normal file
BIN
img/search/face/000_3.bmp
Normal file
BIN
img/search/face/000_4.bmp
Normal file
BIN
img/search/face/001_0.bmp
Normal file
BIN
img/search/face/001_1.bmp
Normal file
BIN
img/search/face/001_2.bmp
Normal file
BIN
img/search/face/001_3.bmp
Normal file
BIN
img/search/face/001_4.bmp
Normal file
BIN
img/search/face/002_0.bmp
Normal file
BIN
img/search/face/002_1.bmp
Normal file
BIN
img/search/face/002_2.bmp
Normal file
BIN
img/search/face/002_3.bmp
Normal file
BIN
img/search/face/002_4.bmp
Normal file
BIN
img/search/face/003_0.bmp
Normal file
BIN
img/search/face/003_1.bmp
Normal file
BIN
img/search/face/003_2.bmp
Normal file
BIN
img/search/face/003_3.bmp
Normal file
BIN
img/search/face/003_4.bmp
Normal file
BIN
img/search/face/004_0.bmp
Normal file
BIN
img/search/face/004_1.bmp
Normal file
BIN
img/search/face/004_2.bmp
Normal file
BIN
img/search/face/004_3.bmp
Normal file
BIN
img/search/face/004_4.bmp
Normal file
2
layers/__init__.py
Normal file
@ -0,0 +1,2 @@
from .functions import *
from .modules import *
BIN
layers/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/functions/__pycache__/prior_box.cpython-38.pyc
Normal file
34
layers/functions/prior_box.py
Normal file
@ -0,0 +1,34 @@
import torch
from itertools import product as product
import numpy as np
from math import ceil


class PriorBox(object):
    def __init__(self, cfg, image_size=None, phase='train'):
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        # back to torch land
        output = torch.Tensor(anchors).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
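
# Illustrative usage (the cfg values below mirror the usual mobilenet0.25
# RetinaFace settings but are assumptions here, not taken from this repo):
# cfg = {'min_sizes': [[16, 32], [64, 128], [256, 512]],
#        'steps': [8, 16, 32], 'clip': False}
# priors = PriorBox(cfg, image_size=(640, 640)).forward()
# priors.shape == torch.Size([16800, 4])  # rows are normalized (cx, cy, w, h)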
3
layers/modules/__init__.py
Normal file
@ -0,0 +1,3 @@
from .multibox_loss import MultiBoxLoss

__all__ = ['MultiBoxLoss']
BIN
layers/modules/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/modules/__pycache__/multibox_loss.cpython-38.pyc
Normal file
125
layers/modules/multibox_loss.py
Normal file
@ -0,0 +1,125 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.box_utils import match, log_sum_exp
from data import cfg_mnet

GPU = cfg_mnet['gpu_train']


class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold parameter
           (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of ground
           truth boxes and their matched 'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative examples
           that comes with using a large number of default bounding boxes.
           (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
        Args:
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        self.variance = [0.1, 0.2]

    def forward(self, predictions, priors, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            ground_truth (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """

        loc_data, conf_data, landm_data = predictions
        priors = priors
        num = loc_data.size(0)
        num_priors = (priors.size(0))

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :4].data
            labels = targets[idx][:, -1].data
            landms = targets[idx][:, 4:14].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
        if GPU:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
            landm_t = landm_t.cuda()

        zeros = torch.tensor(0).cuda()
        # landm Loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        pos1 = conf_t > zeros
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)
        landm_t = landm_t[pos_idx1].view(-1, 10)
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        pos = conf_t != zeros
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1

        return loss_l, loss_c, loss_landm
33
losses.py
Normal file
@ -0,0 +1,33 @@
import torch
from torch import nn


class CosFace(nn.Module):
    def __init__(self, s=64.0, m=0.40):
        super(CosFace, self).__init__()
        self.s = s
        self.m = m

    def forward(self, cosine, label):
        index = torch.where(label != -1)[0]
        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
        m_hot.scatter_(1, label[index, None], self.m)
        cosine[index] -= m_hot
        ret = cosine * self.s
        return ret


class ArcFace(nn.Module):
    def __init__(self, s=64.0, m=0.5):
        super(ArcFace, self).__init__()
        self.s = s
        self.m = m

    def forward(self, cosine: torch.Tensor, label):
        index = torch.where(label != -1)[0]
        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
        m_hot.scatter_(1, label[index, None], self.m)
        cosine.acos_()
        cosine[index] += m_hot
        cosine.cos_().mul_(self.s)
        return cosine
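
# Illustrative only: what the additive angular margin does to one logit row
# (toy values; the in-place ops mutate their input, hence the .clone()).
# logits = torch.tensor([[0.8, 0.3]])   # cosines toward classes 0 and 1
# label = torch.tensor([0])
# out = ArcFace(s=64.0, m=0.5)(logits.clone(), label)
# class 0 becomes 64 * cos(acos(0.8) + 0.5) ~= 26.4, far below the
# unmargined 64 * 0.8 = 51.2, so training must push the true-class cosine up.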
BIN
model/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth
Normal file
BIN
model/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth
Normal file
BIN
model/backbone100.pth
Normal file
BIN
model/model-0000.params
Normal file
2399
model/model-symbol.json
Normal file
BIN
model/onnx/centerface.onnx
Normal file
BIN
model/onnx/centerface_bnmerged.onnx
Normal file
0
models/__init__.py
Normal file
BIN
models/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
models/__pycache__/net.cpython-38.pyc
Normal file
BIN
models/__pycache__/retinaface.cpython-38.pyc
Normal file
137
models/net.py
Normal file
@ -0,0 +1,137 @@
import time
import torch
import torch.nn as nn
import torchvision.models._utils as _utils
import torchvision.models as models
import torch.nn.functional as F
from torch.autograd import Variable


def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )


def conv_bn1X1(inp, oup, stride, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_dw(inp, oup, stride, leaky=0.1):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    )


class SSH(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        leaky = 0
        if out_channel <= 64:
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out


class FPN(nn.Module):
    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        leaky = 0
        if out_channels <= 64:
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        # names = list(input.keys())
        input = list(input.values())

        output1 = self.output1(input[0])
        output2 = self.output2(input[1])
        output3 = self.output3(input[2])

        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
        output2 = output2 + up3
        output2 = self.merge2(output2)

        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
        output1 = output1 + up2
        output1 = self.merge1(output1)

        out = [output1, output2, output3]
        return out


class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),  # 3
            conv_dw(8, 16, 1),    # 7
            conv_dw(16, 32, 2),   # 11
            conv_dw(32, 32, 1),   # 19
            conv_dw(32, 64, 2),   # 27
            conv_dw(64, 64, 1),   # 43
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),   # 43 + 16 = 59
            conv_dw(128, 128, 1),  # 59 + 32 = 91
            conv_dw(128, 128, 1),  # 91 + 32 = 123
            conv_dw(128, 128, 1),  # 123 + 32 = 155
            conv_dw(128, 128, 1),  # 155 + 32 = 187
            conv_dw(128, 128, 1),  # 187 + 32 = 219
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),  # 219 + 32 = 241
            conv_dw(256, 256, 1),  # 241 + 64 = 301
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        # x = self.model(x)
        x = x.view(-1, 256)
        x = self.fc(x)
        return x

127
models/retinaface.py
Normal file
@ -0,0 +1,127 @@
import torch
import torch.nn as nn
import torchvision.models.detection.backbone_utils as backbone_utils
import torchvision.models._utils as _utils
import torch.nn.functional as F
from collections import OrderedDict

from models.net import MobileNetV1 as MobileNetV1
from models.net import FPN as FPN
from models.net import SSH as SSH


class ClassHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(ClassHead, self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 2)


class BboxHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(BboxHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 4)


class LandmarkHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(LandmarkHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 10)


class RetinaFace(nn.Module):
    def __init__(self, cfg=None, phase='train'):
        """
        :param cfg:  Network related settings.
        :param phase: train or test.
        """
        super(RetinaFace, self).__init__()
        self.phase = phase
        backbone = None
        if cfg['name'] == 'mobilenet0.25':
            backbone = MobileNetV1()
            if cfg['pretrain']:
                checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
                from collections import OrderedDict
                new_state_dict = OrderedDict()
                for k, v in checkpoint['state_dict'].items():
                    name = k[7:]  # remove module.
                    new_state_dict[name] = v
                # load params
                backbone.load_state_dict(new_state_dict)
        elif cfg['name'] == 'Resnet50':
            import torchvision.models as models
            backbone = models.resnet50(pretrained=cfg['pretrain'])

        self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])
        in_channels_stage2 = cfg['in_channel']
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]
        out_channels = cfg['out_channel']
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        classhead = nn.ModuleList()
        for i in range(fpn_num):
            classhead.append(ClassHead(inchannels, anchor_num))
        return classhead

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        bboxhead = nn.ModuleList()
        for i in range(fpn_num):
            bboxhead.append(BboxHead(inchannels, anchor_num))
        return bboxhead

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        landmarkhead = nn.ModuleList()
        for i in range(fpn_num):
            landmarkhead.append(LandmarkHead(inchannels, anchor_num))
        return landmarkhead

    def forward(self, inputs):
        out = self.body(inputs)

        # FPN
        fpn = self.fpn(out)

        # SSH
        feature1 = self.ssh1(fpn[0])
        feature2 = self.ssh2(fpn[1])
        feature3 = self.ssh3(fpn[2])
        features = [feature1, feature2, feature3]

        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)

        if self.phase == 'train':
            output = (bbox_regressions, classifications, ldm_regressions)
        else:
            output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
        return output
161
partial_fc.py
Normal file
@ -0,0 +1,161 @@
import logging
import os

import torch
import torch.distributed as dist
from torch.nn import Module
from torch.nn.functional import normalize, linear
from torch.nn.parameter import Parameter


class PartialFC(Module):
    """
    Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint,
    Partial FC: Training 10 Million Identities on a Single Machine
    See the original paper:
    https://arxiv.org/abs/2010.05222
    """

    @torch.no_grad()
    def __init__(self, rank, local_rank, world_size, batch_size, resume,
                 margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"):
        super(PartialFC, self).__init__()
        #
        self.num_classes: int = num_classes
        self.rank: int = rank
        self.local_rank: int = local_rank
        self.device: torch.device = torch.device("cuda:{}".format(self.local_rank))
        self.world_size: int = world_size
        self.batch_size: int = batch_size
        self.margin_softmax: callable = margin_softmax
        self.sample_rate: float = sample_rate
        self.embedding_size: int = embedding_size
        self.prefix: str = prefix
        self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size)
        self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size)
        self.num_sample: int = int(self.sample_rate * self.num_local)

        self.weight_name = os.path.join(self.prefix, "rank:{}_softmax_weight.pt".format(self.rank))
        self.weight_mom_name = os.path.join(self.prefix, "rank:{}_softmax_weight_mom.pt".format(self.rank))

        if resume:
            try:
                self.weight: torch.Tensor = torch.load(self.weight_name)
                logging.info("softmax weight resume successfully!")
            except (FileNotFoundError, KeyError, IndexError):
                self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
                logging.info("softmax weight resume fail!")

            try:
                self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name)
                logging.info("softmax weight mom resume successfully!")
            except (FileNotFoundError, KeyError, IndexError):
                self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
                logging.info("softmax weight mom resume fail!")
        else:
            self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
            self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
            logging.info("softmax weight init successfully!")
            logging.info("softmax weight mom init successfully!")
        self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank)

        self.index = None
        if int(self.sample_rate) == 1:
            self.update = lambda: 0
            self.sub_weight = Parameter(self.weight)
            self.sub_weight_mom = self.weight_mom
        else:
            self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank))

    def save_params(self):
        torch.save(self.weight.data, self.weight_name)
        torch.save(self.weight_mom, self.weight_mom_name)

    @torch.no_grad()
    def sample(self, total_label):
        index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local)
        total_label[~index_positive] = -1
        total_label[index_positive] -= self.class_start
        if int(self.sample_rate) != 1:
            positive = torch.unique(total_label[index_positive], sorted=True)
            if self.num_sample - positive.size(0) >= 0:
                perm = torch.rand(size=[self.num_local], device=self.device)
                perm[positive] = 2.0
                index = torch.topk(perm, k=self.num_sample)[1]
                index = index.sort()[0]
            else:
                index = positive
            self.index = index
            total_label[index_positive] = torch.searchsorted(index, total_label[index_positive])
            self.sub_weight = Parameter(self.weight[index])
            self.sub_weight_mom = self.weight_mom[index]

    def forward(self, total_features, norm_weight):
        torch.cuda.current_stream().wait_stream(self.stream)
        logits = linear(total_features, norm_weight)
        return logits

    @torch.no_grad()
    def update(self):
        self.weight_mom[self.index] = self.sub_weight_mom
        self.weight[self.index] = self.sub_weight

    def prepare(self, label, optimizer):
        with torch.cuda.stream(self.stream):
            total_label = torch.zeros(
                size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long)
            dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label)
            self.sample(total_label)
            optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None)
            optimizer.param_groups[-1]['params'][0] = self.sub_weight
            optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom
            norm_weight = normalize(self.sub_weight)
            return total_label, norm_weight

    def forward_backward(self, label, features, optimizer):
        total_label, norm_weight = self.prepare(label, optimizer)
        total_features = torch.zeros(
            size=[self.batch_size * self.world_size, self.embedding_size], device=self.device)
        dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data)
        total_features.requires_grad = True

        logits = self.forward(total_features, norm_weight)
        logits = self.margin_softmax(logits, total_label)

        with torch.no_grad():
            max_fc = torch.max(logits, dim=1, keepdim=True)[0]
            dist.all_reduce(max_fc, dist.ReduceOp.MAX)

            # calculate exp(logits) and all-reduce
            logits_exp = torch.exp(logits - max_fc)
            logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
            dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)

            # calculate prob
            logits_exp.div_(logits_sum_exp)

            # get one-hot
            grad = logits_exp
            index = torch.where(total_label != -1)[0]
            one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device)
            one_hot.scatter_(1, total_label[index, None], 1)

            # calculate loss
            loss = torch.zeros(grad.size()[0], 1, device=grad.device)
            loss[index] = grad[index].gather(1, total_label[index, None])
            dist.all_reduce(loss, dist.ReduceOp.SUM)
            loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)

            # calculate grad
            grad[index] -= one_hot
            grad.div_(self.batch_size * self.world_size)

        logits.backward(grad)
        if total_features.grad is not None:
            total_features.grad.detach_()
        x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True)
        # feature gradient all-reduce
        dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0)))
        x_grad = x_grad * self.world_size
        # backward backbone
        return x_grad, loss_v
|
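Not part of this diff: a minimal sketch of how PartialFC.forward_backward is usually driven from a distributed training loop, for orientation. The names loader, backbone, module_partial_fc, opt_backbone and opt_pfc are assumptions, not objects defined here.

import torch.nn.functional as F

# Hypothetical training step (assumes torch.distributed is initialized and the
# last param group of opt_pfc holds sub_weight, as prepare() expects).
for step, (img, label) in enumerate(loader):
    features = F.normalize(backbone(img))                 # (batch, 512) embeddings
    x_grad, loss_v = module_partial_fc.forward_backward(label, features, opt_pfc)
    features.backward(x_grad)        # feed this rank's feature gradient to the backbone
    opt_backbone.step()
    opt_pfc.step()
    module_partial_fc.update()       # write sampled sub-weights back into the full matrix
    opt_backbone.zero_grad()
    opt_pfc.zero_grad()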
14
play.py
Normal file
@ -0,0 +1,14 @@
import cv2

cap = cv2.VideoCapture("rtsp://admin:2020@uestc@192.168.30.83:554/h264")
ret, frame = cap.read()
h, w = frame.shape[:2]
print("height:" + str(h) + " width:" + str(w))
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)
# while ret:
#     cv2.imshow('out', frame)
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
#     ret, frame = cap.read()
cap.release()
cv2.destroyAllWindows()
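play.py reads frame.shape before checking ret, so a dead stream raises an AttributeError. A guarded variant of the same probe, as a sketch (the URL is a placeholder):

import cv2

cap = cv2.VideoCapture("rtsp://user:password@host:554/h264")  # placeholder URL
ret, frame = cap.read()
if not ret or frame is None:
    raise RuntimeError("could not read a frame from the RTSP source")
h, w = frame.shape[:2]
print("height: %d  width: %d  fps: %.2f" % (h, w, cap.get(cv2.CAP_PROP_FPS)))
cap.release()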
282
realtime_detect.py
Normal file
@ -0,0 +1,282 @@
import argparse
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss

ppi = 1280   # maximum width of the output frame
ppi2 = 1100  # maximum width of the detection frame
step = 3     # run the detector every `step` frames


def detect_rtsp(rtsp, out_rtsp, net, arcface_model, index, database_name_list, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(rtsp)
    ret, frame = cap.read()
    h, w = frame.shape[:2]

    factor = 0
    if w > ppi:
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]
    arf = 1
    detect_h, detect_w = frame.shape[:2]
    frame_detect = frame
    factor2 = 0
    if w > ppi2:
        factor2 = h / w
        frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
        detect_h, detect_w = frame_detect.shape[:2]
        arf = w / detect_w  # scale factor from detection frame back to output frame
    print(w, h)
    print(detect_w, detect_h)

    fps = cap.get(cv2.CAP_PROP_FPS)
    #print(fps)
    size = (w, h)
    sizeStr = str(size[0]) + 'x' + str(size[1])
    if out_rtsp.startswith("rtsp"):  # detect_rtsp is only called with an rtsp:// sink below
        command = ['ffmpeg',
                   '-y', '-an',
                   '-f', 'rawvideo',
                   '-vcodec', 'rawvideo',
                   '-pix_fmt', 'bgr24',
                   '-s', sizeStr,
                   '-r', "25",
                   '-i', '-',
                   '-c:v', 'libx265',
                   '-b:v', '3000k',
                   '-pix_fmt', 'yuv420p',
                   '-preset', 'ultrafast',
                   '-f', 'rtsp',
                   out_rtsp]
        pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
    #out = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    number = step
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
    scale = scale.to(device)
    scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h])
    scale1 = scale1.to(device)

    # canonical 5-point ArcFace template for 112x112 alignment
    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            tic = time.time()
            img = np.float32(frame_detect)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time_findall = time.time()
            for i, det in enumerate(dets[:4]):
                if det[4] < args.vis_thres:
                    continue
                #boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                dst = dst * arf

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                print("warpALL time: " + str(time.time() - start_time_findall))
                #start_time = time.time()
                name_list = findAll(face_list, arcface_model, index, database_name_list, k_v, "cpu" if args.cpu else "cuda")
                #print(name_list)

            #print("findOneframe time: " + str(time.time() - start_time_findall))
            # start_time = time.time()
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         boxes = boxes * arf
            #         name = name_list[i]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
            #         cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
            start_time = time.time()
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                j = 0  # name_list only holds names for faces that passed vis_thres
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[j]
                    j += 1
                    if not isinstance(name, str):
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            pipe.stdin.write(frame.tobytes())
            #out.write(frame)
            print("drawOneframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            number = 0
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if ret != 0 and factor2 != 0:
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            print("readframe time: " + str(time.time() - start_time))
        else:
            number += 1
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                j = 0  # reuse the names from the last detected frame
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[j]
                    j += 1
                    if not isinstance(name, str):
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
                                   width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            start_time = time.time()
            pipe.stdin.write(frame.tobytes())
            #out.write(frame)
            print("writeframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if ret != 0 and factor2 != 0:
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    #out.release()
    pipe.terminate()
    print('total time: {:.4f}'.format(time.time() - tic_total))
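detect_rtsp streams its annotated frames by writing raw BGR bytes into an ffmpeg child process. A stripped-down sketch of that pattern (frame size, codec and output URL are placeholders; the script above uses libx265):

import subprocess
import numpy as np

w, h = 1280, 720                                   # assumed frame size
command = ['ffmpeg', '-y', '-an',
           '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgr24',
           '-s', '{}x{}'.format(w, h), '-r', '25',
           '-i', '-',                              # frames arrive on stdin
           '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-preset', 'ultrafast',
           '-f', 'rtsp', 'rtsp://localhost:5001/demo']
pipe = subprocess.Popen(command, stdin=subprocess.PIPE)
frame = np.zeros((h, w, 3), dtype=np.uint8)        # stand-in for a decoded frame
pipe.stdin.write(frame.tobytes())
pipe.stdin.close()
pipe.wait()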
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--rtsp",
        type=str,
        default="",
        dest="rtsp_path"
    )
    args = parser.parse_args()
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # load the face recognition model
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda="cuda")
    # load the face detection model
    retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    retinaface_model = load_retinaface_model(retinaface_args)
    k_v = load_npy("./Database/student.npy")
    #print(list(k_v.keys()))
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    nlist = 10
    quantizer = faiss.IndexFlatL2(512)  # coarse quantizer for the IVF index
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)
    #index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 10

    detect_rtsp(args.rtsp_path, 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index, database_name_list, k_v, retinaface_args)

    #detect_rtsp("rtsp://admin:2020@uestc@192.168.14.32:8557/h264", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
    #detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
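For orientation, a sketch of how the IVF index built above can answer a query. findAll's internals are not part of this diff, so this is only an assumed shape of the lookup: emb stands for an (N, 512) float32 batch of embeddings, and the distance threshold is illustrative. Note that faiss reports squared L2 distances for METRIC_L2.

import numpy as np

def lookup(emb, index, database_name_list, threshold=1.20):
    # nearest neighbor for each embedding; D holds squared L2 distances
    D, I = index.search(np.ascontiguousarray(emb, dtype=np.float32), 1)
    names = []
    for dist, idx in zip(D[:, 0], I[:, 0]):
        names.append(database_name_list[idx] if dist < threshold ** 2 else "unknown")
    return names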
283
recognition_video.py
Normal file
@ -0,0 +1,283 @@
import time
from centerface import CenterFace
from skimage import transform as trans
import numpy as np
import torch
import cv2
from backbones import iresnet100, iresnet18
from create_database import findOne, load_npy, findAll
from PIL import Image, ImageDraw, ImageFont


def show():
    cap = cv2.VideoCapture("test.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    centerface = CenterFace()
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('ccvt6.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, size)
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detect time: " + str(end_time - start_time))
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video():
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            # pad the box into a square before the 112x112 resize when it fits in the frame
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()

            for face in face_list:
                face = face[np.newaxis, :, :, :]

                name_list.append(findOne(face, model, k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 12)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(0, 225, 255), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=1)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video_GPU():
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()
            # recognize the whole batch at once instead of one face at a time
            name_list = findAll(face_list, model, k_v)
            # for face in face_list:
            #     face = face[np.newaxis, :, :, :]
            #
            #     name_list.append(findOne(face,model,k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 18)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=2)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


def video_GPU_retinaface():
    # note: despite the name, this variant still uses CenterFace for detection
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    k_v = load_npy("student.npy")
    count = 0
    #cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
    cap = cv2.VideoCapture("software.mp4")
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    centerface = CenterFace()
    while ret:
        start_time = time.time()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        end_time = time.time()
        print("detectOneframe time: " + str(end_time - start_time))
        face_list = []
        name_list = []
        print(dets.shape)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            img_w = int(boxes[2] - boxes[0])
            img_h = int(boxes[3] - boxes[1])
            distance = int(abs(img_w - img_h) / 2)
            img_w1 = int(boxes[0]) - distance
            img_w2 = int(boxes[2]) + distance
            # print(img_w,img_h,distance,max_hw)
            if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
                img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
                img112 = cv2.resize(img112, (112, 112))
                # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
                # count += 1
                face_list.append(img112)
            else:
                img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
                img112 = cv2.resize(img112, (112, 112))
                face_list.append(img112)
        if len(face_list) != 0:
            face_list = np.array(face_list)
            face_list = face_list.transpose((0, 3, 1, 2))
            face_list = np.array(face_list, dtype=np.float32)
            face_list -= 127.5
            face_list /= 127.5
            print(face_list.shape)
            face_list = torch.from_numpy(face_list)
            start_time = time.time()
            name_list = findAll(face_list, model, k_v)
            # for face in face_list:
            #     face = face[np.newaxis, :, :, :]
            #
            #     name_list.append(findOne(face,model,k_v))
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
        img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_PIL)
        font = ImageFont.truetype("font.ttf", 18)
        for i, det in enumerate(dets):
            boxes, score = det[:4], det[4]
            # cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
            #             (0, 225, 255), 1)
            name = name_list[i][:3]
            if not isinstance(name, str):
                name = name.decode('utf8')
            draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=2)
        frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
        cv2.imshow('out', frame)
        out.write(frame)
        # Press Q on keyboard to stop recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
    cap.release()
    out.release()
    cv2.destroyAllWindows()


video_GPU_retinaface()
#video_GPU()
#show()
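The functions above share one crop rule: pad the detector box into a square before resizing to 112x112, so faces are not stretched. The same rule isolated as a sketch (the helper name is hypothetical, not part of the diff):

import cv2

def square_crop(frame, box):
    # box: (x1, y1, x2, y2) from the detector (assumed input)
    x1, y1, x2, y2 = [int(v) for v in box]
    w, h = x2 - x1, y2 - y1
    pad = abs(w - h) // 2
    # widen the narrow side; fall back to the raw box at the frame border
    if w <= h and x1 - pad >= 0 and x2 + pad <= frame.shape[1]:
        crop = frame[y1:y2, x1 - pad:x2 + pad, :]
    else:
        crop = frame[y1:y2, x1:x2, :]
    return cv2.resize(crop, (112, 112))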
111
requirements.txt
Normal file
@ -0,0 +1,111 @@
Package                Version
---------------------- -----------
appdirs                1.4.4
attrs                  21.2.0
backcall               0.2.0
beautifulsoup4         4.9.3
certifi                2021.5.30
cffi                   1.14.0
chardet                4.0.0
click                  8.0.1
conda                  4.9.1
conda-build            3.20.5
conda-package-handling 1.7.0
cryptography           2.9.2
cycler                 0.10.0
dataclasses            0.6
decorator              4.4.2
dnspython              2.0.0
faiss-cpu              1.7.1
filelock               3.0.12
fire                   0.4.0
Flask                  1.1.2
future                 0.18.2
glob2                  0.7
graphsurgeon           0.4.5
graphviz               0.8.4
h5py                   3.3.0
idna                   2.10
imageio                2.9.0
iniconfig              1.1.1
ipython                7.18.1
ipython-genutils       0.2.0
itsdangerous           2.0.1
jedi                   0.17.2
Jinja2                 3.0.1
joblib                 1.0.1
kiwisolver             1.3.1
libarchive-c           2.9
Mako                   1.1.4
MarkupSafe             2.0.1
matplotlib             3.4.1
mkl-fft                1.2.0
mkl-random             1.1.1
mkl-service            2.3.0
mxnet                  1.8.0.post0
networkx               2.5.1
nltk                   3.6
numpy                  1.20.3
olefile                0.46
opencv-python          4.5.1.48
packaging              21.0
pandas                 1.2.4
parso                  0.7.0
pexpect                4.8.0
pickleshare            0.7.5
Pillow                 8.0.0
pip                    20.0.2
pkginfo                1.6.0
pluggy                 1.0.0
prefetch-generator     1.0.1
prompt-toolkit         3.0.8
protobuf               3.15.8
psutil                 5.7.2
ptyprocess             0.6.0
py                     1.9.0
pycosat                0.6.3
pycparser              2.20
pycuda                 2021.1
Pygments               2.7.1
pyOpenSSL              19.1.0
pyparsing              2.4.7
PySocks                1.7.1
pytest                 6.2.5
python-dateutil        2.8.1
python-etcd            0.4.5
pytools                2021.2.6
pytz                   2020.1
PyWavelets             1.1.1
PyYAML                 5.3.1
pyzmq                  22.1.0
regex                  2021.8.3
requests               2.25.1
ruamel-yaml            0.15.87
scikit-image           0.18.1
scipy                  1.6.3
seaborn                0.11.1
setuptools             57.1.0
six                    1.14.0
soupsieve              2.0.1
tensorboard-logger     0.1.0
tensorrt               7.2.3.4
termcolor              1.1.0
tifffile               2021.4.8
toml                   0.10.2
torch                  1.7.1
torch2trt              0.2.0
torchelastic           0.2.1
torchfile              0.1.0
torchtext              0.8.0
torchvision            0.8.2
tornado                6.1
tqdm                   4.46.0
traitlets              5.0.5
typing-extensions      3.7.4.3
uff                    0.6.9
urllib3                1.26.5
visdom                 0.1.8
wcwidth                0.2.5
websocket-client       1.1.0
Werkzeug               2.0.1
wheel                  0.34.2
762
retinaface_arcface.py
Normal file
@ -0,0 +1,762 @@
from __future__ import print_function
import os
import argparse
import re

import faiss
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from data import cfg_mnet, cfg_re50
from face_api import create_database_from_img, load_arcface_model, findAll
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
import time
from face_api import load_arcface_model, load_npy
from skimage import transform as trans
from backbones import iresnet100, iresnet18
#from create_database import findOne, load_npy,findAll
from PIL import Image, ImageDraw, ImageFont

parser = argparse.ArgumentParser(description='Retinaface')

parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--cpu', action="store_true", default=False if torch.cuda.is_available() else True, help='Use cpu inference')
parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
parser.add_argument('--top_k', default=5000, type=int, help='top_k')
parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
args = parser.parse_args()


def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}


def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


def image_to112x112_retinaface():
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"D:\Download\out\cfp"
    output_path = r"D:\Download\out\cfp_align"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    for f in folder1:
        output_name_path = os.path.join(output_path, f)
        if os.path.exists(output_name_path) == 0:
            os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, f)
        img_list = os.listdir(img_name_path)

        for img in img_list:
            count2 += 1
            print(count2)
            path = os.path.join(img_name_path, img)
            align_img_path = os.path.join(output_name_path, img)
            # print(path)
            frame = cv2.imread(path)
            h, w = frame.shape[:2]
            img = np.float32(frame)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            tic = time.time()
            loc, conf, landms = net(img)  # forward pass
            print('net forward time: {:.4f}'.format(time.time() - tic))

            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            score = 500
            # pick the face closest to the image center, then align it
            if args.save_image:
                dst = []
                for i, det in enumerate(dets):
                    if det[4] < args.vis_thres:
                        continue
                    center_x = (det[2] + det[0]) / 2
                    center_y = (det[3] + det[1]) / 2
                    if abs(center_x - 125) + abs(center_y - 125) < score:
                        score = abs(center_x - 125) + abs(center_y - 125)
                        dst = np.reshape(landms[i], (5, 2))
                if len(dst) > 0:
                    src1 = np.array([
                        [38.3814, 51.6963],
                        [73.6186, 51.5014],
                        [56.1120, 71.7366],
                        [41.6361, 92.3655],
                        [70.8167, 92.2041]], dtype=np.float32)
                    tform = trans.SimilarityTransform()
                    tform.estimate(dst, src1)
                    M = tform.params[0:2, :]

                    if w < 112 or h < 112:
                        count += 1
                        #print(align_img_path)
                        continue
                    frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                    img112 = frame[0:112, 0:112, :]
                    cv2.imwrite(align_img_path, img112)
    print("images smaller than 112: " + str(count))


def sfz_to112x112_retinaface(arcface_model, cpu_or_cuda):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"D:\Download\out\alig_students_all"
    output_path = r"D:\Download\out\alig_students_all"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    print(len(folder1))
    # print(folder1[0][:-4])
    # return 0
    order_img = []
    order_name = []
    tic = time.time()
    for img_name in folder1[:2500]:
        # output_name_path = os.path.join(output_path, img_name)
        # if os.path.exists(output_name_path) == 0:
        #     os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, img_name)
        #img_list = os.listdir(img_name_path)
        count2 += 1
        if count2 % 1000 == 0:
            print('net forward time: {:.4f}'.format(time.time() - tic))
            print(count2)
            # flush the accumulated batch into the database every 1000 images
            if len(order_img) > 0:
                order_img = np.array(order_img)
                order_img = order_img.transpose((0, 3, 1, 2))
                order_img = np.array(order_img, dtype=np.float32)
                order_img -= 127.5
                order_img /= 127.5
                # print(order_img.shape)
                # print(len(order_name))
                create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
                order_img = []
                order_name = []
                tic = time.time()

        # if img_name[19] != "1":
        #     continue

        #path = os.path.join(img_name_path, img)
        align_img_path = os.path.join(output_path, img_name)
        # print(path)
        try:
            frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
            h, w, d = frame.shape
        except AttributeError:
            print(img_name)
            continue
        if d == 1:
            continue
        factor = h / w
        if w > 1000:
            frame = cv2.resize(frame, (600, int(600 * factor)))
            h, w = frame.shape[:2]
        img = np.float32(frame)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        #tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        #print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        score = 500
        # align the last face that passed the visualization threshold
        if args.save_image:
            dst = []
            for i, det in enumerate(dets):
                if det[4] < args.vis_thres:
                    continue
                # center_x = (det[2] + det[0]) / 2
                # center_y = (det[3] + det[1]) / 2
                # if abs(center_x - 125) + abs(center_y - 125) < score:
                #     score = abs(center_x - 125) + abs(center_y - 125)
                dst = np.reshape(landms[i], (5, 2))
            if len(dst) > 0:
                src1 = np.array([
                    [38.3814, 51.6963],
                    [73.6186, 51.5014],
                    [56.1120, 71.7366],
                    [41.6361, 92.3655],
                    [70.8167, 92.2041]], dtype=np.float32)
                tform = trans.SimilarityTransform()
                tform.estimate(dst, src1)
                M = tform.params[0:2, :]

                if w < 112 or h < 112:
                    count += 1
                    print(img_name_path)
                    continue
                frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame[0:112, 0:112, :]
                order_img.append(img112)
                order_name.append(img_name[:-6])
                #cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
                #cv2.imwrite(align_img_path, img112)

    print("images smaller than 112: " + str(count))
    if len(order_img) > 0:
        order_img = np.array(order_img)
        order_img = order_img.transpose((0, 3, 1, 2))
        order_img = np.array(order_img, dtype=np.float32)
        order_img -= 127.5
        order_img /= 127.5
        # print(order_img.shape)
        # print(len(order_name))
        create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
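Both conversion routines above align faces the same way: estimate a similarity transform from the five detected landmarks to the canonical 112x112 ArcFace template, warp, and crop. The step in isolation, as a sketch (the helper name is hypothetical; dst is assumed to come from a detector):

import cv2
import numpy as np
from skimage import transform as trans

TEMPLATE = np.array([[38.3814, 51.6963], [73.6186, 51.5014], [56.1120, 71.7366],
                     [41.6361, 92.3655], [70.8167, 92.2041]], dtype=np.float32)

def align_face(frame, dst):
    # dst: (5, 2) landmarks; map them onto the template and warp the frame
    tform = trans.SimilarityTransform()
    tform.estimate(dst, TEMPLATE)
    M = tform.params[0:2, :]                    # 2x3 affine matrix
    warped = cv2.warpAffine(frame, M, (frame.shape[1], frame.shape[0]), borderValue=0.0)
    return warped[0:112, 0:112, :]              # aligned 112x112 crop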
def count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    input_path = r"../face/czrkzp2"
    folder1 = os.listdir(input_path)
    count = 0
    count2 = 0
    print(len(folder1))
    # print(folder1[0][:-4])
    # return 0
    order_img = []
    order_name = []
    tic = time.time()
    for img_name in folder1[:15000]:
        # output_name_path = os.path.join(output_path, img_name)
        # if os.path.exists(output_name_path) == 0:
        #     os.makedirs(output_name_path)
        img_name_path = os.path.join(input_path, img_name)
        #img_list = os.listdir(img_name_path)
        count2 += 1
        if count2 % 5000 == 0:
            print('net forward time: {:.4f}'.format(time.time() - tic))
            print(count2)
            # if len(order_img) > 0:
            #     order_img = np.array(order_img)
            #     order_img = order_img.transpose((0, 3, 1, 2))
            #     order_img = np.array(order_img, dtype=np.float32)
            #     order_img -= 127.5
            #     order_img /= 127.5
            #     # print(order_img.shape)
            #     # print(len(order_name))
            #     create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
            #     order_img = []
            #     order_name = []
            #     tic = time.time()

        if img_name[19] == "1":
            continue

        #path = os.path.join(img_name_path, img)
        #align_img_path = os.path.join(output_path, img_name)
        # print(path)
        #frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        try:
            frame = cv2.imread(img_name_path)
            h, w, d = frame.shape
        except AttributeError:
            print(img_name)
            continue
        if d == 1:
            continue
        factor = h / w
        if w > 1000:
            frame = cv2.resize(frame, (600, int(600 * factor)))
            h, w = frame.shape[:2]
        img = np.float32(frame)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        #tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        #print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        score = 500
        # align the last face that passed the visualization threshold
        if args.save_image:
            dst = []
            for i, det in enumerate(dets):
                if det[4] < args.vis_thres:
                    continue
                # center_x = (det[2] + det[0]) / 2
                # center_y = (det[3] + det[1]) / 2
                # if abs(center_x - 125) + abs(center_y - 125) < score:
                #     score = abs(center_x - 125) + abs(center_y - 125)
                dst = np.reshape(landms[i], (5, 2))
            if len(dst) > 0:
                src1 = np.array([
                    [38.3814, 51.6963],
                    [73.6186, 51.5014],
                    [56.1120, 71.7366],
                    [41.6361, 92.3655],
                    [70.8167, 92.2041]], dtype=np.float32)
                tform = trans.SimilarityTransform()
                tform.estimate(dst, src1)
                M = tform.params[0:2, :]

                if w < 112 or h < 112:
                    count += 1
                    print(img_name_path)
                    continue
                frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame[0:112, 0:112, :]
                order_img.append(img112)
                order_name.append(img_name)
                #cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
                #cv2.imwrite(align_img_path, img112)

    print("images smaller than 112: " + str(count))
    if len(order_img) > 0:
        order_img = np.array(order_img)
        order_img = order_img.transpose((0, 3, 1, 2))
        order_img = np.array(order_img, dtype=np.float32)
        order_img -= 127.5
        order_img /= 127.5
        # print(order_img.shape)
        # print(len(order_name))
        count_acc(order_name, order_img, arcface_model, index, database_name_list, cpu_or_cuda)


def count_acc(order_name, order_img, model, index, database_name_list, cpu_or_cuda):
    pred_name = []
    unknown = []
    print(order_img.shape)

    start_time = time.time()
    # order_img = torch.from_numpy(order_img)
    # order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    # run recognition in batches of 256
    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, index, database_name_list, cpu_or_cuda)
        else:
            name = findAll(order_img[now:number], model, index, database_name_list, cpu_or_cuda)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i][:-6]:
            right += 1
    filed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            filed += 1
            #print(order_name[i])
    error = 0
    print("----------------")
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i][:-6]:
            error += 1
            #print(order_name[i] + " " + pred_name[i] + " ")
    #print(order_name)
    #print(pred_name)
    print("total:" + str(number))
    print("right:" + str(right + filed) + " rate:" + str((filed + right) / number))
    #print("filed:" + str(filed) + " rate:" + str(filed / number))
    print("error:" + str(error - filed) + " rate:" + str((error - filed) / number))
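To make the tallies in count_acc concrete (the numbers below are assumed, for illustration only): right counts exact identity matches, filed counts "unknown" predictions, and error counts every mismatch, so error - filed is the number of wrong identities.

# 1000 probes: 930 correct, 40 rejected as "unknown", 30 matched to the wrong person
# right = 930, filed = 40, error = 40 + 30 = 70
# printed "right" rate: (930 + 40) / 1000 = 0.97   (rejections are not counted as errors)
# printed "error" rate: (70 - 40) / 1000 = 0.03    (only wrong identities count)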
# if __name__ == '__main__':
#     torch.set_grad_enabled(False)
#     cfg = None
#     if args.network == "mobile0.25":
#         cfg = cfg_mnet
#     elif args.network == "resnet50":
#         cfg = cfg_re50
#     # net and model
#     net = RetinaFace(cfg=cfg, phase='test')
#     net = load_model(net, args.trained_model, args.cpu)
#     net.eval()
#     print('Finished loading model!')
#     #print(net)
#     cudnn.benchmark = True
#     device = torch.device("cpu" if args.cpu else "cuda")
#     net = net.to(device)
#
#     resize = 1
#
#     # testing begin
#     cap = cv2.VideoCapture("rtsp://47.108.74.82:8557/h264")
#     ret, frame = cap.read()
#     h, w = frame.shape[:2]
#     fps = cap.get(cv2.CAP_PROP_FPS)
#     size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
#             int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
#     #out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
#     out = cv2.VideoWriter('ttttttt.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size)
#     number = 0
#
#     model = iresnet100()
#     model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
#     model.eval()
#     k_v = load_npy("./Database/student.npy")
#
#     while ret:
#         tic = time.time()
#         img = np.float32(frame)
#         im_height, im_width, _ = img.shape
#         scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
#         img -= (104, 117, 123)
#         img = img.transpose(2, 0, 1)
#         img = torch.from_numpy(img).unsqueeze(0)
#         img = img.to(device)
#         scale = scale.to(device)
#
#         loc, conf, landms = net(img)  # forward pass
#
#         priorbox = PriorBox(cfg, image_size=(im_height, im_width))
#         priors = priorbox.forward()
#         priors = priors.to(device)
#         prior_data = priors.data
#         boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
#         boxes = boxes * scale / resize
#         boxes = boxes.cpu().numpy()
#         scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
#         landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
#         scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
#                                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
#                                img.shape[3], img.shape[2]])
#         scale1 = scale1.to(device)
#         landms = landms * scale1 / resize
#         landms = landms.cpu().numpy()
#
#         # ignore low scores
#         inds = np.where(scores > args.confidence_threshold)[0]
#         boxes = boxes[inds]
#         landms = landms[inds]
#         scores = scores[inds]
#
#         # keep top-K before NMS
#         order = scores.argsort()[::-1][:args.top_k]
#         boxes = boxes[order]
#         landms = landms[order]
#         scores = scores[order]
#
#         # do NMS
#         dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
#         keep = py_cpu_nms(dets, args.nms_threshold)
#         # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
#         dets = dets[keep, :]
#         landms = landms[keep]
#
#         # keep top-K after NMS
#         dets = dets[:args.keep_top_k, :]
#         landms = landms[:args.keep_top_k, :]
#
#         dets = np.concatenate((dets, landms), axis=1)
#         face_list = []
|
||||||
|
# name_list = []
|
||||||
|
# #print(dets[:4])
|
||||||
|
# print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||||
|
# start_time = time.time()
|
||||||
|
# for i, det in enumerate(dets):
|
||||||
|
# if det[4] < args.vis_thres:
|
||||||
|
# continue
|
||||||
|
# boxes, score = det[:4], det[4]
|
||||||
|
# dst = np.reshape(landms[i],(5,2))
|
||||||
|
# #print(dst.shape)
|
||||||
|
# src1 = np.array([
|
||||||
|
# [38.3814, 51.6963],
|
||||||
|
# [73.6186, 51.5014],
|
||||||
|
# [56.1120, 71.7366],
|
||||||
|
# [41.6361, 92.3655],
|
||||||
|
# [70.8167, 92.2041]], dtype=np.float32)
|
||||||
|
# #print(src1.shape)
|
||||||
|
# tform = trans.SimilarityTransform()
|
||||||
|
# tform.estimate(dst, src1)
|
||||||
|
# M = tform.params[0:2, :]
|
||||||
|
# frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||||
|
# img112 = frame2[0:112, 0:112, :]
|
||||||
|
# # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||||
|
# # count += 1
|
||||||
|
# face_list.append(img112)
|
||||||
|
#
|
||||||
|
# if len(face_list) != 0:
|
||||||
|
# face_list = np.array(face_list)
|
||||||
|
# face_list = face_list.transpose((0, 3, 1, 2))
|
||||||
|
# face_list = np.array(face_list, dtype=np.float32)
|
||||||
|
# face_list -= 127.5
|
||||||
|
# face_list /= 127.5
|
||||||
|
# print(face_list.shape)
|
||||||
|
# face_list = torch.from_numpy(face_list)
|
||||||
|
#
|
||||||
|
# name_list = findAll(face_list, model, k_v)
|
||||||
|
# end_time = time.time()
|
||||||
|
# print("findOneframe time: " + str(end_time - start_time))
|
||||||
|
# start_time = time.time()
|
||||||
|
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||||
|
# draw = ImageDraw.Draw(img_PIL)
|
||||||
|
# font = ImageFont.truetype("font.ttf", 22)
|
||||||
|
# for i, det in enumerate(dets):
|
||||||
|
# if det[4] < args.vis_thres:
|
||||||
|
# continue
|
||||||
|
# boxes, score = det[:4], det[4]
|
||||||
|
# #print(name_list)
|
||||||
|
# name = name_list[i]
|
||||||
|
# mo = r'[\u4e00-\u9fa5]*'
|
||||||
|
# name = re.match(mo, name).group(0)
|
||||||
|
# if not isinstance(name, np.unicode):
|
||||||
|
# name = name.decode('utf8')
|
||||||
|
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||||
|
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
|
||||||
|
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||||
|
# cv2.imshow('out', frame)
|
||||||
|
# out.write(frame)
|
||||||
|
# end_time = time.time()
|
||||||
|
# print("drawOneframe time: " + str(end_time - start_time))
|
||||||
|
# # Press Q on keyboard to stop recording
|
||||||
|
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||||
|
# break
|
||||||
|
# ret, frame = cap.read()
|
||||||
|
# cap.release()
|
||||||
|
# out.release()
|
||||||
|
# cv2.destroyAllWindows()
|
||||||
|
if __name__ == '__main__':
|
||||||
|
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
|
||||||
|
|
||||||
|
k_v = load_npy("./Database/sfz_test.npy")
|
||||||
|
database_name_list = list(k_v.keys())
|
||||||
|
vector_list = np.array(list(k_v.values()))
|
||||||
|
print(vector_list.shape)
|
||||||
|
# print(database_name_list)
|
||||||
|
nlist = 500
|
||||||
|
quantizer = faiss.IndexFlatL2(512) # the other index
|
||||||
|
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
|
||||||
|
index.train(vector_list)
|
||||||
|
# index = faiss.IndexFlatL2(512)
|
||||||
|
index.add(vector_list)
|
||||||
|
index.nprobe = 50
|
||||||
|
|
||||||
|
count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list)
|
||||||
|
# sfz_to112x112_retinaface(arcface_model,cpu_or_cuda)
|
||||||
|
|
||||||
|
|
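
# --- Illustrative aside, not part of this repo: the quantizer/train/add/nprobe
# sequence above builds an IVF index that only scans `nprobe` of the `nlist`
# trained clusters per query. A minimal self-contained sketch of the same
# pattern on random data (all sizes here are invented for the demo):
def _faiss_ivf_demo():
    import faiss
    import numpy as np

    d, n = 512, 10000
    xb = np.random.random((n, d)).astype('float32')
    quantizer = faiss.IndexFlatL2(d)                    # coarse quantizer
    index = faiss.IndexIVFFlat(quantizer, d, 100, faiss.METRIC_L2)
    index.train(xb)                                     # k-means over the database vectors
    index.add(xb)
    index.nprobe = 10                                   # more probes -> better recall, slower search
    D, I = index.search(xb[:5], 1)                      # distances and ids of nearest neighbours
    print(I.ravel())                                    # should be [0 1 2 3 4] at high recall
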
483
retinaface_detect.py
Normal file
@ -0,0 +1,483 @@
from __future__ import print_function
import re
import time

import cv2
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont

from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
from models.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm

threshold = 1.05  # L2 distance threshold for accepting a match
ppi = 1280        # frames wider than this are downscaled to this width
step = 3          # detect_video runs detection once every `step` + 1 frames


class ConfRetinaface(object):
    def __init__(self, trained_model, network, cpu, confidence_threshold, top_k, nms_threshold, keep_top_k, vis_thres):
        self.trained_model = trained_model
        self.network = network
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.vis_thres = vis_thres


def set_retinaface_conf(cpu_or_cuda):
    args = ConfRetinaface(trained_model='./weights/mobilenet0.25_Final.pth',
                          network='mobile0.25',
                          cpu=(cpu_or_cuda == 'cpu'),
                          confidence_threshold=0.02,
                          top_k=5000,
                          nms_threshold=0.4,
                          keep_top_k=750,
                          vis_thres=0.6)
    return args

def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    '''Old style model is stored with all names of parameters sharing common prefix 'module.' '''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}


def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


# Load the RetinaFace detection model
def load_retinaface_model(args):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    print('Finished loading model!')
    return net

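
# --- Illustrative bring-up sketch (assumes the default weight file from
# set_retinaface_conf actually exists on disk; not called anywhere):
def _bringup_demo():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    retinaface_args = set_retinaface_conf(device)
    retinaface_net = load_retinaface_model(retinaface_args)
    return retinaface_net, retinaface_args
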
# Compute the Euclidean distance between two feature vectors
def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


# L2-normalize a feature vector
def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


# Find the database face whose feature vector is closest to the prediction
def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return "unknown"


# Nearest-neighbour lookup in the faiss index
def faiss_find_face(pred, index, database_name_list):
    # print(len(database_name_list))
    start_time = time.time()
    D, I = index.search(pred, 1)
    name_list = []
    end_time = time.time()
    print("faiss cost %fs" % (end_time - start_time))
    print(D, I)
    # if D[0][0] < threshold:
    #     print(database_name_list[I[0][0]])
    #     return database_name_list[I[0][0]]
    # else:
    #     return "unknown"
    for i, idx in enumerate(I):  # loop variable renamed from `index` to avoid shadowing the parameter
        if D[i][0] < threshold:
            # print(database_name_list[I[0][0]])
            name_list.append(database_name_list[idx[0]])
        else:
            name_list.append("unknown")
    return name_list


# Identify every face in the input batch against the face database.
# `index`/`database_name_list` are only used by the commented-out faiss path,
# so they now default to None; the original signature required them even
# though detect_video below calls findAll without them.
def findAll(imglist, model, k_v, cpu_or_cuda, index=None, database_name_list=None):
    start_time = time.time()
    imglist = torch.from_numpy(imglist)
    imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.cpu().numpy()
        print("predOne time: " + str(time.time() - start_time))
        # print(pred.shape)
        start_time = time.time()
        # name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
            print(name)
            # print(l2_normalize(pr).shape)
            # pr = np.expand_dims(l2_normalize(pr), 0)
            # print(pr.shape)
            # name = faiss_find_face(pr, index, database_name_list)
            if name != "unknown":
                mo = r'[\u4e00-\u9fa5_a-zA-Z]*'  # keep only the person's name: CJK characters, letters, underscores
                name = re.match(mo, name)
                name_list.append(name.group(0))
            else:
                name_list.append("unknown")
            # name_list.append(name)
        print("findOne time: " + str(time.time() - start_time))
    return name_list

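
# --- Illustrative aside, not part of this repo: findAll compares L2-normalized
# embeddings, so the L2 threshold maps directly onto a cosine threshold. For
# unit vectors a and b, ||a - b||^2 == 2 - 2 * cos(a, b), hence the threshold
# of 1.05 used above corresponds to cosine similarity 1 - 1.05**2 / 2 ~= 0.449.
def _l2_vs_cosine_demo():
    rng = np.random.default_rng(0)
    a = rng.normal(size=512)
    a /= np.linalg.norm(a)
    b = rng.normal(size=512)
    b /= np.linalg.norm(b)
    d2 = np.sum((a - b) ** 2)
    assert np.isclose(d2, 2 - 2 * np.dot(a, b))
    print("L2 threshold 1.05 ~ cosine", 1 - 1.05 ** 2 / 2)
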
# Detect the faces in a single image and return them as an Nx3x112x112 float array
def detect_one(path, net, args):
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    frame = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = frame.shape[:2]
    factor = h / w
    if w > 1000:
        frame = cv2.resize(frame, (600, int(600 * factor)))
        h, w = frame.shape[:2]

    tic = time.time()
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    # print(loc.shape, landms.shape, conf.shape)
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    face_list = []
    box_and_point = []
    # print(dets[:4])
    # print('net forward time: {:.4f}'.format(time.time() - tic))
    print(len(dets))
    for i, det in enumerate(dets):
        if det[4] < args.vis_thres:
            continue
        box_and_point.append(det)
        dst = np.reshape(landms[i], (5, 2))
        # print(dst.shape)
        # reference landmark positions for an aligned 112x112 face
        src1 = np.array([
            [38.3814, 51.6963],
            [73.6186, 51.5014],
            [56.1120, 71.7366],
            [41.6361, 92.3655],
            [70.8167, 92.2041]], dtype=np.float32)
        # print(src1.shape)
        tform = trans.SimilarityTransform()
        tform.estimate(dst, src1)
        M = tform.params[0:2, :]
        frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
        img112 = frame2[0:112, 0:112, :]
        # cv2.imshow('out', img112)
        # cv2.waitKey(0)
        face_list.append(img112)
    if len(face_list) > 0:
        face_list = np.array(face_list)
        face_list = face_list.transpose((0, 3, 1, 2))
        face_list = np.array(face_list, dtype=np.float32)
        face_list -= 127.5
        face_list /= 127.5
        box_and_point = np.array(box_and_point)
        # face_list = torch.from_numpy(face_list)
        # cv2.imshow('out', img112)
        # cv2.waitKey(0)
    return face_list, box_and_point

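
# --- Hypothetical single-image pipeline (for reference only): `net`, `args`,
# `arcface_model`, `k_v` and the image path are all supplied by the caller;
# none of them are defined at module level in this file.
def _single_image_demo(net, args, arcface_model, k_v, path="./img/test.jpg"):
    faces, box_and_point = detect_one(path, net, args)
    if len(faces) > 0:
        names = findAll(faces, arcface_model, k_v, "cpu" if args.cpu else "cuda")
        for name, det in zip(names, box_and_point):
            print(name, det[:4], det[4])  # identity, box, detection score
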
# Detect and recognize faces in a video
def detect_video(video_path, output_path, net, arcface_model, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    factor = 0
    if w > ppi:
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]

    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (w, h)
    # size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
    number = step  # forces detection on the very first frame
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(h, w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([w, h, w, h])
    scale = scale.to(device)
    scale1 = torch.Tensor([w, h, w, h,
                           w, h, w, h,
                           w, h])
    scale1 = scale1.to(device)

    # reference landmark positions for an aligned 112x112 face
    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    # print(src1.shape)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            # heavy path: run detection + recognition on this frame
            tic = time.time()
            img = np.float32(frame)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            # print(dets[:4])
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time = time.time()
            for i, det in enumerate(dets[:4]):  # recognize at most 4 faces per frame
                if det[4] < args.vis_thres:
                    continue
                boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                # print(dst.shape)

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                # face_list = torch.from_numpy(face_list)
                name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
            end_time = time.time()
            print("findOneframe time: " + str(end_time - start_time))
            start_time = time.time()
            if len(dets) != 0:
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)

            # if (len(dets) != 0):
            #     img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            #     draw = ImageDraw.Draw(img_PIL)
            #
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         # print(name_list)
            #         name = name_list[i]
            #         if not isinstance(name, str):
            #             name = name.decode('utf8')
            #         draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            #         draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
            #     frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            # cv2.imshow('out', frame)
            # cv2.waitKey(0)
            out.write(frame)
            end_time = time.time()
            print("drawOneframe time: " + str(end_time - start_time))
            # Press Q on keyboard to stop recording
            # if cv2.waitKey(1) & 0xFF == ord('q'):
            #     break
            ret, frame = cap.read()
            number = 0
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        else:
            # cheap path: redraw the cached detections on the in-between frame
            number += 1
            if len(dets) != 0:
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    # print(name_list)
                    name = name_list[i]
                    if not isinstance(name, str):  # np.unicode was removed from NumPy; str is its Python 3 equivalent
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
                                   width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            out.write(frame)
            start_time = time.time()
            ret, frame = cap.read()
            if ret != 0 and factor != 0:
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    out.release()
    print('total time: {:.4f}'.format(time.time() - tic_total))
    # cv2.destroyAllWindows()

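
# --- Illustrative skeleton of detect_video's frame-skipping control flow:
# the detector runs once every `step` + 1 frames and the cached detections
# are redrawn in between. The two helpers are stand-ins for the inline code
# above, not functions defined by this repo.
def _detect_and_recognize_stub(frame):
    # stand-in for the RetinaFace + ArcFace path
    return [], []


def _draw_cached_stub(frame, dets, names):
    # stand-in for the box/name drawing
    pass


def _skip_loop(cap, step=3):
    number = step          # forces detection on the very first frame
    ret, frame = cap.read()
    dets, names = [], []
    while ret:
        if number == step:
            dets, names = _detect_and_recognize_stub(frame)  # heavy path
            number = 0
        else:
            number += 1    # cheap path: reuse the cached detections
        _draw_cached_stub(frame, dets, names)
        ret, frame = cap.read()
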
if __name__ == "__main__":
    # set_retinaface_conf requires the device string; the original call omitted it
    args = set_retinaface_conf("cuda" if torch.cuda.is_available() else "cpu")
    print(args.cpu)
BIN
src/__pycache__/generate_patches.cpython-38.pyc
Normal file
BIN
src/__pycache__/utility.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/functional.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/transform.cpython-38.pyc
Normal file
65
src/data_io/dataset_folder.py
Normal file
@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:04 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_folder.py
# @Software : PyCharm

import cv2
import torch
from torchvision import datasets
import numpy as np


def opencv_loader(path):
    img = cv2.imread(path)
    return img


class DatasetFolderFT(datasets.ImageFolder):
    def __init__(self, root, transform=None, target_transform=None,
                 ft_width=10, ft_height=10, loader=opencv_loader):
        super(DatasetFolderFT, self).__init__(root, transform, target_transform, loader)
        self.root = root
        self.ft_width = ft_width
        self.ft_height = ft_height

    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = self.loader(path)
        # generate the FT picture of the sample
        ft_sample = generate_FT(sample)
        if sample is None:
            print('image is None --> ', path)
        if ft_sample is None:
            print('FT image is None -->', path)
        assert sample is not None

        ft_sample = cv2.resize(ft_sample, (self.ft_width, self.ft_height))
        ft_sample = torch.from_numpy(ft_sample).float()
        ft_sample = torch.unsqueeze(ft_sample, 0)

        if self.transform is not None:
            try:
                sample = self.transform(sample)
            except Exception as err:
                print('Error Occurred: %s' % err, path)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, ft_sample, target


# Build the log-magnitude Fourier-transform map of an image, scaled to roughly [0, 1]
def generate_FT(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    f = np.fft.fft2(image)
    fshift = np.fft.fftshift(f)
    fimg = np.log(np.abs(fshift) + 1)
    maxx = -1
    minn = 100000
    for i in range(len(fimg)):
        if maxx < max(fimg[i]):
            maxx = max(fimg[i])
        if minn > min(fimg[i]):
            minn = min(fimg[i])
    fimg = (fimg - minn + 1) / (maxx - minn + 1)
    return fimg
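
# --- Illustrative aside, not part of this repo: the row-by-row min/max scan in
# generate_FT computes the global min and max, so it can be replaced by
# whole-array numpy reductions. Same `+1` offsets, hence the same output.
def generate_FT_vectorized(image):
    import cv2
    import numpy as np

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    fimg = np.log(np.abs(np.fft.fftshift(np.fft.fft2(gray))) + 1)
    return (fimg - fimg.min() + 1) / (fimg.max() - fimg.min() + 1)
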
33
src/data_io/dataset_loader.py
Normal file
@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 3:40 PM
# @Author : zhuying
# @Company : Minivision
# @File : dataset_loader.py
# @Software : PyCharm

from torch.utils.data import DataLoader
from src.data_io.dataset_folder import DatasetFolderFT
from src.data_io import transform as trans


def get_train_loader(conf):
    train_transform = trans.Compose([
        trans.ToPILImage(),
        trans.RandomResizedCrop(size=tuple(conf.input_size),
                                scale=(0.9, 1.1)),
        trans.ColorJitter(brightness=0.4,
                          contrast=0.4, saturation=0.4, hue=0.1),
        trans.RandomRotation(10),
        trans.RandomHorizontalFlip(),
        trans.ToTensor()
    ])
    root_path = '{}/{}'.format(conf.train_root_path, conf.patch_info)
    trainset = DatasetFolderFT(root_path, train_transform,
                               None, conf.ft_width, conf.ft_height)
    train_loader = DataLoader(
        trainset,
        batch_size=conf.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=16)
    return train_loader
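
# --- Illustrative aside, not part of this repo: get_train_loader only needs an
# object exposing the attributes it reads, so a SimpleNamespace is enough for a
# smoke test. The field values below are invented placeholders, in particular
# `patch_info`, which in this project names a subdirectory of train_root_path.
def _loader_demo():
    from types import SimpleNamespace

    conf = SimpleNamespace(train_root_path="./datasets", patch_info="org_1_80x60",
                           input_size=[80, 60], ft_width=10, ft_height=10,
                           batch_size=128)
    return get_train_loader(conf)
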
589
src/data_io/functional.py
Normal file
@ -0,0 +1,589 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 6:18 PM
# @Author : zhuying
# @Company : Minivision
# @File : functional.py
# @Software : PyCharm

from __future__ import division
import torch
from PIL import Image, ImageOps, ImageEnhance
try:
    import accimage
except ImportError:
    accimage = None
import numpy as np
import numbers
import types
import collections
import warnings


def _is_pil_image(img):
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    else:
        return isinstance(img, Image.Image)


def _is_tensor_image(img):
    return torch.is_tensor(img) and img.ndimension() == 3


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    if not(_is_pil_image(pic) or _is_numpy_image(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if isinstance(pic, np.ndarray):
        # handle numpy array
        # IR image channel=1: modify by lzc --> 20190730
        if pic.ndim == 2:
            pic = pic.reshape((pic.shape[0], pic.shape[1], 1))

        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        # backward compatibility
        # return img.float().div(255)  # modified by zkx
        return img.float()
    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # handle PIL Image
    if pic.mode == 'I':
        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
    elif pic.mode == 'I;16':
        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
    if pic.mode == 'YCbCr':
        nchannel = 3
    elif pic.mode == 'I;16':
        nchannel = 1
    else:
        nchannel = len(pic.mode)
    img = img.view(pic.size[1], pic.size[0], nchannel)
    # put it from HWC to CHW format
    # yikes, this transpose takes 80% of the loading time/CPU
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        # return img.float().div(255)  # modified by zkx
        return img.float()
    else:
        return img

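
# --- Behaviour check, illustrative only: unlike stock torchvision, this fork's
# to_tensor keeps pixel values in [0, 255] because the .div(255) calls were
# deliberately removed (see the "modified by zkx" comments above).
def _to_tensor_range_demo():
    img = np.full((4, 4, 3), 200, dtype=np.uint8)
    t = to_tensor(img)
    assert float(t.max()) == 200.0  # stock torchvision would give ~0.784
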
def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    See :class:`~torchvision.transforms.ToPILImage` for more details.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes

    Returns:
        PIL Image: Image converted to PIL Image.
    """
    if not(_is_numpy_image(pic) or _is_tensor_image(pic)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))

    npimg = pic
    if isinstance(pic, torch.FloatTensor):
        pic = pic.mul(255).byte()
    if torch.is_tensor(pic):
        npimg = np.transpose(pic.numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        if npimg.dtype == np.int16:
            expected_mode = 'I;16'
        if npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                             .format(mode, np.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'
    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)


def normalize(tensor, mean, std):
    """Normalize a tensor image with mean and standard deviation.

    See ``Normalize`` for more details.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    for t, m, s in zip(tensor, mean, std):
        t.sub_(m).div_(s)
    return tensor


def resize(img, size, interpolation=Image.BILINEAR):
    """Resize the input PIL Image to the given size.

    Args:
        img (PIL Image): Image to be resized.
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
            the aspect ratio. i.e, if height > width, then image will be rescaled to
            (size * height / width, size)
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``

    Returns:
        PIL Image: Resized image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    # collections.Iterable was removed in Python 3.10; use collections.abc
    if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
        raise TypeError('Got inappropriate size arg: {}'.format(size))

    if isinstance(size, int):
        w, h = img.size
        if (w <= h and w == size) or (h <= w and h == size):
            return img
        if w < h:
            ow = size
            oh = int(size * h / w)
            return img.resize((ow, oh), interpolation)
        else:
            oh = size
            ow = int(size * w / h)
            return img.resize((ow, oh), interpolation)
    else:
        return img.resize(size[::-1], interpolation)


def scale(*args, **kwargs):
    warnings.warn("The use of the transforms.Scale transform is deprecated, " +
                  "please use transforms.Resize instead.")
    return resize(*args, **kwargs)


def pad(img, padding, fill=0):
    """Pad the given PIL Image on all sides with the given "pad" value.

    Args:
        img (PIL Image): Image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively.
        fill: Pixel fill value. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.

    Returns:
        PIL Image: Padded image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if not isinstance(padding, (numbers.Number, tuple)):
        raise TypeError('Got inappropriate padding arg')
    if not isinstance(fill, (numbers.Number, str, tuple)):
        raise TypeError('Got inappropriate fill arg')

    if isinstance(padding, collections.abc.Sequence) and len(padding) not in [2, 4]:
        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
                         "{} element tuple".format(len(padding)))

    return ImageOps.expand(img, border=padding, fill=fill)


def crop(img, i, j, h, w):
    """Crop the given PIL Image.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.

    Returns:
        PIL Image: Cropped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.crop((j, i, j + w, i + h))


def center_crop(img, output_size):
    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    w, h = img.size
    th, tw = output_size
    i = int(round((h - th) / 2.))
    j = int(round((w - tw) / 2.))
    return crop(img, i, j, th, tw)


def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
    """Crop the given PIL Image and resize it to desired size.

    Notably used in RandomResizedCrop.

    Args:
        img (PIL Image): Image to be cropped.
        i: Upper pixel coordinate.
        j: Left pixel coordinate.
        h: Height of the cropped image.
        w: Width of the cropped image.
        size (sequence or int): Desired output size. Same semantics as ``scale``.
        interpolation (int, optional): Desired interpolation. Default is
            ``PIL.Image.BILINEAR``.
    Returns:
        PIL Image: Cropped image.
    """
    assert _is_pil_image(img), 'img should be PIL Image'
    img = crop(img, i, j, h, w)
    img = resize(img, size, interpolation)
    return img


def hflip(img):
    """Horizontally flip the given PIL Image.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Horizontally flipped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.transpose(Image.FLIP_LEFT_RIGHT)


def vflip(img):
    """Vertically flip the given PIL Image.

    Args:
        img (PIL Image): Image to be flipped.

    Returns:
        PIL Image: Vertically flipped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.transpose(Image.FLIP_TOP_BOTTOM)


def five_crop(img, size):
    """Crop the given PIL Image into four corners and the central crop.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    Returns:
        tuple: tuple (tl, tr, bl, br, center) corresponding top left,
            top right, bottom left, bottom right and center crop.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    w, h = img.size
    crop_h, crop_w = size
    if crop_w > w or crop_h > h:
        raise ValueError("Requested crop size {} is bigger than input size {}".format(size,
                                                                                      (h, w)))
    tl = img.crop((0, 0, crop_w, crop_h))
    tr = img.crop((w - crop_w, 0, w, crop_h))
    bl = img.crop((0, h - crop_h, crop_w, h))
    br = img.crop((w - crop_w, h - crop_h, w, h))
    center = center_crop(img, (crop_h, crop_w))
    return (tl, tr, bl, br, center)


def ten_crop(img, size, vertical_flip=False):
    """Crop the given PIL Image into four corners and the central crop plus the
    flipped version of these (horizontal flipping is used by default).

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
            br_flip, center_flip) corresponding top left, top right,
            bottom left, bottom right and center crop and same for the
            flipped image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    else:
        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."

    first_five = five_crop(img, size)

    if vertical_flip:
        img = vflip(img)
    else:
        img = hflip(img)

    second_five = five_crop(img, size)
    return first_five + second_five

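
# --- Quick shape check, illustrative only: five_crop yields 5 crops and
# ten_crop doubles that by also cropping the flipped image.
def _crop_count_demo():
    im = Image.new("RGB", (100, 100))
    assert len(five_crop(im, 64)) == 5
    assert len(ten_crop(im, 64)) == 10
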
def adjust_brightness(img, brightness_factor):
    """Adjust brightness of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Brightness(img)
    img = enhancer.enhance(brightness_factor)
    return img


def adjust_contrast(img, contrast_factor):
    """Adjust contrast of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image: Contrast adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(contrast_factor)
    return img


def adjust_saturation(img, saturation_factor):
    """Adjust color saturation of an image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image: Saturation adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    enhancer = ImageEnhance.Color(img)
    img = enhancer.enhance(saturation_factor)
    return img


def adjust_hue(img, hue_factor):
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    See https://en.wikipedia.org/wiki/Hue for more details on Hue.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image: Hue adjusted image.
    """
    if not(-0.5 <= hue_factor <= 0.5):
        raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor))

    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        return img

    h, s, v = img.convert('HSV').split()

    np_h = np.array(h, dtype=np.uint8)
    # uint8 addition takes care of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += np.uint8(hue_factor * 255)
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img


def adjust_gamma(img, gamma, gain=1):
    """Perform gamma correction on an image.

    Also known as Power Law Transform. Intensities in RGB mode are adjusted
    based on the following equation:

        I_out = 255 * gain * ((I_in / 255) ** gamma)

    See https://en.wikipedia.org/wiki/Gamma_correction for more details.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        gamma (float): Non negative real number. gamma larger than 1 makes the
            shadows darker, while gamma smaller than 1 makes dark regions
            lighter.
        gain (float): The constant multiplier.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if gamma < 0:
        raise ValueError('Gamma should be a non-negative real number')

    input_mode = img.mode
    img = img.convert('RGB')

    np_img = np.array(img, dtype=np.float32)
    np_img = 255 * gain * ((np_img / 255) ** gamma)
    np_img = np.uint8(np.clip(np_img, 0, 255))

    img = Image.fromarray(np_img, 'RGB').convert(input_mode)
    return img


def rotate(img, angle, resample=False, expand=False, center=None):
    """Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows).

    Args:
        img (PIL Image): PIL Image to be rotated.
        angle ({float, int}): Rotation angle in degrees, counter-clockwise.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.rotate(angle, resample, expand, center)


def to_grayscale(img, num_output_channels=1):
    """Convert image to grayscale version of image.

    Args:
        img (PIL Image): Image to be converted to grayscale.

    Returns:
        PIL Image: Grayscale version of the image.
            if num_output_channels == 1 : returned image is single channel
            if num_output_channels == 3 : returned image is 3 channel with r == g == b
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if num_output_channels == 1:
        img = img.convert('L')
    elif num_output_channels == 3:
        img = img.convert('L')
        np_img = np.array(img, dtype=np.uint8)
        np_img = np.dstack([np_img, np_img, np_img])
        img = Image.fromarray(np_img, 'RGB')
    else:
        raise ValueError('num_output_channels should be either 1 or 3')

    return img
347
src/data_io/transform.py
Normal file
@ -0,0 +1,347 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 4:19 PM
# @Author : zhuying
# @Company : Minivision
# @File : transform.py
# @Software : PyCharm

from __future__ import division
import math
import random
from PIL import Image
try:
    import accimage
except ImportError:
    accimage = None
import numpy as np
import numbers
import types

from src.data_io import functional as F

__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "RandomHorizontalFlip",
           "Lambda", "RandomResizedCrop", "ColorJitter", "RandomRotation"]


class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img


class ToTensor(object):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    Converts a PIL Image or numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
    """

    def __call__(self, pic):
        """
        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.
        """
        return F.to_tensor(pic)


class Lambda(object):
    """Apply a user-defined lambda as a transform.

    Args:
        lambd (function): Lambda/function to be used for transform.
    """

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img):
        return self.lambd(img)


class ToPILImage(object):
    """Convert a tensor or an ndarray to PIL Image.

    Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
    H x W x C to a PIL Image while preserving the value range.

    Args:
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
            If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
            1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
            2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
            3. If the input has 1 channel, the ``mode`` is determined by the data type (i.e.,
            ``int``, ``float``, ``short``).

    .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
    """

    def __init__(self, mode=None):
        self.mode = mode

    def __call__(self, pic):
        """
        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

        Returns:
            PIL Image: Image converted to PIL Image.
        """
        return F.to_pil_image(pic, self.mode)


class Normalize(object):
    """Normalize a tensor image with mean and standard deviation.

    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
    will normalize each channel of the input ``torch.*Tensor`` i.e.
    ``input[channel] = (input[channel] - mean[channel]) / std[channel]``

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.

        Returns:
            Tensor: Normalized Tensor image.
        """
        return F.normalize(tensor, self.mean, self.std)
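At this point the module has enough pieces to assemble a basic preprocessing pipeline. A hedged sketch; the mean/std values are placeholders rather than this repo's training settings, and pil_img is assumed to be a PIL Image loaded elsewhere:

preprocess = Compose([
    ToTensor(),                                            # HWC [0, 255] -> CHW float [0.0, 1.0]
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # placeholder statistics
])
tensor = preprocess(pil_img)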


class RandomHorizontalFlip(object):
    """Horizontally flip the given PIL Image randomly with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be flipped.

        Returns:
            PIL Image: Randomly flipped image.
        """
        if random.random() < 0.5:
            return F.hflip(img)
        return img


class RandomResizedCrop(object):
    """Crop the given PIL Image to random size and aspect ratio.

    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
    is finally resized to given size.
    This is popularly used to train the Inception networks.

    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.BILINEAR
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
        if isinstance(size, tuple):
            self.size = size
        else:
            self.size = (size, size)
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of size of the origin size cropped
            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                sized crop.
        """
        for attempt in range(10):
            area = img.size[0] * img.size[1]
            target_area = random.uniform(*scale) * area
            aspect_ratio = random.uniform(*ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5:
                w, h = h, w

            if w <= img.size[0] and h <= img.size[1]:
                i = random.randint(0, img.size[1] - h)
                j = random.randint(0, img.size[0] - w)
                return i, j, h, w

        # Fallback
        w = min(img.size[0], img.size[1])
        i = (img.size[1] - w) // 2
        j = (img.size[0] - w) // 2
        return i, j, w, w

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.

        Returns:
            PIL Image: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
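To make the sampling in get_params concrete: a target area of uniform(0.08, 1.0) times the image area and an aspect ratio of uniform(3/4, 4/3) are drawn, then w = sqrt(area * ratio) and h = sqrt(area / ratio); if ten attempts fail to fit inside the image, the centered square fallback is used. A small sketch, with pil_img assumed loaded elsewhere:

rrc = RandomResizedCrop(size=80)   # output is resized to 80x80 regardless of the crop drawn
out = rrc(pil_img)
assert out.size == (80, 80)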


class ColorJitter(object):
    """Randomly change the brightness, contrast and saturation of an image.

    Args:
        brightness (float): How much to jitter brightness. brightness_factor
            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
        contrast (float): How much to jitter contrast. contrast_factor
            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
        saturation (float): How much to jitter saturation. saturation_factor
            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
        hue (float): How much to jitter hue. hue_factor is chosen uniformly from
            [-hue, hue]. Should be >= 0 and <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        """Get a randomized transform to be applied on image.

        Arguments are same as that of __init__.

        Returns:
            Transform which randomly adjusts brightness, contrast and
            saturation in a random order.
        """
        transforms = []
        if brightness > 0:
            brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness)
            transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))

        if contrast > 0:
            contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
            transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))

        if saturation > 0:
            saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation)
            transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))

        if hue > 0:
            hue_factor = np.random.uniform(-hue, hue)
            transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))

        np.random.shuffle(transforms)
        transform = Compose(transforms)

        return transform

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Input image.

        Returns:
            PIL Image: Color jittered image.
        """
        transform = self.get_params(self.brightness, self.contrast,
                                    self.saturation, self.hue)
        return transform(img)
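A usage sketch for ColorJitter; the jitter strengths below are placeholders, not values taken from this repo's training config:

jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
augmented = jitter(pil_img)   # the four Lambda adjustments are applied in random order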


class RandomRotation(object):
    """Rotate the image by angle.

    Args:
        degrees (sequence or float or int): Range of degrees to select from.
            If degrees is a number instead of sequence like (min, max), the range of degrees
            will be (-degrees, +degrees).
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (2-tuple, optional): Optional center of rotation.
            Origin is the upper left corner.
            Default is the center of the image.
    """

    def __init__(self, degrees, resample=False, expand=False, center=None):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
            self.degrees = degrees

        self.resample = resample
        self.expand = expand
        self.center = center

    @staticmethod
    def get_params(degrees):
        """Get parameters for ``rotate`` for a random rotation.

        Returns:
            sequence: params to be passed to ``rotate`` for random rotation.
        """
        angle = np.random.uniform(degrees[0], degrees[1])

        return angle

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be rotated.

        Returns:
            PIL Image: Rotated image.
        """
        angle = self.get_params(self.degrees)

        return F.rotate(img, angle, self.resample, self.expand, self.center)
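And a one-liner sketch for RandomRotation; the 10-degree range is illustrative:

rot = RandomRotation(degrees=10)   # degrees=10 expands to the range (-10, +10)
rotated = rot(pil_img)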
73
src/default_config.py
Normal file
@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-4 9:12 AM
# @Author : zhuying
# @Company : Minivision
# @File : default_config.py
# @Software : PyCharm
# --*-- coding: utf-8 --*--
"""
default config for training
"""

import torch
from datetime import datetime
from easydict import EasyDict
from src.utility import make_if_not_exist, get_width_height, get_kernel


def get_default_config():
    conf = EasyDict()

    # ----------------------training---------------
    conf.lr = 1e-1
    # [9, 13, 15]
    conf.milestones = [10, 15, 22]  # epochs at which to decay the learning rate
    conf.gamma = 0.1
    conf.epochs = 25
    conf.momentum = 0.9
    conf.batch_size = 1024

    # model
    conf.num_classes = 3
    conf.input_channel = 3
    conf.embedding_size = 128

    # dataset
    conf.train_root_path = './datasets/rgb_image'

    # save file path
    conf.snapshot_dir_path = './saved_logs/snapshot'

    # log path
    conf.log_path = './saved_logs/jobs'
    # tensorboard
    conf.board_loss_every = 10
    # save model/iter
    conf.save_every = 30

    return conf


def update_config(args, conf):
    conf.devices = args.devices
    conf.patch_info = args.patch_info
    w_input, h_input = get_width_height(args.patch_info)
    conf.input_size = [h_input, w_input]
    conf.kernel_size = get_kernel(h_input, w_input)
    conf.device = "cuda:{}".format(conf.devices[0]) if torch.cuda.is_available() else "cpu"

    # resize fourier image size
    conf.ft_height = 2*conf.kernel_size[0]
    conf.ft_width = 2*conf.kernel_size[1]
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    job_name = 'Anti_Spoofing_{}'.format(args.patch_info)
    log_path = '{}/{}/{} '.format(conf.log_path, job_name, current_time)
    snapshot_dir = '{}/{}'.format(conf.snapshot_dir_path, job_name)

    make_if_not_exist(snapshot_dir)
    make_if_not_exist(log_path)

    conf.model_path = snapshot_dir
    conf.log_path = log_path
    conf.job_name = job_name
    return conf
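A sketch of how the two helpers compose; args stands in for the argparse namespace a training entry point would build, so the attributes referenced below are assumptions of the sketch:

conf = get_default_config()
conf = update_config(args, conf)   # needs args.devices and args.patch_info to be set
print(conf.input_size, conf.kernel_size, conf.device)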
65
src/generate_patches.py
Normal file
@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-9 3:06 PM
# @Author : zhuying
# @Company : Minivision
# @File : test.py
# @Software : PyCharm
"""
Create patch from original input image by using bbox coordinate
"""

import cv2
import numpy as np


class CropImage:
    @staticmethod
    def _get_new_box(src_w, src_h, bbox, scale):
        x = bbox[0]
        y = bbox[1]
        box_w = bbox[2]
        box_h = bbox[3]

        scale = min((src_h-1)/box_h, min((src_w-1)/box_w, scale))

        new_width = box_w * scale
        new_height = box_h * scale
        center_x, center_y = box_w/2+x, box_h/2+y

        left_top_x = center_x-new_width/2
        left_top_y = center_y-new_height/2
        right_bottom_x = center_x+new_width/2
        right_bottom_y = center_y+new_height/2

        if left_top_x < 0:
            right_bottom_x -= left_top_x
            left_top_x = 0

        if left_top_y < 0:
            right_bottom_y -= left_top_y
            left_top_y = 0

        if right_bottom_x > src_w-1:
            left_top_x -= right_bottom_x-src_w+1
            right_bottom_x = src_w-1

        if right_bottom_y > src_h-1:
            left_top_y -= right_bottom_y-src_h+1
            right_bottom_y = src_h-1

        return int(left_top_x), int(left_top_y),\
               int(right_bottom_x), int(right_bottom_y)

    def crop(self, org_img, bbox, scale, out_w, out_h, crop=True):

        if not crop:
            dst_img = cv2.resize(org_img, (out_w, out_h))
        else:
            src_h, src_w, _ = np.shape(org_img)
            left_top_x, left_top_y, \
                right_bottom_x, right_bottom_y = self._get_new_box(src_w, src_h, bbox, scale)

            img = org_img[left_top_y: right_bottom_y+1,
                          left_top_x: right_bottom_x+1]
            dst_img = cv2.resize(img, (out_w, out_h))
        return dst_img
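An illustrative call to CropImage; bbox is (x, y, w, h) in pixels, and every number below is made up for the sketch:

image_cropper = CropImage()
patch = image_cropper.crop(org_img, bbox=[60, 40, 120, 160],
                           scale=2.7, out_w=80, out_h=80, crop=True)   # 80x80 BGR patch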
296
src/model_lib/MiniFASNet.py
Normal file
@ -0,0 +1,296 @@
# -*- coding: utf-8 -*-
# @Time : 20-6-3 4:45 PM
# @Author : zhuying
# @Company : Minivision
# @File : MiniFASNet.py
# @Software : PyCharm
import torch
import torch.nn.functional as F
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, \
    AdaptiveAvgPool2d, Sequential, Module


class L2Norm(Module):
    def forward(self, input):
        return F.normalize(input)


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Conv_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = Conv2d(in_c, out_c, kernel_size=kernel, groups=groups,
                           stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.prelu(x)
        return x


class Linear_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel,
                           groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Depth_Wise(Module):
    def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(Depth_Wise, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output
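Depth_Wise is a MobileNet-style bottleneck: a 1x1 pointwise expansion, a depthwise 3x3 convolution (groups equal to its input channels), and a 1x1 linear projection, with an identity shortcut when residual=True. A shape sketch with illustrative channel counts:

import torch

block = Depth_Wise(c1=(64, 128), c2=(128, 128), c3=(128, 64),
                   residual=True, stride=(1, 1))
y = block(torch.randn(1, 64, 20, 20))
print(y.shape)   # torch.Size([1, 64, 20, 20]) -- stride 1 plus the shortcut preserve the shape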


class Residual(Module):
    def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True,
                                      kernel=kernel, padding=padding, stride=stride, groups=groups))
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(
            channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.bn1 = BatchNorm2d(channels // reduction)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(
            channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.bn2 = BatchNorm2d(channels)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.sigmoid(x)
        return module_input * x
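SEModule is a standard squeeze-and-excitation gate: global average pooling squeezes each feature map to 1x1, a two-layer 1x1-conv bottleneck produces per-channel weights, and the sigmoid output rescales the input. A quick sketch with illustrative sizes:

import torch

se = SEModule(channels=128, reduction=4)   # bottleneck width 128 // 4 = 32
gated = se(torch.randn(2, 128, 10, 10))
print(gated.shape)   # torch.Size([2, 128, 10, 10]) -- same shape, channels reweighted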


class ResidualSE(Module):
    def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1), se_reduct=4):
        super(ResidualSE, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            if i == num_block-1:
                modules.append(
                    Depth_Wise_SE(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
                                  stride=stride, groups=groups, se_reduct=se_reduct))
            else:
                modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
                                          stride=stride, groups=groups))
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class Depth_Wise_SE(Module):
    def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1, se_reduct=8):
        super(Depth_Wise_SE, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual
        self.se_module = SEModule(c3_out, se_reduct)

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            x = self.se_module(x)
            output = short_cut + x
        else:
            output = x
        return output


class MiniFASNet(Module):
    def __init__(self, keep, embedding_size, conv6_kernel=(7, 7),
                 drop_p=0.0, num_classes=3, img_channel=3):
        super(MiniFASNet, self).__init__()
        self.embedding_size = embedding_size

        self.conv1 = Conv_block(img_channel, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(keep[0], keep[1], kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=keep[1])

        c1 = [(keep[1], keep[2])]
        c2 = [(keep[2], keep[3])]
        c3 = [(keep[3], keep[4])]

        self.conv_23 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[3])

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = Residual(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[16], keep[17])]
        c2 = [(keep[17], keep[18])]
        c3 = [(keep[18], keep[19])]

        self.conv_34 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[19])

        c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
              (keep[31], keep[32]), (keep[34], keep[35])]
        c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
              (keep[32], keep[33]), (keep[35], keep[36])]
        c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
              (keep[33], keep[34]), (keep[36], keep[37])]

        self.conv_4 = Residual(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[37], keep[38])]
        c2 = [(keep[38], keep[39])]
        c3 = [(keep[39], keep[40])]

        self.conv_45 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[40])

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]

        self.conv_5 = Residual(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(keep[46], keep[47], kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_6_dw = Linear_block(keep[47], keep[48], groups=keep[48], kernel=conv6_kernel, stride=(1, 1), padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)
        self.drop = torch.nn.Dropout(p=drop_p)
        self.prob = Linear(embedding_size, num_classes, bias=False)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        out = self.conv_6_sep(out)
        out = self.conv_6_dw(out)
        out = self.conv_6_flatten(out)
        if self.embedding_size != 512:
            out = self.linear(out)
        out = self.bn(out)
        out = self.drop(out)
        out = self.prob(out)
        return out


class MiniFASNetSE(MiniFASNet):
    def __init__(self, keep, embedding_size, conv6_kernel=(7, 7), drop_p=0.75, num_classes=4, img_channel=3):
        super(MiniFASNetSE, self).__init__(keep=keep, embedding_size=embedding_size, conv6_kernel=conv6_kernel,
                                           drop_p=drop_p, num_classes=num_classes, img_channel=img_channel)

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = ResidualSE(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
              (keep[31], keep[32]), (keep[34], keep[35])]
        c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
              (keep[32], keep[33]), (keep[35], keep[36])]
        c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
              (keep[33], keep[34]), (keep[36], keep[37])]

        self.conv_4 = ResidualSE(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
        self.conv_5 = ResidualSE(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))


keep_dict = {'1.8M': [32, 32, 103, 103, 64, 13, 13, 64, 26, 26,
                      64, 13, 13, 64, 52, 52, 64, 231, 231, 128,
                      154, 154, 128, 52, 52, 128, 26, 26, 128, 52,
                      52, 128, 26, 26, 128, 26, 26, 128, 308, 308,
                      128, 26, 26, 128, 26, 26, 128, 512, 512],

             '1.8M_': [32, 32, 103, 103, 64, 13, 13, 64, 13, 13, 64, 13,
                       13, 64, 13, 13, 64, 231, 231, 128, 231, 231, 128, 52,
                       52, 128, 26, 26, 128, 77, 77, 128, 26, 26, 128, 26, 26,
                       128, 308, 308, 128, 26, 26, 128, 26, 26, 128, 512, 512]
             }
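The keep lists are flat channel-width schedules that MiniFASNet.__init__ consumes positionally: keep[0] and keep[1] feed conv1/conv2_dw, keep[1]..keep[4] the first Depth_Wise, and so on through keep[48] for the final depthwise layer. Both schedules therefore share a length; a small sanity check:

assert len(keep_dict['1.8M']) == len(keep_dict['1.8M_']) == 49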


# (80x80) flops: 0.044, params: 0.41
def MiniFASNetV1(embedding_size=128, conv6_kernel=(7, 7),
                 drop_p=0.2, num_classes=3, img_channel=3):
    return MiniFASNet(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2(embedding_size=128, conv6_kernel=(7, 7),
                 drop_p=0.2, num_classes=3, img_channel=3):
    return MiniFASNet(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


def MiniFASNetV1SE(embedding_size=128, conv6_kernel=(7, 7),
                   drop_p=0.75, num_classes=3, img_channel=3):
    return MiniFASNetSE(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)


# (80x80) flops: 0.044, params: 0.43
def MiniFASNetV2SE(embedding_size=128, conv6_kernel=(7, 7),
                   drop_p=0.75, num_classes=4, img_channel=3):
    return MiniFASNetSE(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
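A smoke-test sketch for the factories above. Pairing an 80x80 input with conv6_kernel=(5, 5) is an assumption of the sketch (five stride-2 stages reduce 80 to 5, which the final depthwise kernel must match); it is not asserted anywhere in this diff:

import torch

model = MiniFASNetV2(embedding_size=128, conv6_kernel=(5, 5), num_classes=3)
model.eval()   # eval mode so BatchNorm1d tolerates a batch of one
logits = model(torch.randn(1, 3, 80, 80))
print(logits.shape)   # torch.Size([1, 3])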