深度学习实践-人脸活体检测CelebA_Spoof数据集

1.下载数据集

原数据集:
链接:https://pan.baidu.com/s/19dTr...
提取码:sngl
数据集有70多G,体积太大,因此将其加工成 .npz 文件后再使用
链接:https://pan.baidu.com/s/1CBjP...
提取码:ranb

2. 加工代码

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt


def standard_width_height_scaling(real_w, real_h, bbox0, bbox1, bbox2, bbox3):
    """Rescale a bounding box annotated on a 224x224 grid to the real image size.

    CelebA-Spoof ``*_BB.txt`` boxes are stored relative to a 224x224 frame;
    each coordinate is multiplied by (real_dimension / 224) and truncated.

    Returns the rescaled ``(x, y, w, h)`` as ints in real-image pixels.
    """
    scale_x = float(real_w) / 224
    scale_y = float(real_h) / 224
    x_scaled = int(int(bbox0) * scale_x)
    y_scaled = int(int(bbox1) * scale_y)
    w_scaled = int(int(bbox2) * scale_x)
    h_scaled = int(int(bbox3) * scale_y)
    return x_scaled, y_scaled, w_scaled, h_scaled


def get_area_bbox_indices(bound_box_path, real_w, real_h):
    """Read the first bounding box from a CelebA-Spoof ``*_BB.txt`` file and
    rescale it to real-image coordinates.

    The file's first line holds "x y w h [score]" relative to a 224x224 frame
    (only the first four fields are used).

    Returns the rescaled ``(x, y, w, h)`` ints.
    """
    # Context manager guarantees the annotation file is closed; the previous
    # version opened it and never closed it (handle leak). Only the first
    # line is needed, so the whole-file read is gone too.
    with open(bound_box_path, "r") as bound_box_file:
        bbox = bound_box_file.readline().split()
    return standard_width_height_scaling(real_w, real_h,
                                         bbox[0], bbox[1], bbox[2], bbox[3])


def get_ratio_bbox_and_image(full_img_path, bound_box_path):
    """Return area(face bbox) / area(whole image) for one sample.

    Loads the image to obtain its real dimensions, rescales the annotated
    bounding box to those dimensions, and divides the two areas.
    """
    height, width = cv2.imread(full_img_path).shape[:2]
    _, _, box_w, box_h = get_area_bbox_indices(bound_box_path, width, height)
    return (box_w * box_h) / (height * width)


def get_padding_bbox_indices(x1, y1, w1, h1, real_w, real_h, ratio_bbox_and_image):
    """Expand a face bbox by a margin proportional to its image-area ratio.

    The per-axis margin is ``int(side * (1 + ratio))``.  The top-left corner is
    shifted up/left by the margin and clamped at 0; the width/height values are
    grown by the margin and clamped to the image dimensions.

    Returns the padded ``(x, y, w, h)`` ints.
    """
    margin_w = int(w1 * (1 + ratio_bbox_and_image))
    margin_h = int(h1 * (1 + ratio_bbox_and_image))
    x1_padding = max(x1 - margin_w, 0)
    y1_padding = max(y1 - margin_h, 0)
    w1_padding = min(w1 + margin_w, real_w)
    h1_padding = min(h1 + margin_h, real_h)
    return x1_padding, y1_padding, w1_padding, h1_padding


def read_crop_img_with_bbox(full_img_path, bound_box_path):
    """Load an image together with its rescaled face bounding box.

    Returns ``(x, y, w, h, img, real_w, real_h)`` where the bbox ints are
    already expressed in real-image pixel coordinates.
    """
    img = cv2.imread(full_img_path)
    real_h, real_w = img.shape[:2]
    x1, y1, w1, h1 = get_area_bbox_indices(bound_box_path, real_w, real_h)
    return x1, y1, w1, h1, img, real_w, real_h


# Accumulators for the dataset build.  Only the "padding" variants are written
# out below; the "original_cropped_*" lists are never filled here.
original_cropped_storage = []
padding_cropped_storage = []
img_names = []
original_cropped_labels = []
padding_cropped_labels = []

# Per-class counters and caps: collect at most 5000 live and 5000 spoof crops.
count_live = 0
count_spoof = 0
# Target (width, height) for every resized face crop.
dim = (100, 100)
count_limit_live = 5000
count_limit_spoof = 5000

# Walk <root>/<subject-id>/<live|spoof>/<image>.jpg; each .jpg has a sibling
# <name>_BB.txt annotation file holding its face bounding box.
rootdir_train = r'E:\mldata\CelebA_Spoof_zip\CelebA_Spoof\Data\train'
for file in os.listdir(rootdir_train):
    d = os.path.join(rootdir_train, file)
    print(d)
    if os.path.isdir(d):
        for e in os.listdir(d):
            # e is the class folder name: 'live' or 'spoof'.
            imgs_path = d + '/' + e + '/'
            for img_path in os.listdir(imgs_path):
                if (img_path.endswith(".jpg")):
                    full_img_path = imgs_path + img_path
                    # Bounding-box annotation sits next to the image.
                    bound_box_path = full_img_path[0:-4] + '_BB.txt'
                    x1, y1, w1, h1, img, real_w, real_h = read_crop_img_with_bbox(full_img_path, bound_box_path)
                    ratio_bbox_and_image = get_ratio_bbox_and_image(full_img_path, bound_box_path)
                    x1_padding, y1_padding, w1_padding, h1_padding = get_padding_bbox_indices(x1, y1, w1, h1,
                                                                                              real_w, real_h,
                                                                                              ratio_bbox_and_image)
                    # NOTE(review): the slice mixes the padded corner with the
                    # original corner plus padded extent (y1 + h1_padding) --
                    # presumably a deliberately generous crop around the face;
                    # confirm against the bbox convention.
                    padding_img = img[y1_padding:y1 + h1_padding, x1_padding:x1 + w1_padding]
                    try:
                        # Skip samples for a class whose quota is already full.
                        if (e == 'live' and count_live >= count_limit_live) or (
                                e == 'spoof' and count_spoof >= count_limit_spoof):
                            continue
                        resized_padding_img = cv2.resize(padding_img, dim, interpolation=cv2.INTER_AREA)
                        padding_cropped_storage.append(resized_padding_img)
                        # Label convention: 1 = live, 0 = spoof.
                        if e == 'live':
                            count_live = count_live + 1
                            padding_cropped_labels.append(1)
                        elif e == 'spoof':
                            count_spoof = count_spoof + 1
                            padding_cropped_labels.append(0)
                    except:
                        # HACK: bare except silently drops any image whose crop
                        # is empty/invalid (cv2.resize raises); it also hides
                        # real bugs -- narrow to cv2.error if possible.
                        continue

                    img_names.append(img_path)

                    # Stop scanning this folder once its class quota is met.
                    if (count_live == count_limit_live and e == 'live') or (
                            count_spoof == count_limit_spoof and e == 'spoof'):
                        break
            if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
                break
    if count_live >= count_limit_live and count_spoof >= count_limit_spoof:
        print("生成完成 ")
        break
# Persist crops and labels; load back via arr_0 (X) / arr_1 (y).
# NOTE(review): this writes 'spoofing_data.npz' while the training code loads
# 'anti_spoofing_data.npz' -- the two names should agree.
X = np.asarray(padding_cropped_storage)
y = np.asarray(padding_cropped_labels)
np.savez('../data/spoofing_data.npz', X, y)
print("生成npz完成")

3.创建模型

def fit_model():
    """Train a small CNN live/spoof classifier on the prepared .npz dataset.

    Loads X (100x100x3 face crops) and y (1 = live, 0 = spoof), splits the
    data 70/15/15 into train/validation/test, trains for 10 epochs and saves
    the model to ./model/live_model.h5.
    """
    # NOTE(review): the build script saves '../data/spoofing_data.npz' but
    # this loads 'anti_spoofing_data.npz' -- make sure the names agree.
    anti_spoofing_data = np.load('../data/anti_spoofing_data.npz')
    X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']
    # Vectorized class counts instead of a Python-level loop over y.
    check_live_label = int((y == 1).sum())
    check_spoof_label = int((y == 0).sum())
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")

    # 70% train; remaining 30% split evenly into validation and test.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)

    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    print(f'Testing dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Testing dataset size of y_valid: {len(y_valid)}')

    # Three conv/pool stages followed by a small dense head; the final Dense(2)
    # emits raw logits (hence from_logits=True in the loss).
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(2))

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=10,
                        validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)
    model.save('./model/live_model.h5')

4.训练结果

Epoch 1/10
219/219 [==============================] - 45s 203ms/step - loss: 8.3599 - accuracy: 0.7795 - val_loss: 0.2634 - val_accuracy: 0.9073
Epoch 2/10
219/219 [==============================] - 45s 203ms/step - loss: 0.1445 - accuracy: 0.9432 - val_loss: 0.2837 - val_accuracy: 0.9227
Epoch 3/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1026 - accuracy: 0.9586 - val_loss: 0.2011 - val_accuracy: 0.9367
Epoch 4/10
219/219 [==============================] - 45s 205ms/step - loss: 0.1586 - accuracy: 0.9619 - val_loss: 0.1472 - val_accuracy: 0.9493
Epoch 5/10
219/219 [==============================] - 45s 204ms/step - loss: 0.1232 - accuracy: 0.9554 - val_loss: 0.0931 - val_accuracy: 0.9653
Epoch 6/10
219/219 [==============================] - 46s 210ms/step - loss: 0.0491 - accuracy: 0.9832 - val_loss: 0.1221 - val_accuracy: 0.9567
Epoch 7/10
219/219 [==============================] - 47s 214ms/step - loss: 0.0468 - accuracy: 0.9823 - val_loss: 0.0854 - val_accuracy: 0.9660
Epoch 8/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0254 - accuracy: 0.9933 - val_loss: 0.1062 - val_accuracy: 0.9640
Epoch 9/10
219/219 [==============================] - 45s 204ms/step - loss: 0.0291 - accuracy: 0.9901 - val_loss: 0.1596 - val_accuracy: 0.9647
Epoch 10/10
219/219 [==============================] - 45s 205ms/step - loss: 0.0254 - accuracy: 0.9923 - val_loss: 0.1037 - val_accuracy: 0.9720
47/47 - 2s - loss: 0.1117 - accuracy: 0.9653
0.11169376969337463 0.9653333425521851

5.完整代码

from sklearn.model_selection import train_test_split
import numpy as  np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import os
import cv2
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential

def fit_model():
    """Train a small CNN live/spoof classifier on the prepared .npz dataset.

    Loads X (100x100x3 face crops) and y (1 = live, 0 = spoof), splits the
    data 70/15/15 into train/validation/test, trains for 10 epochs and saves
    the model to ./model/live_model.h5.
    """
    # NOTE(review): the build script saves '../data/spoofing_data.npz' but
    # this loads 'anti_spoofing_data.npz' -- make sure the names agree.
    anti_spoofing_data = np.load('../data/anti_spoofing_data.npz')
    X, y = anti_spoofing_data['arr_0'], anti_spoofing_data['arr_1']
    # Vectorized class counts instead of a Python-level loop over y.
    check_live_label = int((y == 1).sum())
    check_spoof_label = int((y == 0).sum())
    print(
        f"There are 2 classes including number of live is {check_live_label} and number of spoof is {check_spoof_label}")

    # 70% train; remaining 30% split evenly into validation and test.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=42)
    X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)

    print(f'Training dataset size of X_train: {len(X_train)}')
    print(f'Training : {X_train.shape}')
    print(f'Testing dataset size of X_test: {len(X_test)}')
    print(f'Validation dataset size of X_valid: {len(X_valid)}')
    print(f'Testing dataset size of y_train: {len(y_train)}')
    print(f'Testing dataset size of y_test: {len(y_test)}')
    print(f'Testing dataset size of y_valid: {len(y_valid)}')

    # Three conv/pool stages followed by a small dense head; the final Dense(2)
    # emits raw logits (hence from_logits=True in the loss).
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(2))

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=10,
                        validation_data=(X_valid, y_valid))
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(test_loss, test_acc)
    model.save('./model/live_model.h5')

# Script entry point: train and save the live/spoof model.
if __name__ == '__main__':
    fit_model()
32 声望
13 粉丝
0 条评论
推荐阅读
推荐系统评测指标
一. 评测指标用户满意度、预测准确度、覆盖率、多样性、 新颖性、惊喜度、信任度、实时性、健壮性、商业目标1. 用户满意度满意度是评测推荐系统的最重要指标,只能通过用户调查或者在线实验获得,主要是通过调查...

捕风阅读 591评论 1

视频清晰度优化指南
随着移动互联网的深入发展,视频消费场景逐渐变成主流,早期由于手机硬件的限制问题,导致生产出来的视频画质、清晰度存在较大的问题,用户体验不太好,当时的网络也处于4G的发展阶段,网络的限制也无法持续支持...

得物技术2阅读 857

python里打印list的四种方法
原文链接标题:Print lists in Python (4 Different Ways)用for循环来打印 {代码...} 结果1 2 3 4 5用 * 星号来打印 {代码...} 结果 {代码...} 把list转换为str来打印 {代码...} 结果 {代码...} 用map把数组里非...

chiiinnn阅读 10.2k

封面图
Ubuntu20.04 从源代码编译安装 python3.10
Ubuntu 22.04 Release DateUbuntu 22.04 Jammy Jellyfish is scheduled for release on April 21, 2022If you’re ready to use Ubuntu 22.04 Jammy Jellyfish, you can either upgrade your current Ubuntu syste...

ponponon1阅读 3.9k

“3D 元宇宙技术”在汽车新零售领域的应用与实践
随着不久前汽车之家新零售项目震撼发布,我们直击用户看车选车痛点首次提出 ABC 新体验模式,以元宇宙科技打造沉浸式交互服务,开放元宇宙能源空间站体验店,为用户打造更“有用”的体验。

之家技术阅读 5.1k

封面图
日常Python 代码片段整理
1、简单的 HTTP Web 服务器 {代码...} 2、单行循环List {代码...} 3、更新字典 {代码...} 4、拆分多行字符串 {代码...} 5、跟踪列表中元素的频率 {代码...} 6、不使用 Pandas 读取 CSV 文件 {代码...} 7、将列表...

墨城2阅读 291

Unicode 正则表达式(qbit)
前言本文根据《精通正则表达式》和 Unicode Regular Expressions 整理。本文的示例默认以 Python3 为实现语言,用到 Python3 的 re 模块或 regex 库。基本的 Unicode 属性分类 {代码...} 基本的 Unicode 子属性Le...

qbit阅读 4.3k

32 声望
13 粉丝
宣传栏