# CTF 中如何欺骗 AI？

English

CTF 中的欺骗 AI 问题一般分成两类：基于神经网络的和基于统计模型的。如果题目要求选手欺骗神经网络，一般会给出白盒的模型（往往是图像分类任务）；如果是要求选手欺骗统计学习模型，有些题目会给出白盒的模型参数，也有的提供训练数据集。

# 欺骗 kNN：[西湖论剑2020] 指鹿为马

## 任务目标

``````python
import numpy as np
from PIL import Image
import math
import operator
import os
import time
import base64
import random

# NOTE(review): the `def` header for each of the three bodies below is missing
# from the visible source (a bare `return` at module level is a syntax error).
# Presumably these are the loaders for the horse sample, the deer sample and
# the uploaded picture — confirm names and signatures against the original run.py.

# Body 1: open ./horse.png as grayscale ('L'), flatten it to 1-D, append the
# class label 0 as the last element, and return it as a one-row 2-D array.
data = []
p = Image.open('./horse.png').convert('L')
p = np.array(p).reshape(-1)
p = np.append(p,0)
data.append(p)
return np.array(data)

# Body 2: same pipeline for ./deer.png, with class label 1 appended.
data = []
p = Image.open('./deer.png').convert('L')
p = np.array(p).reshape(-1)
p = np.append(p,1)
data.append(p)
return np.array(data)

# Body 3: same pipeline for the image at `pic` (`pic` is not bound in the
# visible lines; presumably the function's parameter — confirm).
# The appended 1 occupies the label slot; getNeighbors() excludes the last
# element from the distance (length = len(testInstance) - 1).
data = []
p = Image.open(pic).convert('L')
p = np.array(p).reshape(-1)
p = np.append(p,1)
data.append(p)
return np.array(data)

def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    Args:
        instance1: Indexable sequence of numbers (list, tuple, NumPy array).
        instance2: Indexable sequence of numbers, at least ``length`` long.
        length: Number of leading components to compare; trailing elements
            (e.g. a class label stored in the last slot) are ignored.

    Returns:
        The distance as a Python ``float``; ``0.0`` when ``length`` is 0.
    """
    # Convert each component to float before subtracting: fixed-width NumPy
    # integers (e.g. uint8 pixels) wrap around on subtraction or squaring,
    # which silently corrupts the distance.
    squared_sum = sum(
        (float(instance1[i]) - float(instance2[i])) ** 2
        for i in range(length)
    )
    return math.sqrt(squared_sum)

def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` rows of ``trainingSet`` nearest to ``testInstance``.

    The last element of every row is treated as its label and excluded from
    the distance. Rows are ranked by ascending distance; rows at equal
    distance keep their original relative order. Raises ``IndexError`` when
    ``k`` exceeds the number of training rows.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (row, euclideanDistance(testInstance, row, feature_count))
        for row in trainingSet
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [ranked[rank][0] for rank in range(k)]

def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    Each neighbor is an indexable row whose last element is its class label.
    When several labels share the highest count, the label that appears
    first in ``neighbors`` wins.

    Args:
        neighbors: Non-empty sequence of rows, as returned by getNeighbors().

    Returns:
        The most frequent last-element value.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    # The body of this function was truncated in the source (a dangling
    # ``else:`` and no return value); this restores the vote count that the
    # surviving lines (``response = neighbors[x][-1]``) were building towards.
    classVotes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        classVotes[response] = classVotes.get(response, 0) + 1
    if not classVotes:
        raise ValueError("getResponse() needs at least one neighbor")
    # dicts keep insertion order and max() returns the first maximum, so
    # ties resolve to the earliest-seen label.
    return max(classVotes, key=classVotes.get)

def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testSet: Sequence of rows; the last element of each row is the true
            label.
        predictions: Sequence of predicted labels, indexed like ``testSet``.

    Returns:
        Accuracy in the range 0.0-100.0. An empty ``testSet`` yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0
    # Index ``predictions`` explicitly (rather than zip) so a too-short
    # prediction list still fails loudly instead of being silently truncated.
    correct = sum(
        1 for i, row in enumerate(testSet) if row[-1] == predictions[i]
    )
    return (correct / float(total)) * 100.0

def check(pic):
    """Return the summed absolute grayscale difference between ``pic`` and deer.png.

    Args:
        pic: Path (or file object) of the submitted image.

    Returns:
        Sum over all pixels of ``abs(deer_pixel - submitted_pixel)``.

    The process exits (via ``exit()``) when the submission cannot be opened
    or decoded, or when its dimensions differ from deer.png.
    """
    # Compare in grayscale ('L'), the same representation the loaders use
    # (``Image.open(...).convert('L')``); this also makes getpixel() return a
    # single int instead of a tuple for RGB/RGBA uploads.
    source_p = Image.open('deer.png').convert('L')
    try:
        # convert() forces the (otherwise lazy) decode, so corrupt files are
        # rejected here rather than crashing later inside getpixel().
        c_p = Image.open(pic).convert('L')
    except Exception:
        exit()
    a, b = source_p.size
    # Reject when EITHER dimension differs. The original used ``and``, which
    # let through images with only one mismatching dimension.
    if c_p.size[0] != a or c_p.size[1] != b:
        exit()
    diff_pixel = 0
    for y in range(b):
        for x in range(a):
            diff_pixel += abs(source_p.getpixel((x, y)) - c_p.getpixel((x, y)))
    return diff_pixel

# Interactive entry point: repeatedly prints a banner and a menu, reads one
# choice from stdin and dispatches on it. Choices '1' and '2' loop back to the
# menu (`continue`); every other path leaves the loop (`break`) or exits.
def main():
while 1:
print('-' * 134)
print('''      ____       __            _          _   _                _                              _   _            _
|  __ \     / _|          | |        | | | |              | |                            | | | |          | |
| |__) |___| |_ ___ _ __  | |_ ___   | |_| |__   ___    __| | ___  ___ _ __    __ _ ___  | |_| |__   ___  | |__   ___  _ __ ___  ___
|  _  // _ \  _/ _ \ '__| | __/ _ \  | __| '_ \ / _ \  / _` |/ _ \/ _ \ '__|  / _` / __| | __| '_ \ / _ \ | '_ \ / _ \| '__/ __|/ _ \\
| | \ \  __/ ||  __/ |    | || (_) | | |_| | | |  __/ | (_| |  __/  __/ |    | (_| \__ \ | |_| | | |  __/ | | | | (_) | |  \__ \  __/
|_|  \_\___|_| \___|_|     \__\___/   \__|_| |_|\___|  \__,_|\___|\___|_|     \__,_|___/  \__|_| |_|\___| |_| |_|\___/|_|  |___/\___|
''')
print('-'*134)
# NOTE(review): the menu lists options 1, 2 and 4 only, but the dispatch
# below also handles '3' (submit a picture).
print('\t1.show source code')
print('\t2.give me the source pictures')
print('\t4.exit')
choose = input('>')
if choose == '1':
# NOTE(review): run.py is opened but, in the visible code, never read,
# printed or closed.
w = open('run.py','r')
continue
elif choose == '2':
# NOTE(review): `h` and `d` are not defined in the visible source;
# presumably bytes holding the encoded horse / deer pictures — confirm.
print('this is horse`s picture:')
print(h.decode())
print('-'*134)
print('this is deer`s picture:')
print(d.decode())
continue
elif choose == '4':
break
elif choose == '3':
# Read the submission as base64 text and decode it to raw bytes; any
# decoding error terminates the process.
pic = input('>')
try:
pic = base64.b64decode(pic)
except:
exit()
# Refuse payloads that contain these byte markers.
if b"<?php" in pic or b'eval' in pic:
print("Hacker!!This is not WEB,It`s Just a misc!!!")
exit()
# Write the upload to a temp file whose name carries 15 random bits, so
# that check() can open it by path.
salt = str(random.getrandbits(15))
pic_name = 'tmp_'+salt+'.png'
tmp_pic = open(pic_name,'wb')
tmp_pic.write(pic)
tmp_pic.close()
# check() returns the summed absolute pixel difference against deer.png;
# a total of 100000 or more is rejected.
if check(pic_name)>=100000:
print('Don`t give me the horse source picture!!!')
os.remove(pic_name)
break
# 1-nearest-neighbour over a two-row training set.
k = 1
# NOTE(review): `ma`, `lu` and `testSet` are not defined in the visible
# source; presumably the horse row, the deer row and the row built from the
# upload — confirm. The reshape requires both rows together to hold exactly
# 2 * 5185 values.
trainingSet = np.append(ma, lu).reshape(2, 5185)
neighbors = getNeighbors(trainingSet, testSet[0], k)
result = getResponse(neighbors)
# 0 is the label appended to the horse sample, so '0' means the upload was
# classified as the horse.
# NOTE(review): the comparison is on repr(result), so it depends on how the
# label's type prints — verify with the NumPy version in use.
if repr(result) == '0':
os.system('clear')
print('Yes,I want this horse like deer,here is your flag encoded by base64')
# NOTE(review): `flag` is not defined in the visible source.
print(flag.decode())
os.remove(pic_name)
break
else:
print('I want horse but not deer!!!')
os.remove(pic_name)
break
else:
print('wrong choose!!!')
break
exit()

if __name__=='__main__':
main()
``````

1. 检查选手上传的图片与 deer 的像素差（逐像素灰度差的绝对值之和）是否小于 100000。如果达到或超过该限制，则报告错误。
2. 求选手图片与 deer 和 horse 的欧几里得距离。离谁更近，就判定为哪个分类。
3. 如果选手图片被判定为马，则选手获胜。

deer 和 horse 都是灰度图，如下：

## 尝试：随机噪声

``````python
for y in range(b):
    for x in range(a):
        diff_pixel += abs(source_p.getpixel((x, y)) - c_p.getpixel((x, y)))
return diff_pixel
``````

# 欺骗白盒神经网络

## 概述

CTF 中的欺骗神经网络题一般如下：给定一个预训练好的分类模型（PyTorch 或者 TensorFlow），再给定一张原图。要求小幅度修改原图，使得神经网络将其误分类为另一个类别。

## 实践：训练神经网络

``````python
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np

import matplotlib.pyplot as plt

# Convert each PIL image to a tensor when a sample is fetched.
trans_to_tensor = transforms.Compose([
    transforms.ToTensor(),
])

# Training split, stored under ./data.
# The original calls were never closed (a syntax error); the closing
# parenthesis is restored here. download=True fetches the files when they are
# not already present under ./data, so the snippet runs on a fresh machine.
data_train = torchvision.datasets.MNIST(
    './data',
    train=True,
    transform=trans_to_tensor,
    download=True,
)

# Test split, same location and transform.
data_test = torchvision.datasets.MNIST(
    './data',
    train=False,
    transform=trans_to_tensor,
    download=True,
)

# Bare expression: shows both dataset summaries when run as a notebook cell.
data_train, data_test
``````

``````python
train_loader = torch.utils.data.DataLoader(
    data_train,
    batch_size=100,
    shuffle=True)
``````

``````python
class MyNet(nn.Module):

def __init__(self):
super().__init__()
self.fc1 = nn.Linear(28*28, 100)
self.fc2 = nn.Linear(100, 10)

def forward(self, x):
    """Return raw class scores (logits) for a batch of images.

    Args:
        x: Tensor whose elements can be viewed as rows of 28*28 values.

    Returns:
        Tensor with one row per image and one column per output of ``fc2``.
    """
    x = x.view(-1, 28*28)   # flatten every image into one row
    x = F.relu(self.fc1(x))
    # Return logits without a final sigmoid: the network is trained and
    # attacked with nn.CrossEntropyLoss, which expects unnormalized scores and
    # applies log-softmax itself. Squashing the scores into (0, 1) first caps
    # how confident the loss can become and weakens the gradients. The argmax
    # prediction is unaffected because sigmoid is monotonic.
    return self.fc2(x)

net = MyNet()
``````

``````python
criterion = nn.CrossEntropyLoss()
``````

``````def fit(net, epoch=1):
net.train()
run_loss = 0

for num_epoch in range(epoch):
print(f'epoch {num_epoch}')

x, y = data[0], data[1]

outputs = net(x)
loss = criterion(outputs, y)

loss.backward()
optimizer.step()

run_loss += loss.item()

if i % 100 == 99:
print(f'[{i+1} / 600] loss={run_loss / 100}')
run_loss = 0

test(net)

# Evaluate `net` on one batch of held-out data and print how many predictions
# match the labels, then put the network back into training mode.
def test(net):
net.eval()

# NOTE(review): `test_data` is not defined anywhere in the visible source;
# presumably an (images, labels) batch drawn from `data_test` — confirm.
x, y = test_data[0], test_data[1]

outputs = net(x)

# torch.max(outputs, 1) returns (values, indices) along dim 1; index [1]
# keeps the position of the largest score in each row, i.e. the predicted class.
pred = torch.max(outputs, 1)[1]
print(f'test acc: {sum(pred == y)} / {y.shape[0]}')

# Restore training mode, undoing the net.eval() call above.
net.train()``````

## 实践：欺骗白盒多层感知机

1. 将图片输入网络，得到网络输出。
2. 将网络输出与期望输出求 loss 值（这里采用交叉熵）。
3. 将图片像素减去自己的梯度 * alpha，不改变网络参数。

``````def play(epoch):

loss_fn = nn.CrossEntropyLoss()   # 交叉熵损失函数

for num_epoch in range(epoch):
output = net(img)
target = torch.tensor([3])    # 误导网络，使之分类为 3
loss = loss_fn(output, target)

loss.backward()               # 计算梯度

if num_epoch % 10 == 9:
print(f'[{num_epoch + 1} / {epoch}] loss: {loss} pred: {torch.max(output, 1)[1].item()}')

if torch.max(output, 1)[1].item() == 3:
print(f'done in round {num_epoch + 1}')
return

img = origin.view(1, 28, 28)

play(100)``````

131 声望
333 粉丝
0 条评论