问题描述
TensorFlow写了一个简单的神经网络报错:ValueError: Cannot feed value of shape (50,) for Tensor 'y-output:0', which has shape '(?, 2)'
问题出现的环境背景及自己尝试过哪些方法
试过np.reshape
方法,但是没用(不知道是不是自己用错了。。。)。
相关代码
代码是仿照TensorFlow 实战Google深度学习框架中minist数据集那个神经网络写的。
我的数据每一行包括629个数据,其中前200个拿来作为特征,倒数第二列是labels。
#!/usr/bin/env python3
# coding: utf-8
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
# 输入输出节点数
INPUT_NODE = 200
OUTPUT_NODE = 2
# 配置神经网络参数, 这里只设置一个隐藏层
LAYER1_NODE = 500
BATCH_SIZE = 50
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
TOTAL_NUM = 1376
def load_data(file):
'''读入数据
'''
data = np.loadtxt('./coef_arr.txt')
np.random.shuffle(data)
global TOTAL_NUMs
TOTAL_NUM = len(data)
train_num = int(TOTAL_NUM*0.7)
test_data = data[train_num:, :]
train_data = data[:train_num, :]
TOTAL_NUM = len(train_data)
return train_data, test_data
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
'''定义神经网络
'''
# 当没有提供滑动平均类时,直接使用参数当前的取值
if avg_class == None:
# 计算隐藏层的前向传播结果,这里使用了ReLu激活函数
layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
# 计算输出层的前向传播结果
return tf.matmul(layer1, weights2) + biases2
else:
# 首先使用avg_class.average函数来计算得出变量的滑动平均值
# 然后计算相应的神经网络的前向传播结果
layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) +
avg_class.average(biases1))
return tf.matmul(layer1, avg_class.average(weights2))+avg_class.average(biases2)
def train():
'''训练模型过程
'''
# 载入数据
train_data, test_data = load_data('./coef_arr.txt')
x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
y_ = tf.placeholder(tf.float32, [None, 2], name='y-output')
# 生成隐藏层的参数
weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
# 生成输出层的函数
weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
# 计算在当前参数下的神经网络前向传播结果。这里用于计算滑动平均的类为None,
# 所以函数不会使用参数的滑动平均值
y = inference(x, None, weights1, biases1, weights2, biases2)
# 定义储存训练轮数的变量(由于这里不计算滑动平均值,所以这个变量为不可训练变量)
global_step = tf.Variable(0, trainable=False)
# 给定滑动平均衰减率和训练轮数的变量,初始化滑动平均类
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# 在所有代表神经网络参数的变量上使用滑动平均(global_step不需要)
# tf.trainabel_variables返回的就是图上集合GraphKeys.TRAINABLE_VARIABLES中的元素。这个集合的元素
# 就是所有没有设定trainable=False的参数
variables_averages_op = variable_averages.apply(tf.trainable_variables())
# 计算使用了滑动平均之后的前向传播结果
average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
# 计算交叉熵作为刻画预测值和真实值之间差距的损失函数
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
# 计算在当前batch中所有样例的交叉熵平均值
cross_entropy_mean = tf.reduce_mean(cross_entropy)
# 计算L2正则化损失函数
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
# 计算模型的正则化损失,一般指计算神经网络边上权重的正则化损失,而不使用偏置项
regularization = regularizer(weights1)+regularizer(weights2)
# 总损失等于交叉熵损失和正则化损失的和
loss = cross_entropy_mean+regularization
# 设置指数衰减的学习率
learning_rate = tf.train.exponential_decay(
LEARNING_RATE_BASE,
global_step,
TOTAL_NUM,
LEARNING_RATE_DECAY)
# 使用tf.train.GradientDescentOptimizer优化算法来优化损失函数
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# 在训练神经网络时,每过一遍数据既需要通过反向传播来更新神经网络中的参数,又要更新每一个参数的滑动平均值
train_op = tf.group(train_step, variables_averages_op)
# 检验使用了滑动平均模型的神经网络传播结果是否正确
correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
# 下面这个运算首先将一个布尔型的数值转换为实数型,然后计算平均值。这个平均值就是模型在这一组数据上的正确率
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# 初始化会话并开始训练过程
with tf.Session() as sess:
tf.global_variables_initializer().run()
# 准备验证数据 报错位置就在下面这一行,显示数据维度不对
validate_feed = {x: np.reshape(test_data[:, :200], (592, -1)), y_: np.reshape(test_data[:, -2].astype(np.uint8), (592, -1))}
# 迭代地训练神经网络
for i in range(TRAINING_STEPS):
# 每1000轮输出一次在验证集上的测试效果
if i % 1000 == 0:
# 计算滑动平均模型在验证数据集上的结果
validate_acc = sess.run(accuracy, feed_dict=validate_feed)
print('After {0} training steps, validation accracy using average model is {1}'.format(i, validate_acc))
# 产生这一轮使用的一个batch的训练数据,并运行训练过程
train_data_index = np.random.randint(TOTAL_NUM, size=BATCH_SIZE)
xs, ys = np.reshape(train_data[train_data_index, :200], (50, -1)), np.reshape(train_data[train_data_index, -2].astype(np.uint8), (50, -1))
sess.run(train_op, feed_dict={x: xs, y_: ys})
# 训练结束后
test_acc = sess.run(accuracy, feed_dict=validate_feed)
print('After {0} training steps, test accuracy using average model is {1}'.format(TRAINING_STEPS, test_acc))
if __name__ == "__main__":
train()
输入数据的结构如下:
In [7]: test_data = data[train_num:, :]
...: train_data = data[:train_num, :]
...:
...:
# 测试数据,这里592个数据是打算一起放进去然后进行预测评估准确率的
In [8]: x = test_data[:,:200]; x
Out[8]:
array([[ 8.91246164e+00, 1.01078152e+01, 8.80562559e+00, ...,
0.00000000e+00, 2.68379727e-01, -0.00000000e+00],
[ 2.24351527e+00, 2.21014256e+00, 3.24120606e-01, ...,
-0.00000000e+00, 4.15543410e-01, -0.00000000e+00],
[ 1.66988056e+01, 1.73381736e+01, 1.68301896e+01, ...,
0.00000000e+00, 1.02455868e-02, -0.00000000e+00],
...,
[ 2.35881337e+00, 4.92812666e+00, 2.67332157e+00, ...,
1.41453446e-01, 0.00000000e+00, -1.59909463e-02],
[-2.10887190e-01, 1.31097380e+00, 8.98165441e-01, ...,
2.33660936e-02, -0.00000000e+00, 6.08660936e-02],
[-7.28152009e+00, -6.08726298e+00, -4.23098025e+00, ...,
-7.18160515e-02, 0.00000000e+00, 0.00000000e+00]])
In [9]: x.shape
Out[9]: (592, 200)
# 训练数据,每次随机抽取50行
In [10]: x_ = train_data[:50, :200]; x_
Out[10]:
array([[ 9.56146318, 9.52391585, 9.43482532, ..., -0. ,
0.14322389, 0. ],
[13.28242346, 13.83013572, 14.28976303, ..., -0. ,
-0.16746366, 0.09310116],
[ 1.3820352 , 1.54098823, 3.45830433, ..., -0. ,
-0.06416255, 0. ],
...,
[10.98494526, 11.38627988, 11.44886119, ..., -0. ,
-0.05050572, -0. ],
[ 7.56331162, 7.9327677 , 8.63404904, ..., -0.01920361,
-0. , -0.03519111],
[ 4.60998919, 5.06155302, 7.19940497, ..., -0.27916831,
0. , -0. ]])
In [11]: x_.shape
Out[11]: (50, 200)
这里y_test是测试数据的标签,训练数据的标签跟这个是一样的
In [18]: y_test = test_data[:,-2]; y_test
Out[18]:
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
... ...
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
报错信息如下:
ValueError: Cannot feed value of shape (592, 1) for Tensor 'y-output:0', which has shape '(?, 2)'
看网上说要用reshape,代码中用了reshape后还是没用。请问要怎么改?谢谢!
另外,我这里是二分类,如上label为1,0,我设置的神经网络输出节点数为2,但是我训练和测试时的输入y都是维度为1,所以我猜想是这里不匹配?我把输出节点数改了后的确是可以运行了,但是输出的准确率却都是1了,请问要怎么改,谢谢!