TensorFlow error: ValueError: Cannot feed value of shape

Problem description

I wrote a simple neural network in TensorFlow and it raises: ValueError: Cannot feed value of shape (50,) for Tensor 'y-output:0', which has shape '(?, 2)'

Background and what I have tried

I tried np.reshape, but it did not help (maybe I used it incorrectly).
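For reference, a minimal sketch (with hypothetical all-zero labels) of why np.reshape alone cannot produce the (?, 2) shape from a 1-D label vector:

import numpy as np

y = np.zeros(592)                 # 1-D labels, shape (592,)
np.reshape(y, (592, -1)).shape    # (592, 1): still does not match (?, 2)
np.reshape(y, (-1, 2)).shape      # (296, 2): right width, but wrong row count,
                                  # and adjacent labels get paired into one row

reshape only rearranges the existing 592 values; matching (?, 2) needs two values per example, e.g. a one-hot encoding.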

Relevant code

The code follows the MNIST network from the book《TensorFlow:实战Google深度学习框架》.
Each row of my data holds 629 values: the first 200 are the features, and the second-to-last column is the label.
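For clarity, the assumed column layout (a sketch of my own slicing; data is the array returned by np.loadtxt):

# data has shape (num_rows, 629)
features = data[:, :200]   # first 200 columns: input features
labels = data[:, -2]       # second-to-last column: the 0/1 label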

#!/usr/bin/env python3
# coding: utf-8


import pandas as pd
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

# Numbers of input and output nodes
INPUT_NODE = 200
OUTPUT_NODE = 2

# Network hyperparameters; only a single hidden layer is used
LAYER1_NODE = 500
BATCH_SIZE = 50
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
TOTAL_NUM = 1376


def load_data(file):
    '''Load the data, shuffle it, and split it 70/30 into train and test sets.'''
    data = np.loadtxt(file)
    np.random.shuffle(data)
    global TOTAL_NUM  # was 'global TOTAL_NUMs' (typo), so the assignment below stayed local
    train_num = int(len(data) * 0.7)
    train_data = data[:train_num, :]
    test_data = data[train_num:, :]
    TOTAL_NUM = len(train_data)
    return train_data, test_data


def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    '''Compute the forward pass of the network.'''
    # When no moving-average class is supplied, use the current variable values directly
    if avg_class is None:
        # Forward pass through the hidden layer, using a ReLU activation
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        # Forward pass through the output layer
        return tf.matmul(layer1, weights2) + biases2
    else:
        # First obtain each variable's moving-average (shadow) value via
        # avg_class.average, then compute the forward pass with those values
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) +
                            avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)


def train():
    '''The model training procedure.'''
    # Load the data
    train_data, test_data = load_data('./coef_arr.txt')

    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, 2], name='y-output')

    # Hidden-layer parameters
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass under the current parameter values. The moving-average class
    # is None here, so the function does not use the shadow (averaged) values
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Variable that stores the number of training steps taken. It is marked
    # non-trainable, since no moving average should be computed for it
    global_step = tf.Variable(0, trainable=False)

    # Initialize the moving-average class with the decay rate and the step counter
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)

    # Apply the moving average to every trainable network parameter (global_step excluded).
    # tf.trainable_variables() returns the elements of the GraphKeys.TRAINABLE_VARIABLES
    # collection, i.e. every variable created without trainable=False
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass using the moving-average (shadow) parameter values
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy as the loss that measures the gap between predictions and labels
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    # Average cross-entropy over all examples in the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Regularization loss of the model; conventionally only the weights are
    # regularized, not the bias terms
    regularization = regularizer(weights1) + regularizer(weights2)
    # Total loss = cross-entropy loss + regularization loss
    loss = cross_entropy_mean + regularization
    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        TOTAL_NUM,
        LEARNING_RATE_DECAY)

    # Minimize the loss with tf.train.GradientDescentOptimizer
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Each pass over the data must both update the parameters via backpropagation
    # and refresh every parameter's moving average
    train_op = tf.group(train_step, variables_averages_op)

    # Check whether the moving-average model's predictions are correct
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # Cast the boolean results to floats and take the mean; this mean is the
    # model's accuracy on the given batch of data
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Start a session and run the training loop
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Prepare the validation data. The error is raised on the line below:
        # the fed value's shape does not match the placeholder
        validate_feed = {x: np.reshape(test_data[:, :200], (592, -1)), y_: np.reshape(test_data[:, -2].astype(np.uint8), (592, -1))}

        # Train the network iteratively
        for i in range(TRAINING_STEPS):
            # Every 1000 steps, report the accuracy on the validation set
            if i % 1000 == 0:
                # Accuracy of the moving-average model on the validation set
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('After {0} training steps, validation accuracy using average model is {1}'.format(i, validate_acc))

            # Draw a random batch of training data and run one training step
            train_data_index = np.random.randint(TOTAL_NUM, size=BATCH_SIZE)
            xs, ys = np.reshape(train_data[train_data_index, :200], (50, -1)), np.reshape(train_data[train_data_index, -2].astype(np.uint8), (50, -1))
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # After training finishes, evaluate on the test set
        test_acc = sess.run(accuracy, feed_dict=validate_feed)
        print('After {0} training steps, test accuracy using average model is {1}'.format(TRAINING_STEPS, test_acc))


if __name__ == "__main__":
    train()

The input data is structured as follows:

In [7]:     test_data = data[train_num:, :]
   ...:     train_data = data[:train_num, :]
   ...:
   ...:
# Test data: all 592 rows are meant to be fed in at once to evaluate prediction accuracy
In [8]: x = test_data[:,:200]; x
Out[8]:
array([[ 8.91246164e+00,  1.01078152e+01,  8.80562559e+00, ...,
         0.00000000e+00,  2.68379727e-01, -0.00000000e+00],
       [ 2.24351527e+00,  2.21014256e+00,  3.24120606e-01, ...,
        -0.00000000e+00,  4.15543410e-01, -0.00000000e+00],
       [ 1.66988056e+01,  1.73381736e+01,  1.68301896e+01, ...,
         0.00000000e+00,  1.02455868e-02, -0.00000000e+00],
       ...,
       [ 2.35881337e+00,  4.92812666e+00,  2.67332157e+00, ...,
         1.41453446e-01,  0.00000000e+00, -1.59909463e-02],
       [-2.10887190e-01,  1.31097380e+00,  8.98165441e-01, ...,
         2.33660936e-02, -0.00000000e+00,  6.08660936e-02],
       [-7.28152009e+00, -6.08726298e+00, -4.23098025e+00, ...,
        -7.18160515e-02,  0.00000000e+00,  0.00000000e+00]])

In [9]: x.shape
Out[9]: (592, 200)

# Training data: 50 rows are drawn at random each step
In [10]: x_ = train_data[:50, :200]; x_
Out[10]:
array([[ 9.56146318,  9.52391585,  9.43482532, ..., -0.        ,
         0.14322389,  0.        ],
       [13.28242346, 13.83013572, 14.28976303, ..., -0.        ,
        -0.16746366,  0.09310116],
       [ 1.3820352 ,  1.54098823,  3.45830433, ..., -0.        ,
        -0.06416255,  0.        ],
       ...,
       [10.98494526, 11.38627988, 11.44886119, ..., -0.        ,
        -0.05050572, -0.        ],
       [ 7.56331162,  7.9327677 ,  8.63404904, ..., -0.01920361,
        -0.        , -0.03519111],
       [ 4.60998919,  5.06155302,  7.19940497, ..., -0.27916831,
         0.        , -0.        ]])

In [11]: x_.shape
Out[11]: (50, 200)

Here y_test holds the test labels; the training labels have the same form:
In [18]: y_test = test_data[:,-2]; y_test
Out[18]:
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
      ... ...
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
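
For reference, these labels form a 1-D vector; a minimal sketch (the np.eye indexing trick is my own choice, not from the book) of encoding them as the (N, 2) one-hot rows the y_ placeholder would accept:

labels = test_data[:, -2].astype(np.int64)   # shape (592,), values 0 or 1
labels_onehot = np.eye(2)[labels]            # shape (592, 2): 0 -> [1., 0.], 1 -> [0., 1.]
validate_feed = {x: test_data[:, :200], y_: labels_onehot}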

The error message is:

ValueError: Cannot feed value of shape (592, 1) for Tensor 'y-output:0', which has shape '(?, 2)'

Answers online say to use reshape, but it still fails after I used reshape in the code. How should I change it? Thanks!
Also, this is binary classification (labels 0 and 1 as shown above). I set the network's output node count to 2, but the y I feed in during training and testing is 1-dimensional, so I suspect this is where the mismatch is. After I changed the number of output nodes, the code does run, but the reported accuracy is always 1. How should this be fixed? Thanks!
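
Since the graph already uses tf.nn.sparse_softmax_cross_entropy_with_logits, I assume another option is to keep OUTPUT_NODE = 2 and feed the raw integer labels instead of one-hot rows; a sketch of the changes I have in mind (untested):

# y_ holds integer class indices of shape (N,), not one-hot rows
y_ = tf.placeholder(tf.int64, [None], name='y-output')

# sparse cross-entropy takes the indices directly, so no tf.argmax is needed
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=y_)

# accuracy compares predicted indices against the fed indices
correct_prediction = tf.equal(tf.argmax(average_y, 1), y_)

# the 1-D label column can then be fed as-is
validate_feed = {x: test_data[:, :200], y_: test_data[:, -2].astype(np.int64)}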
