Hive 有一部分表执行MR会报错,但是其他表执行MR都会成功的原因是什么?

在执行hive的表任务时,有几张表会报错,例如执行select count(*)就会报错,报错信息如下:

java.io.FileNotFoundException: File does not exist: /datacenter/hive/grp_dstc_carr_dim_t/p_day_id=20170717/p_time_type=1/p_serv_type=10002/p_index=1_140100101_201&220_0/-ext-10000
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsUpdateTimes(FSNamesystem.java:1301)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1254)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1227)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1209)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:393)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:170)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:44064)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:453)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1002)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1695)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1691)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1407)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1689)

    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
    at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:90)
    at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:57)
    at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:959)
    at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:947)
    at org.apache.hadoop.hdfs.DFSClient.getBlockLocations(DFSClient.java:996)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileBlockLocations(DistributedFileSystem.java:192)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileBlockLocations(DistributedFileSystem.java:185)
    at org.apache.hadoop.mapred.lib.CombineFileInputFormat$OneFileInfo.<init>(CombineFileInputFormat.java:463)
    at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getMoreSplits(CombineFileInputFormat.java:256)
    at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:212)
    at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:411)
    at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:377)
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:387)
    at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:1091)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1083)
    at org.apache.hadoop.mapred.JobClient.access$600(JobClient.java:174)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:993)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:946)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1407)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:946)
    at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:920)
    at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:447)
    at org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:138)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:47)
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): File does not exist: /datacenter/hive/grp_dstc_carr_dim_t/p_day_id=20170717/p_time_type=1/p_serv_type=10002/p_index=1_140100101_201&220_0/-ext-10000
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsUpdateTimes(FSNamesystem.java:1301)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1254)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1227)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1209)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:393)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:170)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:44064)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:453)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1002)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1695)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1691)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1407)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1689)

    at org.apache.hadoop.ipc.Client.call(Client.java:1225)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:202)
    at $Proxy18.getBlockLocations(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:154)
    at sun.reflect.GeneratedMethodAccessor120.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:164)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:83)
    at $Proxy19.getBlockLocations(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:957)
    ... 25 more
Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: /datacenter/hive/grp_dstc_carr_dim_t/p_day_id=20170717/p_time_type=1/p_serv_type=10002/p_index=1_140100101_201&220_0/-ext-10000
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsUpdateTimes(FSNamesystem.java:1301)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:1254)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1227)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1209)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:393)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:170)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:44064)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:453)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1002)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1695)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1691)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1407)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1689)
)'
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MapRedTask

求大神赐教,这个为题之前遇到过是所有表无法count(*) ,但是这次很怪异,只有个别及张表会出现这个情况。
环境信息:
hadoop2.2.0-mr1-cdh4.2.0
hive0.10.0-cdh4.2.0

阅读 4.7k
1 个回答

第一句就说了
File does not exist:
你在好好看看你的表吧。

你单独select count(*) from tb_name;试试。

如果是统计的子查询的话,把 * 换成某个字段试试。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进