Problem Description

Exception stack trace:

java.lang.RuntimeException: serious problem
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1021)
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getSplits(OrcInputFormat.java:1048)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:200)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.immutable.List.map(List.scala:285)
    at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:84)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:46)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
    at org.apache.spark.rdd.RDD.getNumPartitions(RDD.scala:267)
    at com.netease.music.da.transfer.jdbc.writer.JDBCWriter.doWrite(JDBCWriter.scala:109)
    at com.netease.music.da.transfer.common.writer.AbstractDataWriter.write(AbstractDataWriter.scala:12)
    at com.netease.music.da.transfer.common.Worker$.main(Worker.scala:33)
    at com.netease.music.da.transfer.common.Worker.main(Worker.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:678)
Caused by: java.lang.NullPointerException
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$BISplitStrategy.getSplits(OrcInputFormat.java:560)
    at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1010)

Solution

Transfer task: add the following to the node properties or the runtime advanced configuration:

ndi.spark.spark-conf.spark.sql.orc.impl=native
ndi.spark.spark-conf.spark.sql.hive.convertMetastoreOrc=true

SQL node: add the following to the node properties or the runtime advanced configuration:

conf.spark.sql.orc.impl=native
conf.spark.sql.hive.convertMetastoreOrc=true
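
Both prefixes map to the same two underlying Spark settings. For reference, a minimal sketch of applying them directly in a Spark application (assumes Spark 2.3+, where the native ORC implementation is available; the table name is hypothetical):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("orc-read-workaround")
  // Use Spark's native ORC reader instead of the legacy Hive reader.
  .config("spark.sql.orc.impl", "native")
  // Convert Hive metastore ORC tables to Spark data source tables,
  // so reads bypass OrcInputFormat.generateSplitsInfo entirely.
  .config("spark.sql.hive.convertMetastoreOrc", "true")
  .enableHiveSupport()
  .getOrCreate()

// Reading the previously failing ORC table should no longer hit the NPE.
spark.sql("SELECT * FROM some_db.some_orc_table").show()  // hypothetical table

The same settings can also be passed on the command line, e.g. spark-submit --conf spark.sql.orc.impl=native --conf spark.sql.hive.convertMetastoreOrc=true.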

Reference: https://stackoverflow.com/questions/43500949/spark-load-or-select-hive-table-of-orc-format

Root Cause

The table is a Hive transactional (ACID) table, or an abnormal write operation left files in the table (for example, empty ORC files) that the legacy Hive ORC reader cannot generate splits for, triggering the NullPointerException in OrcInputFormat$BISplitStrategy.getSplits. Switching to Spark's native ORC reader, as configured above, avoids this code path.

Author: 常惠渊