无法将 Spark 连接到 Cloudant
问题描述:
我尝试用 Java 代码从 Cloudant 获取数据时报错,无法将 Spark 连接到 Cloudant。
我尝试了以下 Spark 及 spark-cloudant 连接器版本:
Spark 2.0.0,
Spark 2.0.1,
Spark 2.0.2
所有版本都报同样的错误,错误信息见下文。
如果我添加 scala 依赖来解决该错误,它又会与 Spark 库发生冲突。
下面是我的Java代码,
package spark.cloudant.connecter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SQLContext;
import com.cloudant.spark.*;
public class cloudantconnecter {
    /**
     * Entry point: starts a local Spark context, connects to a Cloudant
     * database via the spark-cloudant data source, and prints the inferred
     * schema of that database.
     */
    public static void main(String[] args) throws Exception {
        try {
            // Run locally on all available cores.
            SparkConf conf = new SparkConf()
                    .setAppName("spark cloudant connecter")
                    .setMaster("local[*]");
            // Allow many streaming jobs to run concurrently.
            conf.set("spark.streaming.concurrentJobs", "30");

            JavaSparkContext context = new JavaSparkContext(conf);
            SQLContext sql = new SQLContext(context);
            System.out.print("initialization successfully");

            // Read the Cloudant database through the connector's data source.
            // Placeholder credentials (HOSTNAME/USERNAME/PASSWORD/DATABASENAME)
            // must be replaced with real values before running.
            Dataset<org.apache.spark.sql.Row> rows = sql.read()
                    .format("com.cloudant.spark")
                    .option("cloudant.host", "HOSTNAME")
                    .option("cloudant.username", "USERNAME")
                    .option("cloudant.password", "PASSWORD")
                    .load("DATABASENAME");
            rows.printSchema();
        } catch (Exception e) {
            // Surface any startup/connection failure for the question report.
            e.printStackTrace();
        }
    }
}
Maven依赖
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>cloudant-labs</groupId>
<artifactId>spark-cloudant</artifactId>
<version>2.0.0-s_2.11</version>
</dependency>
</dependencies>
错误的详细信息如下:
Exception in thread "main" java.lang.NoSuchMethodError: scala/Predef$.ArrowAssoc(Ljava/lang/Object;)Ljava/lang/Object; (loaded from file:/C:/Users/Administrator/.m2/repository/org/scala-lang/scala-library/2.10.6/scala-library-2.10.6.jar by [email protected]) called from class scalaj.http.HttpConstants$ (loaded from file:/C:/Users/Administrator/.m2/repository/org/scalaj/scalaj-http_2.11/2.3.0/scalaj-http_2.11-2.3.0.jar by [email protected]).
at scalaj.http.HttpConstants$.liftedTree1$1(Http.scala:637)
at scalaj.http.HttpConstants$.<init>(Http.scala:636)
at scalaj.http.HttpConstants$.<clinit>(Http.scala)
at scalaj.http.BaseHttp$.$lessinit$greater$default$2(Http.scala:754)
at scalaj.http.Http$.<init>(Http.scala:738)
at scalaj.http.Http$.<clinit>(Http.scala)
at com.cloudant.spark.common.JsonStoreDataAccess.getQueryResult(JsonStoreDataAccess.scala:152)
at com.cloudant.spark.common.JsonStoreDataAccess.getTotalRows(JsonStoreDataAccess.scala:99)
at com.cloudant.spark.common.JsonStoreRDD.totalRows$lzycompute(JsonStoreRDD.scala:56)
at com.cloudant.spark.common.JsonStoreRDD.totalRows(JsonStoreRDD.scala:55)
at com.cloudant.spark.common.JsonStoreRDD.totalPartition$lzycompute(JsonStoreRDD.scala:59)
at com.cloudant.spark.common.JsonStoreRDD.totalPartition(JsonStoreRDD.scala:58)
at com.cloudant.spark.common.JsonStoreRDD.getPartitions(JsonStoreRDD.scala:81)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1934)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1046)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1040)
at org.apache.spark.sql.execution.datasources.json.InferSchema$.infer(InferSchema.scala:68)
at org.apache.spark.sql.DataFrameReader$$anonfun$3.apply(DataFrameReader.scala:317)
at org.apache.spark.sql.DataFrameReader$$anonfun$3.apply(DataFrameReader.scala:317)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:316)
at com.cloudant.spark.DefaultSource.create(DefaultSource.scala:127)
at com.cloudant.spark.DefaultSource.createRelation(DefaultSource.scala:105)
at com.cloudant.spark.DefaultSource.createRelation(DefaultSource.scala:100)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:315)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:149)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:132)
at spark.cloudant.connecter.cloudantconnecter.main(cloudantconnecter.java:24)
答
错误出现的原因是:spark-core 使用的是 Scala 2.10,而 spark-cloudant 连接器是基于 Scala 2.11 编译的,两者冲突。
所以请把依赖 spark-core_2.10 改为 spark-core_2.11。
修改后的依赖如下:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>cloudant-labs</groupId>
<artifactId>spark-cloudant</artifactId>
<version>2.0.0-s_2.11</version>
</dependency>
看来你使用的 Scala 版本不对。如果您使用连接器版本 2.0.0,请确保您使用的是 Scala 2.11。Stack Overflow 上的这个问题可能会帮助你:http://stackoverflow.com/questions/25089852/what-is-the-reason-for-java-lang-nosuchmethoderror-scala-predef-arrowassoc-upo –
如果我添加 scala 版本,那么这个依赖又会与 Spark 核心库冲突 –