java maven apache-spark

Using Java SparkSQL, getting: java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayBuffer org.apache.spark.executor.TaskMetrics.externalAccums()'


I am learning Spark with Java and trying to perform a simple task with SparkSQL, but I am getting the error below. I tried to find an answer on the internet, but without success.

23/10/02 10:48:10 ERROR Utils: uncaught error in thread spark-listener-group-appStatus, stopping SparkContext
java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayBuffer org.apache.spark.executor.TaskMetrics.externalAccums()'
    at org.apache.spark.sql.execution.ui.SQLAppStatusListener.onTaskEnd(SQLAppStatusListener.scala:179)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:45)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:117)
    at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:101)
    at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:105)
    at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:105)
    at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.java:23)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:100)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:96)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1471)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:96)
23/10/02 10:48:10 INFO SparkContext: SparkContext is stopping with exitCode 0.
23/10/02 10:48:10 ERROR Utils: throw uncaught fatal error in thread spark-listener-group-appStatus
java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayBuffer org.apache.spark.executor.TaskMetrics.externalAccums()'
    at org.apache.spark.sql.execution.ui.SQLAppStatusListener.onTaskEnd(SQLAppStatusListener.scala:179)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:45)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:117)
    at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:101)
    at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:105)
    at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:105)
    at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.java:23)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:100)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:96)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1471)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:96)
Exception in thread "spark-listener-group-appStatus" java.lang.NoSuchMethodError: 'scala.collection.mutable.ArrayBuffer org.apache.spark.executor.TaskMetrics.externalAccums()'
    at org.apache.spark.sql.execution.ui.SQLAppStatusListener.onTaskEnd(SQLAppStatusListener.scala:179)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:45)
    at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
    at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:117)
    at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:101)
    at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:105)
    at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:105)
    at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.java:23)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
    at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:100)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:96)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1471)
    at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:96)
23/10/02 10:48:10 INFO SparkUI: Stopped Spark web UI at http://{username}:4040
23/10/02 10:48:10 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
23/10/02 10:48:10 INFO CodeGenerator: Code generated in 59.174833 ms
23/10/02 10:48:10 INFO MemoryStore: MemoryStore cleared
23/10/02 10:48:10 INFO BlockManager: BlockManager stopped
23/10/02 10:48:10 INFO BlockManagerMaster: BlockManagerMaster stopped
23/10/02 10:48:10 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
23/10/02 10:48:10 INFO SparkContext: Successfully stopped SparkContext
23/10/02 10:48:10 INFO FileSourceStrategy: Pushed Filters: 
23/10/02 10:48:10 INFO FileSourceStrategy: Post-Scan Filters: 
23/10/02 10:48:10 INFO FileSourceStrategy: Output Data Schema: struct<value: string>
Exception in thread "main" java.lang.IllegalStateException: Cannot call methods on a stopped SparkContext.
This stopped SparkContext was created at:

org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:943)
org.example.Main.main(Main.java:21)

The currently active SparkContext was created at:

(No active SparkContext.)
         
    at org.apache.spark.SparkContext.assertNotStopped(SparkContext.scala:120)
    at org.apache.spark.SparkContext.broadcastInternal(SparkContext.scala:1545)
    at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1530)
    at org.apache.spark.sql.execution.datasources.text.TextFileFormat.buildReader(TextFileFormat.scala:106)
    at org.apache.spark.sql.execution.datasources.FileFormat.buildReaderWithPartitionValues(FileFormat.scala:131)
    at org.apache.spark.sql.execution.datasources.FileFormat.buildReaderWithPartitionValues$(FileFormat.scala:122)
    at org.apache.spark.sql.execution.datasources.TextBasedFileFormat.buildReaderWithPartitionValues(FileFormat.scala:177)
    at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD$lzycompute(DataSourceScanExec.scala:426)
    at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD(DataSourceScanExec.scala:417)
    at org.apache.spark.sql.execution.FileSourceScanExec.doExecute(DataSourceScanExec.scala:504)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:184)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:222)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:219)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:96)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:184)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:222)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:219)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:185)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:184)
    at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3247)
    at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3245)
    at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.inferFromDataset(CSVDataSource.scala:130)
    at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.infer(CSVDataSource.scala:113)
    at org.apache.spark.sql.execution.datasources.csv.CSVDataSource.inferSchema(CSVDataSource.scala:65)
    at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:63)
    at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$11(DataSource.scala:210)
    at scala.Option.orElse(Option.scala:447)
    at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:207)
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:411)
    at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274)
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245)
    at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:571)
    at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:481)
    at org.example.Main.main(Main.java:23)
23/10/02 10:48:10 INFO ShutdownHookManager: Shutdown hook called
23/10/02 10:48:10 INFO ShutdownHookManager: Deleting directory /private/var/folders/bg/2frhq2rj0hj0fhmn9ld5yyrh0000gn/T/spark-bda4ea42-9883-4672-ade8-b0aeb1802869

My POM is:


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>spark_tutorial</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>15</maven.compiler.source>
        <maven.compiler.target>15</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.5.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>3.5.0</version>
            <scope>compile</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.3.6</version>
        </dependency>
        <dependency>
            <groupId>com.opencsv</groupId>
            <artifactId>opencsv</artifactId>
            <version>5.5.1</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.22</version> <!-- Use the latest version -->
            <scope>compile</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.scala-lang/scala-library -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.12.0</version>
        </dependency>
    </dependencies>

</project>

And my code:

package org.example;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class Main {

    @SuppressWarnings("resourse")
    public static void main(String[] args) {

        System.setProperty("hadoop.home.dir", "c:/hadoop");
        Logger.getLogger("org.apache").setLevel(Level.WARN);

        SparkSession spark = SparkSession.builder()
                .appName("Tutorial Spark SQL")
                .master("local[*]")
                .config("spark.driver.bindAddress", "127.0.0.1")
                .getOrCreate();

        Dataset<Row> dataset = spark.read().option("header", true).csv("src/main/resources/students.csv");
        dataset.show();

        spark.close();

    }
}

Thanks for your help in advance!


Solution

  • You have an outdated scala-library 2.12.0 (released Oct 28, 2016).

    Update it to 2.12.18, the Scala version that Spark 3.5.0 is built
    against (see the Spark 3.5.0 POM), and everything should work fine.
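
    For reference, a minimal sketch of the corrected dependency (just the
    version bump; alternatively, you could drop the explicit scala-library
    dependency entirely, since spark-core_2.12:3.5.0 already pulls in the
    matching Scala version transitively):

        <!-- Must match the Scala version Spark 3.5.0 was built against -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.12.18</version>
        </dependency>

    You can check which scala-library version actually ends up on the
    classpath with mvn dependency:tree.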