I am running Spark programs on Ubuntu Linux 22.04 LTS from the Eclipse IDE, using Oracle JDK 1.8.0_361. I referred to this thread.
The Spark programs are executed directly from the Eclipse IDE. Spark SQL programs are failing, while regular Spark programs all work fine.
Here is the program:
package org.example;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.max;

public class HousePriceSolution {

    private static final String PRICE = "Price";
    private static final String PRICE_SQ_FT = "Price SQ Ft";

    public static void main(String[] args) throws Exception {
        Logger.getLogger("org").setLevel(Level.ERROR);
        SparkSession session = SparkSession.builder().appName("HousePriceSolution").master("local[1]").getOrCreate();

        Dataset<Row> realEstate = session.read().option("header", "true").csv("src/main/resources/RealEstate.csv");

        Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long"))
                .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

        castedRealEstate.groupBy("Location")
                .agg(avg(PRICE_SQ_FT), max(PRICE))
                .orderBy(col("avg(" + PRICE_SQ_FT + ")").desc())
                .show();
    }
}
Here is my pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>sparkwordcount</groupId>
    <artifactId>sparkwordcount</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>"Spark Word Count"</name>

    <repositories>
        <repository>
            <id>scala-tools.org</id>
            <name>Scala-tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </repository>
    </repositories>

    <pluginRepositories>
        <pluginRepository>
            <id>scala-tools.org</id>
            <name>Scala-tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </pluginRepository>
    </pluginRepositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>

    <build>
        <plugins>
            <!-- This plugin is for Scala code. It uses the version of the Scala library dependency to pick the Scala version. -->
            <!--
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>4.4.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            -->
            <!-- This plugin is for Java code. The source and target versions are Java versions. -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.12.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.13</artifactId>
            <version>3.4.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- The following aren't needed for the word count demo, but
             will be for more complex things. -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-text</artifactId>
            <version>1.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.13</artifactId>
            <version>3.4.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.13</artifactId>
            <version>3.4.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <!-- The following artifacts are also available, depending upon
             what you're doing. They would use the same groupId and version as the ones above:
             spark-mllib
             spark-hive
             spark-catalyst
             spark-streaming-kafka
             spark-repl
             spark-graphx
        -->
    </dependencies>
</project>
However, I am getting the following error, even though the build itself succeeds:
Exception in thread "main" java.lang.NoClassDefFoundError: scala/$less$colon$less
at org.example.HousePriceSolution.main(HousePriceSolution.java:22)
Caused by: java.lang.ClassNotFoundException: scala.$less$colon$less
at java.net.URLClassLoader.findClass(URLClassLoader.java:387)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 1 more
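Based on the stack trace, scala.$less$colon$less is the JVM name of Scala's <:< class, which lives directly in the scala package only from Scala 2.13 onwards, so I suspect a Scala binary version mismatch: my POM declares scala-library 2.12.17 while the Spark artifacts use the _2.13 suffix. Is aligning everything on a single Scala binary version the right fix? A minimal, untested sketch of what I have in mind, assuming Spark 3.4.0 also publishes _2.12 artifacts:

<!-- Hypothetical sketch: keep scala-library 2.12.17 and switch the Spark artifacts
     to the matching _2.12 binary suffix; versions and scopes stay as in the POM above. -->
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.12.17</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.12</artifactId>
    <version>3.4.0</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.12</artifactId>
    <version>3.4.0</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.12</artifactId>
    <version>3.4.0</version>
    <scope>provided</scope>
</dependency>

Alternatively, I could keep the _2.13 Spark artifacts and bump scala-library to a 2.13.x release; either way, the Scala binary version of every dependency would match. Is that the correct way to resolve this?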