下面是我的代码。运行时出现了 Spark 错误:“输出列已经存在”(Output column already exists)。我查看了其他帖子,但仍然不确定需要怎样修改。有人能帮忙看一下吗?
from pyspark.sql import SparkSession
from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml.classification import LinearSVC
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
# Create a SparkSession
spark = SparkSession.builder.appName("").getOrCreate()

# Load the TSV file into a DataFrame. TSV files are tab-delimited, so the
# separator has to be passed explicitly (the CSV reader defaults to a comma).
data = spark.read.csv("sleep.tsv", sep="\t", header=True, inferSchema=True)

input_cols = ["V0", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12"]

# Concatenate input columns into a single vector column named "features"
assembler = VectorAssembler(inputCols=input_cols, outputCol="features")
data_assembled = assembler.transform(data)

# Rename the target column to "label", the label column name used by the
# classifier below
data_assembled = data_assembled.withColumnRenamed("target", "label")

# Split the assembled data into training and testing sets (80/20, fixed seed)
(trainingData, testData) = data_assembled.randomSplit([0.8, 0.2], seed=16)

# Build the pipeline. The "features" column was already produced by
# assembler.transform() above, so the assembler must NOT be added as a
# pipeline stage as well: running it a second time on the same DataFrame is
# what raises "Output column features already exists".
# NOTE(review): LinearSVC is a binary classifier - confirm that "label" only
# holds the values 0 and 1.
svm = LinearSVC(featuresCol="features", labelCol="label")
pipeline = Pipeline(stages=[svm])

# Fit the pipeline to training data
model = pipeline.fit(trainingData)