I have this Vector[String]:
user_uid,score,value
255938,34096,8
259117,34599,10
253664,28891,7
How can I convert it to a DataFrame?
I already tried this:
// Build a DataFrame from the comma-separated lines in `dataInVectorString`.
// BUG in the original: it split on "\\s+" (whitespace), but the rows are
// comma-separated, so each line became a single-element Array[String]. The
// 3-field schema then made Spark's encoder read index 1 of that array,
// producing ArrayIndexOutOfBoundsException: 1. Fix: split on ",".
// Also skip the first line ("user_uid,score,value") — it is the header,
// not data — and reuse it for the schema's column names.
val headerCols = dataInVectorString.head.split(",")
val dataInVectorRow = dataInVectorString.tail // drop the header line
  .map(_.split(","))                          // comma-separated, not whitespace
  .map(x => Row.fromSeq(x))
val fileRdd: RDD[Row] = sparkSession.sparkContext
  .parallelize(dataInVectorRow)
// Every field stays a StringType here; cast columns afterwards if numeric
// types are needed (e.g. df.withColumn("score", $"score".cast("int"))).
val schema = StructType(headerCols.map(name =>
  StructField(name, StringType, nullable = true)
))
val df_from_file = sparkSession.sqlContext.createDataFrame(fileRdd, schema)
df_from_file
But it gives me this error:
Caused by: java.lang.RuntimeException: Error while encoding: java.lang.ArrayIndexOutOfBoundsException: 1
Solved by doing this:
// Assemble the output lines: the column-name row first, then one
// comma-joined row per data element (values followed by the key).
val dataToWrite = getColNames(column_names) ++
  data.map(pair => pair._2.mkString(",") + "," + pair._1)
// First line carries the column names; the remaining lines are the data.
val columnNames = dataToWrite.head.split(",")
val bodyLines = dataToWrite.tail
val lineRdd = sparkSession.sparkContext.parallelize(bodyLines)
// Each line holds exactly four comma-separated numeric fields; parse them
// all to Double so they match the DoubleType schema below.
val rowRdd = lineRdd.map(_.split(",")).map { fields =>
  Row(fields(0).toDouble, fields(1).toDouble, fields(2).toDouble, fields(3).toDouble)
}
// Every column shares the same type, so the schema is derived directly
// from the header names.
val schema = StructType(columnNames.map(StructField(_, DoubleType, nullable = true)))
val df = sparkSession.sqlContext.createDataFrame(rowRdd, schema)
df