RDD编程第三问的代码存在问题。
以下是正确代码:
package com.hainiu.spark
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Joins the step1 and step2 result sets on their shared `type` column and
 * writes the joined records to `data/step3`.
 *
 * Input layouts (comma-separated text, one record per line):
 *  - `data/step1`: userid,type,count
 *  - `data/step2`: type,movie,avg_score
 *
 * Each output record has the shape
 * `(type, ((userid, count), (movie, avg_score)))`; all fields stay as strings.
 */
object step3 {

  /**
   * Runs the join job on a local Spark master.
   *
   * A line with fewer than three comma-separated fields makes the field access
   * fail with an `ArrayIndexOutOfBoundsException` once the job is executed.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("step3")
      .setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Always release the context, even when reading, joining or saving fails.
    try {
      // step1: userid,type,count  ->  (type, (userid, count))
      val rddStep1: RDD[(String, (String, String))] =
        sc.textFile("data/step1").map { line =>
          val fields = line.split(",")
          (fields(1), (fields(0), fields(2)))
        }

      // step2: type,movie,avg_score  ->  (type, (movie, avg_score))
      val rddStep2: RDD[(String, (String, String))] =
        sc.textFile("data/step2").map { line =>
          val fields = line.split(",")
          (fields(0), (fields(1), fields(2)))
        }

      // Inner join on `type`: only types present in both inputs are emitted.
      // NOTE(review): saveAsTextFile is expected to fail if data/step3 already
      // exists (Spark's default output validation) - confirm for this setup.
      rddStep1.join(rddStep2).saveAsTextFile("data/step3")
    } finally {
      sc.stop()
    }
  }
}