Use the collect method on the DataFrame:
// Sample input: seven JSON documents, one per string, each carrying the
// fields id, name, addr and data.
val jsonStr: Seq[String] = Seq(
  """{"id" : "1", "name": "aaaaa", "addr": "seoul", "data": 10}""",
  """{"id" : "2", "name": "bbbbb", "addr": "pusan", "data": 20}""",
  """{"id" : "3", "name": "aaaaa", "addr": "pusan", "data": 30}""",
  """{"id" : "4", "name": "bbbbb", "addr": "seoul", "data": 40}""",
  """{"id" : "5", "name": "aaaaa", "addr": "pusan", "data": 50}""",
  """{"id" : "6", "name": "aaaaa", "addr": "pusan", "data": 60}""",
  """{"id" : "7", "name": "bbbbb", "addr": "seoul", "data": 70}"""
)

// Distribute the JSON strings as an RDD using the session's SparkContext.
val rddData = spark.sparkContext.parallelize(jsonStr)

// Parse the JSON strings into a DataFrame.
// NOTE(review): on Spark 2.2+ the json(RDD[String]) overload is deprecated in
// favour of json(Dataset[String]) -- confirm the target Spark version before
// switching.
val resultDF = spark.read.json(rddData)
// Materialise every row of the DataFrame as a local Array[Row]; the whole
// result is held in memory at once, so use this only on small DataFrames.
resultDF.collect()
res14: Array[org.apache.spark.sql.Row] = Array([seoul,10,1,aaaaa], [pusan,20,2,bbbbb], [pusan,30,3,aaaaa], [seoul,40,4,bbbbb], [pusan,50,5,aaaaa], [pusan,60,6,aaaaa], [seoul,70,7,bbbbb])
// Collect the rows locally and print each one on its own line.
for (row <- resultDF.collect()) println(row)
[seoul,10,1,aaaaa]
[pusan,20,2,bbbbb]
[pusan,30,3,aaaaa]
[seoul,40,4,bbbbb]
[pusan,50,5,aaaaa]
[pusan,60,6,aaaaa]
[seoul,70,7,bbbbb]
© 2024 OneMinuteCode. All rights reserved.