import pyspark
from pyspark.sql import SparkSession

# Obtain the SparkSession for this script: getOrCreate() reuses an already
# active session if there is one, otherwise it builds a new one named 'sparkdf'.
spark = SparkSession.builder.appName('sparkdf').getOrCreate()
# Sample rows, one per recorded mark: [student ID, student NAME, college,
# subject marks]. IDs "1", "2" and "4" each appear twice, so grouping by
# student ID collapses the 8 rows into 5 groups.
data = [
    ["1", "sravan", "vignan", 95],
    ["2", "ojaswi", "vvit", 78],
    ["3", "rohith", "vvit", 89],
    ["2", "ojaswi", "vvit", 100],
    ["4", "sridevi", "vignan", 88],
    ["1", "sravan", "vignan", 78],
    ["4", "sridevi", "vignan", 90],
    ["5", "gnanesh", "iit", 67],
]
# Column names, in the same order as the fields of each row in `data`.
# Names containing a space must be back-quoted when referenced in Spark SQL.
columns = ['student ID', 'student NAME', 'college', 'subject marks']
# Build a DataFrame from the raw rows using the column names above.
dataframe = spark.createDataFrame(data, columns)

# Collapse to one row per student, summing that student's marks.
# `dataframe` is deliberately rebound to the aggregated result because the
# later steps of the script read it; the aggregate column is named by Spark
# (expected: `sum(subject marks)`).
dataframe = dataframe.groupBy('student ID').sum('subject marks')
# Expose the current `dataframe` to Spark SQL under the view name DATA.
dataframe.createOrReplaceTempView("DATA")

# Show how many distinct student IDs the view contains.
# The column name is back-quoted: in Spark SQL a single-quoted token such as
# 'student ID' is a string literal, so the previous query counted a constant
# and grouped by a constant instead of referencing any column.
spark.sql("SELECT COUNT(DISTINCT `student ID`) FROM DATA").show()