# PySpark demo: compute a per-department running total of fees
# using a window aggregation.
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as f
from pyspark.sql import Window

# Create (or reuse) a Spark session for this example application.
spark = SparkSession.builder.appName('sparkdf').getOrCreate()
# Sample employee records; each row is [ID, NAME, DEPT, FEE].
data = [
    ["1", "sravan", "IT", 45000],
    ["2", "ojaswi", "CS", 85000],
    ["3", "rohith", "CS", 41000],
    ["4", "sridevi", "IT", 56000],
    ["5", "bobby", "ECE", 45000],
    ["6", "gayatri", "ECE", 49000],
    ["7", "gnanesh", "CS", 45000],
    ["8", "bhanu", "Mech", 21000],
]

# Column names for the DataFrame built from `data`.
columns = ['ID', 'NAME', 'DEPT', 'FEE']
# Build the DataFrame from the literal rows and column names above.
dataframe = spark.createDataFrame(data, columns)

# Partition by department so every row in a DEPT carries that
# department's total fee; no ordering means the sum covers the
# whole partition.
dept_window = Window.partitionBy('DEPT')
dataframe.withColumn('Total Branch Sum', f.sum('FEE').over(dept_window)).show()