"""Build a student-info Spark DataFrame and drop rows with a null Class ID."""

# findspark must locate the Spark installation and patch sys.path BEFORE
# pyspark is imported — the original ran init() after the pyspark imports,
# which defeats its purpose.
import findspark

findspark.init('_path-to-spark_')  # placeholder: replace with the real Spark home

from pyspark.sql import SparkSession
from pyspark.sql.types import (
    FloatType,  # unused below; kept to preserve the original import surface
    IntegerType,
    StringType,
    StructField,
    StructType,
)
# Raw student rows: (name, roll number, class id, marks, extracurricular).
# None marks a missing value — Atirikt has no activity, Reshav has neither
# a class id nor an activity.
data2 = [
    ("Pulkit", 12, "CS32", 82, "Programming"),
    ("Ritika", 20, "CS32", 94, "Writing"),
    ("Atirikt", 4, "BB21", 78, None),
    ("Reshav", 18, None, 56, None),
]
# Start (or reuse) a local Spark session for this job.
spark = SparkSession.builder.appName("Student_Info").getOrCreate()

try:
    # Explicit schema: every column is nullable (True) because the raw
    # data contains None entries.
    schema = StructType([
        StructField("Name", StringType(), True),
        StructField("Roll Number", IntegerType(), True),
        StructField("Class ID", StringType(), True),
        StructField("Marks", IntegerType(), True),
        StructField("Extracurricular", StringType(), True),
    ])

    # Build the DataFrame, then show only rows that have a Class ID:
    # na.drop(subset=["Class ID"]) removes rows where that column is null.
    df = spark.createDataFrame(data=data2, schema=schema)
    df.na.drop(subset=["Class ID"]).show(truncate=False)
finally:
    # Release Spark resources even if DataFrame construction fails.
    spark.stop()