import findspark

# findspark.init() has to run BEFORE anything from pyspark is imported: it is
# what points the interpreter at the Spark installation, so importing pyspark
# first fails on machines where pyspark is not otherwise importable.
# NOTE(review): the Spark home is hard-coded to a local Windows path.
findspark.init('c:/spark')

from pyspark.sql import SparkSession  # noqa: E402
from pyspark.sql.types import (  # noqa: E402
    FloatType,
    IntegerType,
    StringType,
    StructField,
    StructType,
)
# Sample student records, one tuple per student, laid out as:
#   ((firstname, lastname), roll number, class id, marks, extracurricular)
# None marks a value that is missing for that student.
data = [
    (("Pulkit", "Dhingra"), 12, "CS32", 82, "Programming"),
    (("Ritika", "Pandey"), 20, "CS32", 94, "Writing"),
    (("Atirikt", "Sans"), 4, "BB21", 78, None),
    (("Reshav", None), 18, None, 56, None),
]
# Obtain the Spark session for this script (reuses an active one if present),
# registered under the application name "Student_Info".
spark = (
    SparkSession.builder
    .appName("Student_Info")
    .getOrCreate()
)
# Explicit schema for the student records: a nested "name" struct followed by
# four flat, nullable columns.
schema = StructType([
    StructField(
        'name',
        StructType([
            StructField('firstname', StringType(), nullable=True),
            StructField('lastname', StringType(), nullable=True),
        ]),
    ),
    StructField("Roll Number", IntegerType(), nullable=True),
    StructField("Class ID", StringType(), nullable=True),
    StructField("Marks", IntegerType(), nullable=True),
    StructField("Extracurricular", StringType(), nullable=True),
])
# Build the DataFrame from the sample rows, print its schema, and show the
# nested first/last name fields. The work is wrapped in try/finally so the
# Spark session is stopped even if DataFrame creation or display raises.
try:
    df2 = spark.createDataFrame(data=data, schema=schema)
    df2.printSchema()
    # Dotted paths select the fields inside the nested "name" struct;
    # truncate=False prints the full cell values.
    df2.select("name.firstname", "name.lastname").show(truncate=False)
finally:
    spark.stop()