"""Build a small DataFrame of actors with an explicit schema and display it.

Creates (or reuses) a SparkSession, defines five (firstname, middlename,
lastname, gender, salary) rows, pairs them with an explicit StructType
schema, and prints the resulting DataFrame without truncating column values.
"""
from pyspark.sql import SparkSession
import pyspark.sql.types as T

# getOrCreate() returns an existing session if one is already running,
# so this is safe to execute repeatedly (e.g. in a notebook).
spark = SparkSession.builder.appName('My App').getOrCreate()

# Rows follow the schema below: (firstname, middlename, lastname, gender, salary).
# middlename is None for every row; lastname may also be None (see "Michael").
actor_data = [
    ("James", None, "Bond", "M", 6000),
    ("Michael", None, None, "M", 4000),
    ("Robert", None, "Pattinson", "M", 4000),
    ("Natalie", None, "Portman", "F", 4000),
    ("Julia", None, "Roberts", "F", 1000),
]

# Explicit schema instead of inference: every field is nullable (True),
# and salary is a 32-bit IntegerType rather than the inferred LongType.
actor_schema = T.StructType([
    T.StructField("firstname", T.StringType(), True),
    T.StructField("middlename", T.StringType(), True),
    T.StructField("lastname", T.StringType(), True),
    T.StructField("gender", T.StringType(), True),
    T.StructField("salary", T.IntegerType(), True),
])

df = spark.createDataFrame(data=actor_data, schema=actor_schema)
# truncate=False prints full cell contents instead of clipping at 20 chars.
df.show(truncate=False)