"""Minimal PySpark example: build a DataFrame from in-memory rows and display it.

Requires a working Spark installation; `getOrCreate()` reuses an existing
session when one is already running (e.g. in a notebook).
"""
from pyspark.sql import SparkSession
from pyspark.sql import functions as f  # noqa: F401 -- imported but unused here; kept in case a later chunk of this file uses it

# One SparkSession per application is the norm; getOrCreate() makes the
# script safe to re-run without spawning duplicate sessions.
spark = SparkSession.builder.appName(
    'pyspark - example join'
).getOrCreate()

# Raw rows as (name, job, country, salary) tuples.
# NOTE(review): the name `dataframe` is misleading -- this is a plain list of
# tuples, not a Spark DataFrame -- but it is kept unchanged so any external
# reference to this module attribute still resolves. Consider renaming to
# `rows` or `employee_data` if nothing else imports it.
dataframe = [
    ("Sam", "Software Engineer", "IND", 10000),
    ("Raj", "Data Scientist", "US", 41000),
    ("Jonas", "Sales Person", "UK", 230000),
    ("Peter", "CTO", "Ireland", 50000),
    ("Hola", "Data Analyst", "Australia", 111000),
    ("Ram", "CEO", "Iran", 300000),
    ("Lekhana", "Advertising", "UK", 250000),
    ("Thanos", "Marketing", "UIND", 114000),  # NOTE(review): "UIND" looks like a typo for "IND" -- confirm before fixing; it is data, not code
    ("Nick", "Data Engineer", "Ireland", 680000),
    ("Wade", "Data Engineer", "IND", 70000),
]

# Column labels, positionally matched to the tuple fields above.
# (Casing is inconsistent -- "salary" vs the TitleCase others -- preserved
# as-is because downstream code may select by this exact name.)
columns = [
    "Name",
    "Job",
    "Country",
    "salary",
]

# Spark infers the column types (string / long) from the Python values.
df = spark.createDataFrame(data=dataframe, schema=columns)
df.show()