import
pyspark
from
pyspark.sql
import
SparkSession
# Obtain the active SparkSession (creating one if needed) under the
# application name 'sparkdf'.
spark = SparkSession.builder.appName('sparkdf').getOrCreate()
# First dataset: three (id, name) rows, exposed as columns ID1 / NAME1.
data = [(1, "sravan"), (2, "ojsawi"), (3, "bobby")]
columns = ['ID1', 'NAME1']
dataframe = spark.createDataFrame(data, columns)
# Second dataset: five (id, name) rows, exposed as columns ID2 / NAME2.
# NOTE: `data` and `columns` are rebound here, shadowing the values used
# to build the first DataFrame above.
data = [
    (1, "sravan"),
    (2, "ojsawi"),
    (3, "bobby"),
    (4, "rohith"),
    (5, "gnanesh"),
]
columns = ['ID2', 'NAME2']
dataframe1 = spark.createDataFrame(data, columns)
# Inner join (the default join type) on a compound condition: a row pair
# matches only when both the ids and the names are equal.  The joined
# result is printed to stdout.
join_condition = (dataframe.ID1 == dataframe1.ID2) & (
    dataframe.NAME1 == dataframe1.NAME2
)
dataframe.join(dataframe1, join_condition).show()