from
pyspark.sql
import
SparkSession
from
pyspark.sql.functions
import
col, substring
def create_session():
    """Return a SparkSession configured for this script.

    The builder is pointed at the ``"local"`` master and given the
    application name ``"Substring.com"``; ``getOrCreate()`` then hands back
    the session.
    """
    builder = SparkSession.builder.master("local")
    builder = builder.appName("Substring.com")
    return builder.getOrCreate()
def create_df(spark, data, schema):
    """Build a DataFrame from ``data`` using ``schema``.

    Args:
        spark: Object exposing ``createDataFrame(data, schema)``; the script
            passes the session returned by ``create_session()``.
        data: Rows to load, forwarded unchanged.
        schema: Schema description, forwarded unchanged (the script passes a
            list of column names).

    Returns:
        Whatever ``spark.createDataFrame(data, schema)`` returns.
    """
    return spark.createDataFrame(data, schema)
if __name__ == "__main__":
    # Sample rows, one tuple per entry, in the same order as ``schema`` below.
    # The second field was written with a unary plus on some rows (e.g. +91);
    # that is a no-op on an int literal, so plain ints give identical values.
    input_data = [
        ("India", 91, 2701, 2020),
        ("United States of America", 1, 1301, 2020),
        ("Israel", 972, 3102, 2020),
        ("Dubai", 971, 2901, 2020),
        ("Russia", 7, 3101, 2020),
    ]
    # Column names, positionally matched to the tuple fields above.
    # NOTE(review): "Data" may have been intended as "Date" - confirm before
    # renaming, since the name is part of the displayed output.
    schema = ["Country", "Country Code", "Data", "Year"]

    spark = create_session()
    try:
        df = create_df(spark, input_data, schema)
        df.show()
    finally:
        # Always release the session, even if building or showing the
        # DataFrame raises.
        spark.stop()