import
pandas as pd
from
fuzzywuzzy
import
fuzz
from
fuzzywuzzy
import
process
# Sample data: two small collections of names to be fuzzy-matched.
dict1 = {
    'name': [
        "aparna",
        "pankaj",
        "sudhir",
        "Geeku",
        "geeks for geeks",
    ],
}
dict2 = {
    'name': [
        "aparn",
        "arup",
        "Pankaj",
        "for geeks geeks",
        "sudhir c",
        "Geek",
    ],
}

# Minimum score a best match must reach in order to be kept.
threshold = 80

# Empty accumulators used by the matching code later in the script.
mat1, mat2, p = [], [], []

dframe1 = pd.DataFrame(dict1)
dframe2 = pd.DataFrame(dict2)

# Plain Python lists of the 'name' column of each frame.
list1 = dframe1['name'].tolist()
list2 = dframe2['name'].tolist()

print(
    "First dataframe:\n",
    dframe1,
    "\nSecond dataframe:\n",
    dframe2,
)
# For every name in list1, ask fuzzywuzzy for its single best candidate in
# list2, scored with fuzz.token_sort_ratio. Each result is indexed below as
# [0] = matched string, [1] = score.
mat1.extend(
    process.extractOne(name, list2, scorer=fuzz.token_sort_ratio)
    for name in list1
)

# Keep the matched string only when its score reaches the threshold;
# otherwise record an empty string so the column stays aligned with list1.
mat2.extend(
    best[0] if best[1] >= threshold else ""
    for best in mat1
)

# Write the final column once, rather than storing the raw tuples first and
# overwriting them afterwards.
dframe1['matches'] = mat2
print("\nDataFrame after Fuzzy matching using fuzz.token_sort_ratio:")
# A bare `dframe1` expression is only rendered by a notebook/REPL; print it
# explicitly so the result also appears when this runs as a plain script.
print(dframe1)