Open In App

Jaro and Jaro-Winkler similarity

Last Updated : 15 Feb, 2022
Improve
Improve
Like Article
Like
Save
Share
Report

Jaro Similarity

Jaro Similarity is a measure of similarity between two strings. Its value ranges from 0 to 1, where 1 means the strings are equal and 0 means there is no similarity between the two strings. 
 

Examples: 

Input: s1 = “CRATE”, s2 = “TRACE”; 
Output: Jaro Similarity = 0.733333


Input: s1 = “DwAyNE”, s2 = “DuANE”; 
Output: Jaro Similarity = 0.822222

Algorithm: 
The Jaro Similarity is calculated using the following formula 

    \[ \text{Jaro similarity} = \begin{cases} 0, & \text{if } m = 0 \\ \dfrac{1}{3}\left(\dfrac{m}{|s1|} + \dfrac{m}{|s2|} + \dfrac{m-t}{m}\right), & \text{if } m \neq 0 \end{cases} \]


where:

  • m is the number of matching characters
  • t is the number of transpositions (half the number of matching characters that are out of order)
  • where |s1| and |s2| are the lengths of strings s1 and s2 respectively.

Two characters are said to be matching if they are the same and their positions in the two strings are no more than \Big\lfloor\cfrac{max(|s1|, |s2|)}{2}\Big\rfloor-1 apart.
The number of transpositions is half the number of matching characters that appear in a different order in the two strings.
Calculation: 

  • Let s1=”arnab”, s2=”raanb”, so the maximum distance to which each character is matched is 1.
  • It is evident that both the strings have 5 matching characters, but the order is not the same, so the number of characters that are not in order is 4, so the number of transpositions is 2.
  • Therefore, Jaro similarity can be calculated as follows: 
    Jaro Similarity = (1/3) * {(5/5) + (5/5) + (5-2)/5 } = 0.86667


Below is the implementation of the above approach.

C++

// C++ implementation of above approach
#include <bits/stdc++.h>
using namespace std;
 
// Function to calculate the Jaro Similarity of two strings.
//
// Returns a value in [0, 1]: 1.0 when the strings are equal, and 0.0 when
// exactly one of them is empty or when no characters match. Two characters
// match when they are equal and their positions differ by at most
// max(|s1|, |s2|) / 2 - 1.
//
// The strings are taken by const reference to avoid copying; callers may
// still pass std::string objects or string literals.
double jaro_distance(const std::string& s1, const std::string& s2)
{
    // Equal strings (including two empty strings) are a perfect match
    if (s1 == s2)
        return 1.0;

    // Length of two strings
    const int len1 = static_cast<int>(s1.length());
    const int len2 = static_cast<int>(s2.length());

    // An empty string cannot share any character with a non-empty one
    if (len1 == 0 || len2 == 0)
        return 0.0;

    // Maximum distance upto which matching is allowed. Integer division
    // already floors, so no call to floor() is needed.
    const int max_dist = std::max(len1, len2) / 2 - 1;

    // Flags marking which positions of each string take part in a match.
    // std::vector replaces the non-standard variable-length arrays, which
    // are a compiler extension and cannot portably be brace-initialized.
    std::vector<char> matched_s1(len1, 0);
    std::vector<char> matched_s2(len2, 0);

    // Count of matches
    int match = 0;

    // Traverse through the first string
    for (int i = 0; i < len1; ++i) {
        const int lo = std::max(0, i - max_dist);
        const int hi = std::min(len2, i + max_dist + 1);

        // Pair s1[i] with the first unused equal character in the window
        for (int j = lo; j < hi; ++j) {
            if (!matched_s2[j] && s1[i] == s2[j]) {
                matched_s1[i] = 1;
                matched_s2[j] = 1;
                ++match;
                break;
            }
        }
    }

    // If there is no match
    if (match == 0)
        return 0.0;

    // Count the matched characters that appear in a different order in
    // the two strings; half of that count is the number of transpositions.
    double t = 0;
    int point = 0;
    for (int i = 0; i < len1; ++i) {
        if (!matched_s1[i])
            continue;

        // Find the next matched character in the second string. Both
        // strings hold exactly `match` flagged positions, so this stays
        // in bounds.
        while (!matched_s2[point])
            ++point;

        if (s1[i] != s2[point])
            ++t;
        ++point;
    }
    t /= 2;

    // Return the Jaro Similarity
    const double m = match;
    return (m / len1 + m / len2 + (m - t) / m) / 3.0;
}
 
// Driver code
int main()
{
    string s1 = "CRATE", s2 = "TRACE";
 
    // Print jaro Similarity of two strings
    cout << jaro_distance(s1, s2) << endl;
 
    return 0;
}

                    

Java

// Java implementation of above approach
class GFG
{
    /**
     * Calculates the Jaro similarity of two strings.
     *
     * Two characters match when they are equal and their positions differ
     * by at most max(|s1|, |s2|) / 2 - 1.
     *
     * @param s1 first string
     * @param s2 second string
     * @return a value in [0, 1]: 1.0 when the strings are equal, 0.0 when
     *         exactly one of them is empty or no characters match
     */
    static double jaro_distance(String s1, String s2)
    {
        // Compare by content: `==` alone only compares references, so two
        // equal but distinct String objects would not be seen as equal.
        // The reference check is kept so that the same object (or two
        // nulls) still short-circuits as before.
        if (s1 == s2 || (s1 != null && s1.equals(s2)))
            return 1.0;

        // Length of two Strings
        int len1 = s1.length();
        int len2 = s2.length();

        // An empty string cannot share any character with a non-empty one
        if (len1 == 0 || len2 == 0)
            return 0.0;

        // Maximum distance upto which matching is allowed; integer
        // division already floors the result
        int max_dist = Math.max(len1, len2) / 2 - 1;

        // Flags marking which positions of each string are matched
        boolean[] matched1 = new boolean[len1];
        boolean[] matched2 = new boolean[len2];

        // Count of matches
        int match = 0;

        // Traverse through the first String
        for (int i = 0; i < len1; i++)
        {
            int lo = Math.max(0, i - max_dist);
            int hi = Math.min(len2, i + max_dist + 1);

            // Pair s1[i] with the first unused equal character in the window
            for (int j = lo; j < hi; j++)
            {
                if (!matched2[j] && s1.charAt(i) == s2.charAt(j))
                {
                    matched1[i] = true;
                    matched2[j] = true;
                    match++;
                    break;
                }
            }
        }

        // If there is no match
        if (match == 0)
            return 0.0;

        // Count the matched characters that appear in a different order
        // in the two strings; half of that count is the transpositions.
        double t = 0;
        int point = 0;
        for (int i = 0; i < len1; i++)
        {
            if (!matched1[i])
                continue;

            // Find the next matched character in the second String
            while (!matched2[point])
                point++;

            if (s1.charAt(i) != s2.charAt(point))
                t++;
            point++;
        }
        t /= 2;

        // Return the Jaro Similarity
        double m = match;
        return (m / len1 + m / len2 + (m - t) / m) / 3.0;
    }

    // Driver code
    public static void main(String[] args)
    {
        String s1 = "CRATE", s2 = "TRACE";

        // Print jaro Similarity of two Strings
        System.out.print(jaro_distance(s1, s2) + "\n");
    }
}
 
// This code is contributed by PrinciRaj1992

                    

Python3

# Python3 implementation of above approach
from math import floor, ceil
 
# Function to calculate the
# Jaro Similarity of two strings
def jaro_distance(s1, s2):
    """Return the Jaro similarity of s1 and s2 as a float in [0, 1].

    1.0 means the strings are equal; 0.0 means exactly one of them is
    empty or no characters match. Two characters match when they are
    equal and their positions differ by at most
    max(len(s1), len(s2)) // 2 - 1.
    """
    # If the strings are equal (this also covers two empty strings)
    if s1 == s2:
        return 1.0

    # Length of two strings
    len1 = len(s1)
    len2 = len(s2)

    # An empty string cannot share any character with a non-empty one
    if len1 == 0 or len2 == 0:
        return 0.0

    # Maximum distance upto which matching is allowed
    max_dist = max(len1, len2) // 2 - 1

    # Count of matches
    match = 0

    # Flags marking which positions of each string are matched
    hash_s1 = [0] * len1
    hash_s2 = [0] * len2

    # Traverse through the first string
    for i in range(len1):

        # Pair s1[i] with the first unused equal character in the window
        for j in range(max(0, i - max_dist),
                       min(len2, i + max_dist + 1)):
            if s1[i] == s2[j] and hash_s2[j] == 0:
                hash_s1[i] = 1
                hash_s2[j] = 1
                match += 1
                break

    # If there is no match
    if match == 0:
        return 0.0

    # Count the matched characters that appear in a different order in
    # the two strings
    t = 0
    point = 0
    for i in range(len1):
        if hash_s1[i]:

            # Find the next matched character in the second string
            while hash_s2[point] == 0:
                point += 1

            if s1[i] != s2[point]:
                t += 1
            point += 1

    # Halve with true division: floor division (//) would drop the .5 for
    # an odd count and disagree with the formula's (m - t) / m term as the
    # other implementations of this algorithm compute it.
    t = t / 2

    # Return the Jaro Similarity
    return (match / len1 + match / len2 +
            (match - t) / match) / 3.0
 
# Driver code: sample pair of strings
s1, s2 = "CRATE", "TRACE"

# Print the Jaro similarity of the pair, rounded to six decimal places
print(round(jaro_distance(s1, s2), 6))
 
# This code is contributed by mohit kumar 29

                    

C#

// C# implementation of above approach
using System;
 
class GFG
{

    /// <summary>
    /// Calculates the Jaro similarity of two strings: 1.0 when they are
    /// equal, 0.0 when no characters match, otherwise a value in between.
    /// Characters match when they are equal and their positions differ by
    /// at most max(|s1|, |s2|) / 2 - 1.
    /// </summary>
    static double jaro_distance(string s1, string s2)
    {
        // Equal strings are a perfect match
        if (s1 == s2)
            return 1.0;

        int n1 = s1.Length;
        int n2 = s2.Length;

        // Largest index offset at which two characters may still match
        // (integer division already floors)
        int window = Math.Max(n1, n2) / 2 - 1;

        // Which positions of each string are already part of a match
        bool[] used1 = new bool[n1];
        bool[] used2 = new bool[n2];
        int matches = 0;

        for (int i = 0; i < n1; i++)
        {
            int lo = Math.Max(0, i - window);
            int hi = Math.Min(n2, i + window + 1);

            // Take the first unused, equal character inside the window
            for (int j = lo; j < hi; j++)
            {
                if (used2[j] || s1[i] != s2[j])
                    continue;

                used1[i] = true;
                used2[j] = true;
                matches++;
                break;
            }
        }

        // Nothing in common at all
        if (matches == 0)
            return 0.0;

        // Walk the matched characters of both strings in parallel and
        // count the positions where they disagree
        double t = 0;
        int cursor = 0;
        for (int i = 0; i < n1; i++)
        {
            if (!used1[i])
                continue;

            // Advance to the next matched character of the second string
            while (!used2[cursor])
                cursor++;

            if (s1[i] != s2[cursor])
                t++;
            cursor++;
        }

        // Half of the out-of-order count is the number of transpositions
        t /= 2;

        // Average of the three Jaro terms
        double m = matches;
        return (m / n1 + m / n2 + (m - t) / m) / 3.0;
    }

    // Driver code
    public static void Main()
    {
        string s1 = "CRATE";
        string s2 = "TRACE";

        // Print the Jaro similarity of the sample pair
        Console.WriteLine(jaro_distance(s1, s2));
    }
}
 
// This code is contributed by AnkitRai01

                    

Javascript

<script>
 
// Javascript implementation of above approach
 
// Function to calculate the Jaro Similarity of two strings.
// Returns a number in [0, 1]: 1.0 when the strings are equal, 0.0 when
// exactly one of them is empty or no characters match. Two characters
// match when they are equal and their positions differ by at most
// floor(max(|s1|, |s2|) / 2) - 1.
function jaro_distance(s1, s2)
{
    // If the strings are equal
    if (s1 == s2)
        return 1.0;

    // Length of two strings
    var len1 = s1.length,
        len2 = s2.length;

    // An empty string cannot share any character with a non-empty one
    if (len1 == 0 || len2 == 0)
        return 0.0;

    // Maximum distance upto which matching
    // is allowed
    var max_dist = Math.floor(Math.max(len1, len2) / 2) - 1;

    // Count of matches
    var match = 0;

    // Flags marking which positions of each string are matched. Each
    // array must be sized from its own string: sizing hash_s2 from s1
    // leaves the tail of a longer s2 undefined, so those positions could
    // never match.
    var hash_s1 = Array(len1).fill(0);
    var hash_s2 = Array(len2).fill(0);

    // Traverse through the first string
    for (var i = 0; i < len1; i++) {

        // Pair s1[i] with the first unused equal character in the window
        for (var j = Math.max(0, i - max_dist);
             j < Math.min(len2, i + max_dist + 1); j++) {

            // If there is a match
            if (s1[i] == s2[j] && hash_s2[j] == 0) {
                hash_s1[i] = 1;
                hash_s2[j] = 1;
                match++;
                break;
            }
        }
    }

    // If there is no match
    if (match == 0)
        return 0.0;

    // Count the matched characters that appear in a different order in
    // the two strings; half of that count is the transpositions.
    var t = 0;
    var point = 0;

    for (var k = 0; k < len1; k++) {
        if (hash_s1[k]) {

            // Find the next matched character
            // in second string
            while (hash_s2[point] == 0)
                point++;

            if (s1[k] != s2[point++])
                t++;
        }
    }

    t /= 2;

    // Return the Jaro Similarity
    return ((match) / (len1)
            + (match) / (len2)
            + (match - t) / (match))
           / 3.0;
}
 
// Driver code: sample pair of strings
var s1 = "CRATE";
var s2 = "TRACE";

// Write the Jaro similarity, formatted to five decimal places, into the page
document.write(jaro_distance(s1, s2).toFixed(5));
 
 
</script>

                    

Output: 
0.733333

 

Time Complexity: O(N * M), where N is the length of string s1 and M is the length of string s2.

Auxiliary Space: O(N + M)

Jaro-Winkler Similarity

The Jaro-Winkler similarity is a string metric for measuring the edit distance between two strings. It is closely related to the Jaro similarity; the two differ only when the strings share a common prefix. Jaro-Winkler similarity uses a prefix scaling factor ‘p’ that gives a higher score to strings that match from the beginning, for a common prefix of up to a defined maximum length l. 
Examples: 

Input: s1 = “DwAyNE”, s2 = “DuANE”; 
Output: Jaro-Winkler Similarity =0.84


Input: s1=”TRATE”, s2=”TRACE”; 
Output: Jaro-Winkler similarity = 0.906667

Calculation: 

  • Jaro Winkler similarity is defined as follows 
    Sw = Sj + P * L * (1 – Sj) 
    where, 
    • Sj, is jaro similarity
    • Sw, is jaro- winkler similarity
    • P is the scaling factor (0.1 by default)
    • L is the length of the matching prefix up to a maximum of 4 characters.
  • Let s1=”arnab”, s2=”aranb”. The Jaro similarity of the two strings is 0.933333 (computed with the Jaro procedure described earlier: 5 matching characters and 1 transposition).
  • The length of the matching prefix is 2 and we take the scaling factor as 0.1.
  • Substituting in the formula; 
    Jaro-Winkler Similarity= 0.9333333 + 0.1 * 2 * (1-0.9333333) = 0.946667


Below is the implementation of the above approach. 

C++

// C++ implementation of above approach
#include <bits/stdc++.h>
using namespace std;
 
// Function to calculate the Jaro Similarity of two strings.
//
// Returns a value in [0, 1]: 1.0 when the strings are equal, and 0.0 when
// exactly one of them is empty or when no characters match. Two characters
// match when they are equal and their positions differ by at most
// max(|s1|, |s2|) / 2 - 1.
//
// The strings are taken by const reference to avoid copying; callers may
// still pass std::string objects or string literals.
double jaro_distance(const std::string& s1, const std::string& s2)
{
    // Equal strings (including two empty strings) are a perfect match
    if (s1 == s2)
        return 1.0;

    // Length of two strings
    const int len1 = static_cast<int>(s1.length());
    const int len2 = static_cast<int>(s2.length());

    // An empty string cannot share any character with a non-empty one
    if (len1 == 0 || len2 == 0)
        return 0.0;

    // Maximum distance upto which matching is allowed. Integer division
    // already floors, so no call to floor() is needed.
    const int max_dist = std::max(len1, len2) / 2 - 1;

    // Flags marking which positions of each string take part in a match.
    // std::vector replaces the non-standard variable-length arrays, which
    // are a compiler extension and cannot portably be brace-initialized.
    std::vector<char> matched_s1(len1, 0);
    std::vector<char> matched_s2(len2, 0);

    // Count of matches
    int match = 0;

    // Traverse through the first string
    for (int i = 0; i < len1; ++i) {
        const int lo = std::max(0, i - max_dist);
        const int hi = std::min(len2, i + max_dist + 1);

        // Pair s1[i] with the first unused equal character in the window
        for (int j = lo; j < hi; ++j) {
            if (!matched_s2[j] && s1[i] == s2[j]) {
                matched_s1[i] = 1;
                matched_s2[j] = 1;
                ++match;
                break;
            }
        }
    }

    // If there is no match
    if (match == 0)
        return 0.0;

    // Count the matched characters that appear in a different order in
    // the two strings; half of that count is the number of transpositions.
    double t = 0;
    int point = 0;
    for (int i = 0; i < len1; ++i) {
        if (!matched_s1[i])
            continue;

        // Find the next matched character in the second string. Both
        // strings hold exactly `match` flagged positions, so this stays
        // in bounds.
        while (!matched_s2[point])
            ++point;

        if (s1[i] != s2[point])
            ++t;
        ++point;
    }
    t /= 2;

    // Return the Jaro Similarity
    const double m = match;
    return (m / len1 + m / len2 + (m - t) / m) / 3.0;
}
 
// Jaro Winkler Similarity
double jaro_Winkler(string s1, string s2)
{
    double jaro_dist = jaro_distance(s1, s2);
 
    // If the jaro Similarity is above a threshold
    if (jaro_dist > 0.7) {
 
        // Find the length of common prefix
        int prefix = 0;
 
        for (int i = 0;
             i < min(s1.length(), s2.length()); i++) {
            // If the characters match
            if (s1[i] == s2[i])
                prefix++;
 
            // Else break
            else
                break;
        }
 
        // Maximum of 4 characters are allowed in prefix
        prefix = min(4, prefix);
 
        // Calculate jaro winkler Similarity
        jaro_dist += 0.1 * prefix * (1 - jaro_dist);
    }
 
    return jaro_dist;
}
 
// Driver code
int main()
{
    string s1 = "TRATE", s2 = "TRACE";
 
    // Print Jaro-Winkler Similarity of two strings
    cout << "Jaro-Winkler Similarity ="
         << jaro_Winkler(s1, s2) << endl;
 
    return 0;
}

                    

Java

// Java implementation of above approach
class GFG
{

    /**
     * Calculates the Jaro similarity of two strings.
     *
     * Two characters match when they are equal and their positions differ
     * by at most max(|s1|, |s2|) / 2 - 1.
     *
     * @param s1 first string
     * @param s2 second string
     * @return a value in [0, 1]: 1.0 when the strings are equal, 0.0 when
     *         exactly one of them is empty or no characters match
     */
    static double jaro_distance(String s1, String s2)
    {
        // Compare by content: `==` alone only compares references, so two
        // equal but distinct String objects (for example two separately
        // built empty strings) would fall through and score 0.0. The
        // reference check is kept so that the same object (or two nulls)
        // still short-circuits as before.
        if (s1 == s2 || (s1 != null && s1.equals(s2)))
            return 1.0;

        // Length of two strings
        int len1 = s1.length();
        int len2 = s2.length();

        // An empty string cannot share any character with a non-empty one
        if (len1 == 0 || len2 == 0)
            return 0.0;

        // Maximum distance upto which matching is allowed; integer
        // division already floors the result
        int max_dist = Math.max(len1, len2) / 2 - 1;

        // Flags marking which positions of each string are matched
        boolean[] matched1 = new boolean[len1];
        boolean[] matched2 = new boolean[len2];

        // Count of matches
        int match = 0;

        // Traverse through the first string
        for (int i = 0; i < len1; i++)
        {
            int lo = Math.max(0, i - max_dist);
            int hi = Math.min(len2, i + max_dist + 1);

            // Pair s1[i] with the first unused equal character in the window
            for (int j = lo; j < hi; j++)
            {
                if (!matched2[j] && s1.charAt(i) == s2.charAt(j))
                {
                    matched1[i] = true;
                    matched2[j] = true;
                    match++;
                    break;
                }
            }
        }

        // If there is no match
        if (match == 0)
            return 0.0;

        // Count the matched characters that appear in a different order
        // in the two strings; half of that count is the transpositions.
        double t = 0;
        int point = 0;
        for (int i = 0; i < len1; i++)
        {
            if (!matched1[i])
                continue;

            // Find the next matched character in the second string
            while (!matched2[point])
                point++;

            if (s1.charAt(i) != s2.charAt(point))
                t++;
            point++;
        }
        t /= 2;

        // Return the Jaro Similarity
        double m = match;
        return (m / len1 + m / len2 + (m - t) / m) / 3.0;
    }

    /**
     * Calculates the Jaro-Winkler similarity: the Jaro similarity, boosted
     * for strings that share a common prefix. The boost is applied only
     * when the Jaro score is above 0.7, uses a scaling factor of 0.1, and
     * counts at most 4 prefix characters.
     *
     * @param s1 first string
     * @param s2 second string
     * @return the Jaro-Winkler similarity of the two strings
     */
    static double jaro_Winkler(String s1, String s2)
    {
        double jaro_dist = jaro_distance(s1, s2);

        // If the jaro Similarity is above a threshold
        if (jaro_dist > 0.7)
        {
            // Find the length of the common prefix; a maximum of
            // 4 characters are allowed in the prefix
            int limit = Math.min(4, Math.min(s1.length(), s2.length()));
            int prefix = 0;
            while (prefix < limit && s1.charAt(prefix) == s2.charAt(prefix))
                prefix++;

            // Calculate jaro winkler Similarity
            jaro_dist += 0.1 * prefix * (1 - jaro_dist);
        }
        return jaro_dist;
    }

    // Driver code
    public static void main (String[] args)
    {
        String s1 = "TRATE", s2 = "TRACE";

        // Print Jaro-Winkler Similarity of two strings
        System.out.println("Jaro-Winkler Similarity =" +
                            jaro_Winkler(s1, s2));
    }
}
 
// This code is contributed by AnkitRai01

                    

Python3

# Python3 implementation of above approach
from math import floor
 
# Function to calculate the
# Jaro Similarity of two strings
def jaro_distance(s1, s2):
    """Return the Jaro similarity of s1 and s2 as a float in [0, 1].

    1.0 means the strings are equal; 0.0 means exactly one of them is
    empty or no characters match. Two characters match when they are
    equal and their positions differ by at most
    max(len(s1), len(s2)) // 2 - 1.
    """
    # If the strings are equal
    if s1 == s2:
        return 1.0

    # Length of two strings
    len1 = len(s1)
    len2 = len(s2)

    # An empty string cannot share any character with a non-empty one
    if len1 == 0 or len2 == 0:
        return 0.0

    # Maximum distance upto which matching is allowed
    max_dist = max(len1, len2) // 2 - 1

    # Count of matches
    match = 0

    # Flags marking which positions of each string are matched
    hash_s1 = [0] * len1
    hash_s2 = [0] * len2

    # Traverse through the first string
    for i in range(len1):

        # Pair s1[i] with the first unused equal character in the window
        for j in range(max(0, i - max_dist),
                       min(len2, i + max_dist + 1)):
            if s1[i] == s2[j] and hash_s2[j] == 0:
                hash_s1[i] = 1
                hash_s2[j] = 1
                match += 1
                break

    # If there is no match
    if match == 0:
        return 0.0

    # Count the matched characters that appear in a different order in
    # the two strings
    t = 0
    point = 0
    for i in range(len1):
        if hash_s1[i]:

            # Find the next matched character in the second string
            while hash_s2[point] == 0:
                point += 1

            if s1[i] != s2[point]:
                t += 1
            point += 1

    # Halve the count once, after the loop has finished. Halving inside
    # the loop would shrink the running total on every iteration.
    t /= 2

    # Return the Jaro Similarity
    return (match / len1 + match / len2 +
            (match - t) / match) / 3.0
 
# Jaro Winkler Similarity
def jaro_Winkler(s1, s2):
    """Return the Jaro-Winkler similarity of s1 and s2.

    Starts from the Jaro similarity and, when that score is above 0.7,
    boosts it for a shared prefix using a scaling factor of 0.1 and at
    most 4 prefix characters.
    """
    score = jaro_distance(s1, s2)

    # Scores at or below the threshold are returned unchanged
    if not (score > 0.7):
        return score

    # Length of the common prefix of the two strings
    prefix = 0
    for left, right in zip(s1, s2):
        if left != right:
            break
        prefix += 1

    # Maximum of 4 characters are allowed in prefix
    prefix = min(4, prefix)

    # Move the score towards 1 in proportion to the shared prefix
    return score + 0.1 * prefix * (1 - score)
 
# Driver code
if __name__ == "__main__":

    # Sample pair of strings
    s1, s2 = "TRATE", "TRACE"

    # Print the Jaro-Winkler similarity of the pair
    print("Jaro-Winkler Similarity =", jaro_Winkler(s1, s2))
 
# This code is contributed by AnkitRai01

                    

C#

// C# implementation of above approach
using System;
 
class GFG
{

    /// <summary>
    /// Calculates the Jaro similarity of two strings: 1.0 when they are
    /// equal, 0.0 when either is empty or no characters match, otherwise
    /// a value in between. Characters match when they are equal and their
    /// positions differ by at most max(|s1|, |s2|) / 2 - 1.
    /// </summary>
    static double jaro_distance(string s1, string s2)
    {
        // Equal strings are a perfect match
        if (s1 == s2)
            return 1.0;

        int n1 = s1.Length;
        int n2 = s2.Length;

        // An empty string shares nothing with a non-empty one
        if (n1 == 0 || n2 == 0)
            return 0.0;

        // Largest index offset at which two characters may still match
        // (integer division already floors)
        int window = Math.Max(n1, n2) / 2 - 1;

        // Which positions of each string are already part of a match
        bool[] used1 = new bool[n1];
        bool[] used2 = new bool[n2];
        int matches = 0;

        for (int i = 0; i < n1; i++)
        {
            int lo = Math.Max(0, i - window);
            int hi = Math.Min(n2, i + window + 1);

            // Take the first unused, equal character inside the window
            for (int j = lo; j < hi; j++)
            {
                if (used2[j] || s1[i] != s2[j])
                    continue;

                used1[i] = true;
                used2[j] = true;
                matches++;
                break;
            }
        }

        // Nothing in common at all
        if (matches == 0)
            return 0.0;

        // Walk the matched characters of both strings in parallel and
        // count the positions where they disagree
        double t = 0;
        int cursor = 0;
        for (int i = 0; i < n1; i++)
        {
            if (!used1[i])
                continue;

            // Advance to the next matched character of the second string
            while (!used2[cursor])
                cursor++;

            if (s1[i] != s2[cursor])
                t++;
            cursor++;
        }

        // Half of the out-of-order count is the number of transpositions
        t /= 2;

        // Average of the three Jaro terms
        double m = matches;
        return (m / n1 + m / n2 + (m - t) / m) / 3.0;
    }

    /// <summary>
    /// Calculates the Jaro-Winkler similarity: the Jaro similarity,
    /// boosted for a shared prefix when the Jaro score is above 0.7. The
    /// boost uses a scaling factor of 0.1 and at most 4 prefix characters.
    /// </summary>
    static double jaro_Winkler(string s1, string s2)
    {
        double score = jaro_distance(s1, s2);

        // Scores at or below the threshold are returned unchanged
        if (!(score > 0.7))
            return score;

        // Only the first 4 characters (or fewer, for short strings) can
        // contribute to the prefix bonus
        int limit = Math.Min(4, Math.Min(s1.Length, s2.Length));

        // Length of the common prefix, up to the limit
        int prefix = 0;
        while (prefix < limit && s1[prefix] == s2[prefix])
            prefix++;

        // Move the score towards 1 in proportion to the shared prefix
        score += 0.1 * prefix * (1 - score);
        return score;
    }

    // Driver code
    public static void Main ()
    {
        string s1 = "TRATE";
        string s2 = "TRACE";

        // Print the Jaro-Winkler similarity of the sample pair
        Console.WriteLine("Jaro-Winkler Similarity =" +
                            jaro_Winkler(s1, s2));
    }
}
 
// This code is contributed by AnkitRai01

                    

Javascript

<script>
    // Javascript implementation of above approach
     
    // Function to calculate the
    // Jaro Similarity of two strings
    function jaro_distance(s1, s2)
    {
        // If the strings are equal
        if (s1 == s2)
            return 1.0;
      
        // Length of two strings
        let len1 = s1.length, len2 = s2.length;
      
        if (len1 == 0 || len2 == 0)
            return 0.0;
      
        // Maximum distance upto which matching
        // is allowed
        let max_dist = Math.floor(Math.max(len1, len2) / 2) - 1;
      
        // Count of matches
        let match = 0;
      
        // Hash for matches
        let hash_s1 = new Array(s1.length);
        hash_s1.fill(0);
        let hash_s2 = new Array(s2.length);
        hash_s2.fill(0);
      
        // Traverse through the first string
        for (let i = 0; i < len1; i++)
        {
      
            // Check if there is any matches
            for (let j = Math.max(0, i - max_dist);
                j < Math.min(len2, i + max_dist + 1); j++)
                  
                // If there is a match
                if (s1[i] == s2[j] &&
                    hash_s2[j] == 0)
                {
                    hash_s1[i] = 1;
                    hash_s2[j] = 1;
                    match++;
                    break;
                }
        }
      
        // If there is no match
        if (match == 0)
            return 0.0;
      
        // Number of transpositions
        let t = 0;
      
        let point = 0;
      
        // Count number of occurrences
        // where two characters match but
        // there is a third matched character
        // in between the indices
        for (let i = 0; i < len1; i++)
            if (hash_s1[i] == 1)
            {
      
                // Find the next matched character
                // in second string
                while (hash_s2[point] == 0)
                    point++;
      
                if (s1[i] != s2[point++])
                    t++;
            }
        t /= 2;
      
        // Return the Jaro Similarity
        return ((match) / (len1)
                + (match) / (len2)
                + (match - t) / (match))
            / 3.0;
    }
      
    // Jaro Winkler Similarity
    function jaro_Winkler(s1, s2)
    {
        let jaro_dist = jaro_distance(s1, s2);
      
        // If the jaro Similarity is above a threshold
        if (jaro_dist > 0.7)
        {
      
            // Find the length of common prefix
            let prefix = 0;
      
            for (let i = 0; i < Math.min(s1.length,s2.length); i++)
            {
                  
                // If the characters match
                if (s1[i] == s2[i])
                    prefix++;
      
                // Else break
                else
                    break;
            }
      
            // Maximum of 4 characters are allowed in prefix
            prefix = Math.min(4, prefix);
      
            // Calculate jaro winkler Similarity
            jaro_dist += 0.1 * prefix * (1 - jaro_dist);
        }
        return jaro_dist.toFixed(6);
    }
     
    let s1 = "TRATE", s2 = "TRACE";
      
    // Print Jaro-Winkler Similarity of two strings
    document.write("Jaro-Winkler Similarity =" +
                      jaro_Winkler(s1, s2));
 
</script>

                    

Output: 
Jaro-Winkler Similarity =0.906667

 

Time Complexity: O(N * M), where N is the length of string s1 and M is the length of string s2.
Auxiliary Space: O(N + M)



Like Article
Suggest improvement
Previous
Next
Share your thoughts in the comments

Similar Reads