Find the Jaccard Index and Jaccard Distance between the two given sets

Given two sets of integers s1 and s2, the task is to find the Jaccard Index and the Jaccard Distance between the two sets.

Examples:

Input: s1 = {1, 2, 3, 4, 5}, s2 = {4, 5, 6, 7, 8, 9, 10}
Output:
Jaccard index = 0.2
Jaccard distance = 0.8



Input: s1 = {1, 2, 3, 4, 5}, s2 = {4, 5, 6, 7, 8}
Output:
Jaccard index = 0.25
Jaccard distance = 0.75

Approach: The Jaccard Index and the Jaccard Distance between the two sets can be calculated using the following formulas, where A and B denote the two given sets:

     \[ \text{Jaccard Index} = \frac{|A \cap B|}{|A \cup B|} = \frac{|A \cap B|}{|A| + |B| - |A \cap B|} \] \[ \text{Jaccard Distance} = 1 - \text{Jaccard Index} \]

Below is the implementation of the above approach:

C++

filter_none

edit
close

play_arrow

link
brightness_4
code

// C++ implementation of the approach
#include <bits/stdc++.h>
using namespace std;
  
// Function to return the intersection set of s1 and s2,
// i.e. the set of elements that are present in both of them.
//
// Both sets are taken by const reference, so calling this
// function neither copies nor modifies the caller's sets.
std::set<int> intersection(const std::set<int>& s1, const std::set<int>& s2)
{
    std::set<int> common;

    // std::set iterates in ascending order, which satisfies the
    // sorted-range precondition of std::set_intersection
    std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
                          std::inserter(common, common.begin()));

    return common;
}
  
// Function to return the Jaccard index of two sets
double jaccard_index(set<int> s1, set<int> s2)
{
    // Sizes of both the sets
    double size_s1 = s1.size();
    double size_s2 = s2.size();
  
    // Get the intersection set
    set<int> intersect = intersection(s1, s2);
  
    // Size of the intersection set
    double size_in = intersect.size();
  
    // Calculate the Jaccard index
    // using the formula
    double jaccard_in = size_in
                        / (size_s1 + size_s2 - size_in);
  
    // Return the Jaccard index
    return jaccard_in;
}
  
// Converts a Jaccard index into the matching Jaccard distance,
// which is the complement of the index with respect to 1
double jaccard_distance(double jaccardIndex)
{
    return 1 - jaccardIndex;
}
  
// Driver code
int main()
{
    // Elements of the 1st set
    set<int> s1;
    s1.insert(1);
    s1.insert(2);
    s1.insert(3);
    s1.insert(4);
    s1.insert(5);
  
    // Elements of the 2nd set
    set<int> s2;
    s2.insert(4);
    s2.insert(5);
    s2.insert(6);
    s2.insert(7);
    s2.insert(8);
    s2.insert(9);
    s2.insert(10);
  
    double jaccardIndex = jaccard_index(s1, s2);
  
    // Print the Jaccard index and Jaccard distance
    cout << "Jaccard index = "
         << jaccardIndex << endl;
    cout << "Jaccard distance = "
         << jaccard_distance(jaccardIndex);
  
    return 0;
}

chevron_right


Python3

filter_none

edit
close

play_arrow

link
brightness_4
code

# Python3 implementation of the approach 
  
# Function to return the elements
# shared by the sets s1 and s2
def intersection(s1, s2):
    """Return the set of elements that occur in both s1 and s2."""
    return s1 & s2
  
  
# Function to return the Jaccard index of two sets
def jaccard_index(s1, s2):
    """Return the Jaccard index of the sets s1 and s2.

    The index is the size of the intersection divided by the size of
    the union of the two sets.

    When both sets are empty the union is empty as well and the ratio
    would be 0 / 0. Two empty sets are identical, so 1.0 is returned in
    that case, which keeps the matching Jaccard distance (1 - index)
    at 0 instead of raising ZeroDivisionError.
    """
    # Size of the intersection set
    size_in = len(intersection(s1, s2))

    # Size of the union, using |A u B| = |A| + |B| - |A n B|
    size_union = len(s1) + len(s2) - size_in

    # Both sets are empty: avoid dividing zero by zero
    if size_union == 0:
        return 1.0

    # Return the Jaccard index
    return size_in / size_union
  
  
# Function to convert a Jaccard index
# into the matching Jaccard distance
def jaccard_distance(jaccardIndex):
    """Return the Jaccard distance, i.e. 1 minus the given Jaccard index."""
    return 1 - jaccardIndex
  
  
# Driver code
if __name__ == "__main__":

    # Elements of the 1st set
    s1 = {1, 2, 3, 4, 5}

    # Elements of the 2nd set
    s2 = {4, 5, 6, 7, 8, 9, 10}

    jaccardIndex = jaccard_index(s1, s2)

    # Print the Jaccard index and Jaccard distance.
    # print() already puts a single space between its arguments,
    # so the labels carry no trailing space of their own.
    print("Jaccard index =", jaccardIndex)
    print("Jaccard distance =", jaccard_distance(jaccardIndex))

    # This code is contributed by AnkitRai01

chevron_right


Output:

Jaccard index = 0.2
Jaccard distance = 0.8


My Personal Notes arrow_drop_up

Second year, Department of Information Technology, Jadavpur University

If you like GeeksforGeeks and would like to contribute, you can also write an article using contribute.geeksforgeeks.org or mail your article to contribute@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.

Please Improve this article if you find anything incorrect by clicking on the "Improve Article" button below.



Improved By : AnkitRai01



Article Tags :
Practice Tags :


Be the First to upvote.


Please write to us at contribute@geeksforgeeks.org to report any issue with the above content.