Count distinct emails present in a given array

Last Updated : 14 Feb, 2023

Given an array arr[] consisting of N strings, where each string represents an email address made up of English letters, ‘.’, ‘+’ and ‘@’, the task is to count the number of distinct emails present in the array according to the following rules:

An email address can be split at the ‘@’ character into two substrings: the part before ‘@’ is the local name and the part after ‘@’ is the domain name.
The ‘.’ character in the string in the local name is ignored.
In the local name, the first ‘+’ and every character after it (up to the ‘@’) are ignored. The domain name is never modified.

Examples:

Input: arr[] = {“raghav.agg@geeksforgeeks.com”, “raghavagg@geeksforgeeks.com”}
Output: 1
Explanation: Removing all the ‘.’s before ‘@’ modifies the strings to {“raghavagg@geeksforgeeks.com”, “raghavagg@geeksforgeeks.com”}. Therefore, the total number of distinct emails present in the array is 1.

Input: arr[] = {“avruty+dhir+gfg@geeksforgeeks.com”, “avruty+gfg@geeksforgeeks.com”, “av.ruty@geeksforgeeks.com”}
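Output: 1
Explanation: Ignoring everything from the first ‘+’ up to ‘@’ and removing all the ‘.’s in the local name turns every string into “avruty@geeksforgeeks.com”. Therefore, there is only 1 distinct email.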

Approach: The given problem can be solved by normalizing each email according to the given rules, storing the normalized strings in a HashSet, and printing the size of the HashSet obtained. Follow the steps below to solve the problem:

Initialize a HashSet, say S, to store all the distinct strings after populating according to the given rules.
Traverse the given array arr[] and perform the following steps:
- Find the position of ‘@’ and store it in a variable, say pos2.
- Delete all the ‘.’ characters before pos2 using erase() function.
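- Since erasing characters shifts the rest of the string, update the position of ‘@’, i.e., pos2 = find(‘@’), and store the position of the first ‘+’ in a variable, say pos1 = find(‘+’). Both searches are performed on the current string, not on the HashSet S.
- If a ‘+’ was found before ‘@’, erase the characters from pos1 up to, but not including, pos2 (i.e., the ‘+’ and everything after it in the local name).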
- Insert all the updated strings in a HashSet S.
After completing the above steps, print the size of HashSet S as the result.

Below is the implementation of the above approach:

C++14

// C++ program for the above approach 
#include <bits/stdc++.h> 
using namespace std; 
 
// Counts the distinct email addresses in `emails` after normalizing each
// one in place:
//   * every '.' in the local name (the part before the first '@') is
//     dropped;
//   * the first '+' in the local name and everything after it, up to the
//     '@', is dropped.
// The domain name (from '@' onwards) is never modified, and a string that
// contains no '@' is left untouched and counted verbatim.
//
// Note: `emails` is modified -- on return it holds the normalized strings.
// Returns the number of distinct normalized strings.
int distinctEmails(std::vector<std::string>& emails)
{
    for (std::string& email : emails) {
        const std::string::size_type atPos = email.find('@');

        // Without an '@' there is no local name to normalize.
        if (atPos == std::string::npos)
            continue;

        // The kept part of the local name ends at the first '+' that
        // precedes '@'. find() yields npos (the largest size_type) when
        // there is no '+', and a '+' inside the domain lies past atPos,
        // so min() falls back to atPos in both cases. Clamping here also
        // guards against computing "atPos - plusPos" with plusPos > atPos,
        // which would underflow and erase the rest of the string.
        const std::string::size_type keepLen
            = std::min(email.find('+'), atPos);

        const auto localBegin = email.begin();
        const auto keepEnd
            = localBegin
              + static_cast<std::string::difference_type>(keepLen);
        const auto atIter
            = localBegin
              + static_cast<std::string::difference_type>(atPos);

        // Squeeze the dots out of the kept prefix, then erase everything
        // between the compacted prefix and '@' (the slots freed by the
        // dots plus the ignored "+tag" suffix) in one call.
        email.erase(std::remove(localBegin, keepEnd, '.'), atIter);
    }

    // A hash set keeps a single copy of each normalized address.
    const std::unordered_set<std::string> distinct(emails.begin(),
                                                   emails.end());

    return static_cast<int>(distinct.size());
}
 
// Driver Code 
int main() 
{ 
    vector<string> arr 
        = { "raghav.agg@geeksforgeeks.com", 
            "raghavagg@geeksforgeeks.com" }; 
 
    // Function Call 
    cout << distinctEmails(arr); 
 
    return 0; 
} 

Java

/*package whatever //do not write package name here */
 
// Java program for the above approach
 
import java.util.*;
class GFG {
    // Returns the normalized form of a single address:
    //   * every '.' in the local name (the part before the first '@')
    //     is dropped;
    //   * the first '+' in the local name and everything after it, up
    //     to the '@', is dropped.
    // The domain name is returned unchanged. A string without '@' has
    // no local name and is returned verbatim.
    private static String normalize(String email)
    {
        // indexOf() reports "not found" as -1, so the absence of '@'
        // must be tested explicitly before calling substring().
        int at = email.indexOf('@');
        if (at < 0)
            return email;

        String local = email.substring(0, at);

        // Only a '+' inside the local name counts; searching `local`
        // (not the whole string) keeps a '+' in the domain intact.
        int plus = local.indexOf('+');
        if (plus >= 0)
            local = local.substring(0, plus);

        return local.replace(".", "") + email.substring(at);
    }

    // Function to count all the distinct
    // emails after preprocessing according
    // to the given rules. The input array
    // is not modified.
    static int distinctEmails(String emails[])
    {
        // Stores one copy of every normalized address
        HashSet<String> ans = new HashSet<>();

        // Traverse the given array of
        // strings arr[]
        for (String x : emails) {
            ans.add(normalize(x));
        }

        // Return the size of set ans
        return ans.size();
    }

    // Driver Code
    public static void main(String args[])
    {

        String arr[] = { "raghav.agg@geeksforgeeks.com",
                         "raghavagg@geeksforgeeks.com" };

        // Function Call
        System.out.println(distinctEmails(arr));
    }
}
// contributed by akashish__

Python3

# Python3 program for the above approach
 
# Function to count all the distinct
# emails after preprocessing according
# to the given rules
def distinctEmails(emails):
    """Return the number of distinct addresses in ``emails`` after normalization.

    Each address is normalized as follows:
      * every '.' in the local name (the part before the first '@') is
        dropped;
      * the first '+' in the local name and everything after it, up to the
        '@', is dropped.

    The domain name is never modified. A string that contains no '@' has no
    local name and is counted verbatim.

    ``emails`` is any iterable of strings; it is not modified.
    """
    ans = set()

    # Traverse the given array of strings
    for x in emails:
        # partition() splits on the first '@'; `at` is '' when there is
        # none, which avoids misusing find()'s -1 sentinel as an index.
        local, at, domain = x.partition('@')

        if at:
            # Keep only what precedes the first '+' of the local name,
            # then drop the dots. The domain is left untouched, so a '+'
            # that appears only after '@' has no effect.
            local = local.split('+', 1)[0].replace('.', '')
            x = local + at + domain

        # Insert the normalized string inside the set
        ans.add(x)

    # Return the size of set ans
    return len(ans)
 
# Driver Code: run the sample input and print the distinct-address count.
if __name__ == "__main__":

    # Both sample addresses normalize to the same string.
    arr = [
        "raghav.agg@geeksforgeeks.com",
        "raghavagg@geeksforgeeks.com",
    ]

    # Function Call
    distinct_count = distinctEmails(arr)
    print(distinct_count)
 
# This code is contributed by ukasp

C#

// C# program for the above approach
using System;
using System.Collections.Generic;
 
public class GFG {
    // Returns the normalized form of a single address:
    //   * every '.' in the local name (the part before the first '@')
    //     is dropped;
    //   * the first '+' in the local name and everything after it, up
    //     to the '@', is dropped.
    // The domain name is returned unchanged. A string without '@' has
    // no local name and is returned verbatim.
    private static string NormalizeEmail(string email)
    {
        // IndexOf() reports "not found" as -1, so the absence of '@'
        // must be tested explicitly before calling Substring().
        int at = email.IndexOf('@');
        if (at < 0)
            return email;

        string local = email.Substring(0, at);

        // Only a '+' inside the local name counts; searching `local`
        // (not the whole string) keeps a '+' in the domain intact.
        int plus = local.IndexOf('+');
        if (plus >= 0)
            local = local.Substring(0, plus);

        return local.Replace(".", "") + email.Substring(at);
    }

    // Function to count all the distinct
    // emails after preprocessing according
    // to the given rules. The input array
    // is not modified.
    static int distinctEmails(String[] emails)
    {
        // Stores one copy of every normalized address
        HashSet<String> ans = new HashSet<String>();

        // Traverse the given array of
        // strings arr[]
        foreach (string x in emails) {
            ans.Add(NormalizeEmail(x));
        }

        // Return the size of set ans
        return ans.Count;
    }

    // Driver Code
    static public void Main()
    {

        String[] arr = { "raghav.agg@geeksforgeeks.com",
                         "raghavagg@geeksforgeeks.com" };

        // Function Call
        Console.WriteLine(distinctEmails(arr));
    }
}
// contributed by akashish__

Javascript

// Function to count all the distinct
// emails after preprocessing according
// to the given rules:
//   * every '.' in the local name (the part before the first '@')
//     is dropped;
//   * the first '+' in the local name and everything after it, up
//     to the '@', is dropped.
// The domain name is never modified, and a string without '@' is
// counted verbatim. The input array is not modified.
function distinctEmails(emails) {
  const ans = new Set();

  // Traverse the given array of strings
  for (const email of emails) {
    // indexOf() reports "not found" as -1, so the absence of '@'
    // must be tested explicitly before slicing.
    const at = email.indexOf('@');
    if (at === -1) {
      ans.add(email);
      continue;
    }

    let local = email.slice(0, at);

    // Only a '+' inside the local name counts; searching `local`
    // (not the whole string) keeps a '+' in the domain intact.
    const plus = local.indexOf('+');
    if (plus !== -1) {
      local = local.slice(0, plus);
    }

    // Drop the dots of the local name and re-attach the domain
    ans.add(local.replace(/\./g, '') + email.slice(at));
  }

  // Return the size of set ans
  return ans.size;
}
 
// Driver Code: run the sample input and print the distinct-address count.
// Both sample addresses normalize to the same string.
const arr = ['raghav.agg@geeksforgeeks.com', 'raghavagg@geeksforgeeks.com'];

// Function Call
const distinctCount = distinctEmails(arr);
console.log(distinctCount);
 
// This code is contributed by aadityaburujwale.