Given an array of strings arr[], the task is to find the count of distinct strings present in the array using polynomial rolling hash function.
Examples:
Input: arr[] = { “abcde”, “abcce”, “abcdf”, “abcde”, “abcdf” }
Output: 3
Explanation:
Distinct strings in the array are { “abcde”, “abcce”, “abcdf” }.
Therefore, the required output is 3.Input: arr[] = { “ab”, “abc”, “abcd”, “abcde”, “a” }
Output: 5
Explanation:
Distinct strings in the array are { “abcde”, “abcd”, “abc”, “ab”, “a” }.
Therefore, the required output is 5.
Approach: The problem can be solved using Hashing. The idea is to use rolling hash function to calculate the hash value of all the strings of the array and store it in another array, say Hash[]. Finally, print the count of distinct elements in Hash[] array. Follow the steps below to solve the problem:
- Initialize an array, say Hash[], to store the hash value of all the strings present in the array using rolling hash function.
- Initialize a variable, say cntElem, to store the count of distinct strings present in the array.
- Traverse the array arr[]. For every string encountered, calculate the hash value of that string and store it in the hash[] array.
- Sort the array hash[].
- Traverse the array hash[]. For every array element, check if hash[i] and hash[i – 1] are equal or not. If found to be false, then increment cntElem by 1.
- Finally, print the value of cntElem.
Below is the implementation of the above approach:
// C++ program to implement // the above approach #include<bits/stdc++.h> using namespace std;
// Function to find the hash value // of a string long long compute_hash(string str)
{ int p = 31;
int MOD = 1e9 + 7;
long long hash_val = 0;
long long mul = 1;
// Traverse the string
for ( char ch : str) {
// Update hash_val
hash_val
= (hash_val + (ch - 'a' + 1) * mul)
% MOD;
// Update mul
mul = (mul * p) % MOD;
}
// Return hash_val of str
return hash_val;
} // Function to find the count of distinct // strings present in the given array int distinct_str(vector<string>& arr, int n)
{ // Store the hash values of
// the strings
vector< long long > hash(n);
// Traverse the array
for ( int i = 0; i < n; i++) {
// Stores hash value of arr[i]
hash[i] = compute_hash(arr[i]);
}
// Sort hash[] array
sort(hash.begin(), hash.end());
// Stores count of distinct
// strings in the array
int cntElem = 1;
// Traverse hash[] array
for ( int i = 1; i < n; i++) {
if (hash[i] != hash[i - 1]) {
// Update cntElem
cntElem++;
}
}
return cntElem;
} // Driver Code int main()
{ vector<string> arr={ "abcde" , "abcce" , "abcdf" , "abcde" };
int N = arr.size();
cout << distinct_str(arr, N) << endl;
return 0;
} |
// Java program to implement // the above approach import java.util.Arrays;
public class GFG {
// Function to find the hash value
// of a string
static int compute_hash(String str)
{
int p = 31 ;
int MOD = ( int )1e9 + 7 ;
int hash_val = 0 ;
int mul = 1 ;
// Traverse the string
for ( int i = 0 ; i < str.length(); i++) {
char ch = str.charAt(i);
// Update hash_val
hash_val
= (hash_val + (ch - 'a' + 1 ) * mul)
% MOD;
// Update mul
mul = (mul * p) % MOD;
}
// Return hash_val of str
return hash_val;
}
// Function to find the count of distinct
// strings present in the given array
static int distinct_str(String arr[], int n)
{
// Store the hash values of
// the strings
int hash[] = new int [n];
// Traverse the array
for ( int i = 0 ; i < n; i++) {
// Stores hash value of arr[i]
hash[i] = compute_hash(arr[i]);
}
// Sort hash[] array
Arrays.sort(hash);
// Stores count of distinct
// strings in the array
int cntElem = 1 ;
// Traverse hash[] array
for ( int i = 1 ; i < n; i++) {
if (hash[i] != hash[i - 1 ]) {
// Update cntElem
cntElem++;
}
}
return cntElem;
}
// Driver Code
public static void main (String[] args)
{
String arr[] = { "abcde" , "abcce" ,
"abcdf" , "abcde" };
int N = arr.length;
System.out.println(distinct_str(arr, N));
}
} // This code is contributed by AnkThon |
# Python3 program to implement # the above approach # Function to find the hash value # of a def compute_hash( str ):
p = 31
MOD = 10 * * 9 + 7
hash_val = 0
mul = 1
# Traverse the
for ch in str :
# Update hash_val
hash_val = (hash_val + ( ord (ch) - ord ( 'a' ) + 1 ) * mul) % MOD
# Update mul
mul = (mul * p) % MOD
# Return hash_val of str
return hash_val
# Function to find the count of distinct # strings present in the given array def distinct_str(arr, n):
# Store the hash values of
# the strings
hash = [ 0 ] * (n)
# Traverse the array
for i in range (n):
# Stores hash value of arr[i]
hash [i] = compute_hash(arr[i])
# Sort hash[] array
hash = sorted ( hash )
# Stores count of distinct
# strings in the array
cntElem = 1
# Traverse hash[] array
for i in range ( 1 , n):
if ( hash [i] ! = hash [i - 1 ]):
# Update cntElem
cntElem + = 1
return cntElem
# Driver Code if __name__ = = '__main__' :
arr = [ "abcde" , "abcce" , "abcdf" , "abcde" ]
N = len (arr)
print (distinct_str(arr, N))
# This code is contributed by mohit kumar 29 |
// C# program to implement // the above approach using System;
class GFG
{ // Function to find the hash value
// of a string
static int compute_hash( string str)
{
int p = 31;
int MOD = ( int )1e9 + 7;
int hash_val = 0;
int mul = 1;
// Traverse the string
for ( int i = 0; i < str.Length; i++)
{
char ch = str[i];
// Update hash_val
hash_val = (hash_val + (ch -
'a' + 1) * mul) % MOD;
// Update mul
mul = (mul * p) % MOD;
}
// Return hash_val of str
return hash_val;
}
// Function to find the count of distinct
// strings present in the given array
static int distinct_str( string []arr, int n)
{
// Store the hash values of
// the strings
int []hash = new int [n];
// Traverse the array
for ( int i = 0; i < n; i++)
{
// Stores hash value of arr[i]
hash[i] = compute_hash(arr[i]);
}
// Sort hash[] array
Array.Sort(hash);
// Stores count of distinct
// strings in the array
int cntElem = 1;
// Traverse hash[] array
for ( int i = 1; i < n; i++)
{
if (hash[i] != hash[i - 1])
{
// Update cntElem
cntElem++;
}
}
return cntElem;
}
// Driver Code
public static void Main (String[] args)
{
string []arr = { "abcde" , "abcce" ,
"abcdf" , "abcde" };
int N = arr.Length;
Console.WriteLine(distinct_str(arr, N));
}
} // This code is contributed by AnkThon |
<script> // Javascript program to implement the above approach
// Function to find the hash value
// of a string
function compute_hash(str)
{
let p = 31;
let MOD = 1e9 + 7;
let hash_val = 0;
let mul = 1;
// Traverse the string
for (let i = 0; i < str.length; i++)
{
let ch = str[i];
// Update hash_val
hash_val = (hash_val + (ch.charCodeAt() - 'a' .charCodeAt() + 1) * mul) % MOD;
// Update mul
mul = (mul * p) % MOD;
}
// Return hash_val of str
return hash_val;
}
// Function to find the count of distinct
// strings present in the given array
function distinct_str(arr, n)
{
// Store the hash values of
// the strings
let hash = new Array(n);
// Traverse the array
for (let i = 0; i < n; i++)
{
// Stores hash value of arr[i]
hash[i] = compute_hash(arr[i]);
}
// Sort hash[] array
hash.sort( function (a, b){ return a - b});
// Stores count of distinct
// strings in the array
let cntElem = 1;
// Traverse hash[] array
for (let i = 1; i < n; i++)
{
if (hash[i] != hash[i - 1])
{
// Update cntElem
cntElem++;
}
}
return cntElem;
}
let arr = [ "abcde" , "abcce" , "abcdf" , "abcde" ];
let N = arr.length;
document.write(distinct_str(arr, N));
</script> |
3
Time Complexity: O(N * M), where M is the maximum length of the string
Auxiliary Space: O(N)