Count number of distinct substrings of a given length

Given a string S of length N consisting of lower-case English albhabets and an integer ‘l’, find the number of distinct sub-strings of length ‘l’ of the given string.
Examples:

Input : s = “abcbab”, l = 2
Output : 4
All distinct sub-strings of length 2
will be {“ab”, “bc”, “cb”, “ba”}
Thus, answer equals 4.
Input : s = “ababa”, l = 2
Output : 2

Naive Approach :
A simple approach will be to find all the possible substrings, find their hash values and find the number of distinct substrings.
Time Complexity : O(l*N)

Efficient approach :
We will solve this problem using Rolling hash algorithm.

  • Find the hash value of first sub-string of length ‘l’.
    It can be evaluated as (s[0]-97)*x^(l-1) + (s[1]-97)*x^(l-2) … (s[l-1]-97).Lets call this ‘H1‘.
  • Using this hash value, we will generate the next hash as :
    H2 = (H1-(s[0]-97)*x^(l-1)*x + (s[l]-97). Generate hashes of all substrings in this way
    and push them in an unordered set.
  • Count number of distinct values in the set.

Below is the implementation of above approach :

C++

filter_none

edit
close

play_arrow

link
brightness_4
code

// C++ implementation of above approach
#include <bits/stdc++.h>
#define x 26
#define mod 3001
using namespace std;
  
// Function to find the required count
int CntSubstr(string s, int l)
{
    // Variable to the hash
    int hash = 0;
  
    // Finding hash of substring
    // (0, l-1) using random number x
    for (int i = 0; i < l; i++) {
        hash = (hash * x + (s[i] - 97)) % mod;
    }
  
    // Computing x^(l-1)
    int pow_l = 1;
    for (int i = 0; i < l - 1; i++)
        pow_l = (pow_l * x) % mod;
  
    // Unordered set to add hash values
    unordered_set<int> result;
    result.insert(hash);
  
    // Generating all possible hash values
    for (int i = l; i < s.size(); i++) {
        hash = ((hash - pow_l * (s[i - l] - 97)
              + 2 * mod) * x + (s[i] - 97)) % mod;
        result.insert(hash);
    }
  
    // Print the result
    cout << result.size() << endl;
}
  
// Driver Code
int main()
{
    string s = "abcba";
  
    int l = 2;
  
    CntSubstr(s, l);
  
    return 0;
}

chevron_right


Java

filter_none

edit
close

play_arrow

link
brightness_4
code

// Java implementation of above approach
import java.util.*;
  
class GFG
{
  
    static int x = 26;
    static int mod = 3001;
  
    // Function to find the required count
    static void CntSubstr(char[] s, int l)
    {
        // Variable to the hash
        int hash = 0;
  
        // Finding hash of substring
        // (0, l-1) using random number x
        for (int i = 0; i < l; i++) 
        {
            hash = (hash * x + (s[i] - 97)) % mod;
        }
  
        // Computing x^(l-1)
        int pow_l = 1;
        for (int i = 0; i < l - 1; i++) 
        {
            pow_l = (pow_l * x) % mod;
        }
  
        // Unordered set to add hash values
        HashSet<Integer> result = new HashSet<Integer>();
        result.add(hash);
  
        // Generating all possible hash values
        for (int i = l; i < s.length; i++) 
        {
            hash = ((hash - pow_l * (s[i - l] - 97)
                    + 2 * mod) * x + (s[i] - 97)) % mod;
            result.add(hash);
        }
  
        // Print the result
        System.out.println(result.size());
    }
  
    // Driver Code
    public static void main(String[] args)
    {
        String s = "abcba";
  
        int l = 2;
  
        CntSubstr(s.toCharArray(), l);
    }
}
  
// This code contributed by Rajput-Ji

chevron_right


C#

filter_none

edit
close

play_arrow

link
brightness_4
code

// C# implementation of the approach
using System;
using System.Collections.Generic; 
  
class GFG
{
  
    static int x = 26;
    static int mod = 3001;
  
    // Function to find the required count
    static void CntSubstr(char[] s, int l)
    {
        // Variable to the hash
        int hash = 0;
  
        // Finding hash of substring
        // (0, l-1) using random number x
        for (int i = 0; i < l; i++) 
        {
            hash = (hash * x + (s[i] - 97)) % mod;
        }
  
        // Computing x^(l-1)
        int pow_l = 1;
        for (int i = 0; i < l - 1; i++) 
        {
            pow_l = (pow_l * x) % mod;
        }
  
        // Unordered set to add hash values
        HashSet<int> result = new HashSet<int>();
        result.Add(hash);
  
        // Generating all possible hash values
        for (int i = l; i < s.Length; i++) 
        {
            hash = ((hash - pow_l * (s[i - l] - 97)
                    + 2 * mod) * x + (s[i] - 97)) % mod;
            result.Add(hash);
        }
  
        // Print the result
        Console.WriteLine(result.Count);
    }
  
    // Driver Code
    public static void Main(String[] args)
    {
        String s = "abcba";
  
        int l = 2;
  
        CntSubstr(s.ToCharArray(), l);
    }
}
  
/* This code contributed by PrinciRaj1992 */

chevron_right


Python3

filter_none

edit
close

play_arrow

link
brightness_4
code

# Python3 implementation of above approach 
x = 26
mod = 3001
  
# Function to find the required count 
def CntSubstr(s, l) : 
  
    # Variable to the hash 
    hash = 0
  
    # Finding hash of substring 
    # (0, l-1) using random number x 
    for i in range(l) :
        hash = (hash * x + (ord(s[i]) - 97)) % mod; 
  
    # Computing x^(l-1) 
    pow_l = 1
    for i in range(l-1) : 
        pow_l = (pow_l * x) % mod; 
  
    # Unordered set to add hash values 
    result = set(); 
    result.add(hash); 
  
    # Generating all possible hash values 
    for i in range(l,len(s)) :
        hash = ((hash - pow_l * (ord(s[i - l]) - 97
            + 2 * mod) * x + (ord(s[i]) - 97)) % mod; 
          
        result.add(hash); 
  
    # Print the result 
    print(len(result)) ; 
  
  
# Driver Code 
if __name__ == "__main__"
  
    s = "abcba"
  
    l = 2
  
    CntSubstr(s, l); 
      
# This code is contributed by AnkitRai01

chevron_right


Output:

4

Time Complexity : O(N)



My Personal Notes arrow_drop_up

Check out this Author's contributed articles.

If you like GeeksforGeeks and would like to contribute, you can also write an article using contribute.geeksforgeeks.org or mail your article to contribute@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.

Please Improve this article if you find anything incorrect by clicking on the "Improve Article" button below.