Count M-length substrings occurring exactly K times in a string
Given a string S of length N and two integers M and K, the task is to count the number of substrings of length M occurring exactly K times in the string S.
Examples:
Input: S = “abacaba”, M = 3, K = 2
Output: 1
Explanation: All distinct substrings of length 3 are “aba”, “bac”, “aca”, “cab”.
Out of all these substrings, only “aba” occurs twice in the string S.
Therefore, the count is 1.Input: S = “geeksforgeeks”, M = 2, K = 1
Output: 4
Explanation:
All distinct substrings of length 2 are “ge”, “ee”, “ek”, “ks”, “sf”, “fo”, “or”, “rg”.
Out of all these strings, “sf”, “fo”, “or”, “rg” occurs once in the string S.
Therefore, the count is 4.
Naive Approach: The simplest approach is to generate all substrings of length M and store the frequency of each substring in the string S in a Map. Now, traverse the Map and if the frequency is equal to K, then increment count by 1. After completing the above steps, print count as the result.
Time Complexity: O((N – M)*N*M)
Auxiliary Space: O(N – M)
Efficient Approach: The above approach can be optimized by using the KMP algorithm for finding the frequency of a substring in the string. Follow the steps to solve the problem:
- Initialize a variable, say count as 0, to store the number of the required substring.
- Generate all substrings of length M from the string S and insert them in an array, say arr[].
- Traverse the array arr[] and for each string in the array, calculate its frequency in the string S using KMP algorithm.
- If the frequency of the string is equal to P, then increment the count by 1.
- After completing the above steps, print the value of count as the resultant count of substrings.
Below is the implementation of the above approach:
C++
// C++ program for the above approach #include <bits/stdc++.h> using namespace std; // Function to compute the LPS array void computeLPSArray(string pat, int M, int lps[]) { // Length of the previous // longest prefix suffix int len = 0; int i = 1; lps[0] = 0; // Iterate from [1, M - 1] to find lps[i] while (i < M) { // If the characters match if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } // If pat[i] != pat[len] else { // If length is non-zero if (len != 0) { len = lps[len - 1]; // Also, note that i is // not incremented here } // Otherwise else { lps[i] = len; i++; } } } } // Function to find the frequency of // pat in the string txt int KMPSearch(string pat, string txt) { // Stores length of both strings int M = pat.length(); int N = txt.length(); // Initialize lps[] to store the // longest prefix suffix values // for the string pattern int lps[M]; // Store the index for pat[] int j = 0; // Preprocess the pattern // (calculate lps[] array) computeLPSArray(pat, M, lps); // Store the index for txt[] int i = 0; int res = 0; int next_i = 0; while (i < N) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { // If pattern is found the // first time, iterate again // to check for more patterns j = lps[j - 1]; res++; // Start i to check for more // than once occurrence // of pattern, reset i to // previous start + 1 if (lps[j] != 0) i = ++next_i; j = 0; } // Mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] // characters, they will // match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } // Return the required frequency return res; } // Function to find count of substrings // of length M occurring exactly P times // in the string, S void findCount(string& S, int M, int P) { // Store all substrings of length M set<string> vec; // Store the size of the string, S int n = S.length(); // Pick starting point for ( int i = 0; i < n; i++) { // Pick ending point for ( int len = 1; len <= n - i; len++) { // If the substring is of // length M, insert it in vec string s = S.substr(i, len); if (s.length() == M) { vec.insert(s); } } } // Initialise count as 0 to store // the required count of substrings int count = 0; // Iterate through the set of // substrings for ( auto it : vec) { // Store its frequency int ans = KMPSearch(it, S); // If frequency is equal to P if (ans == P) { // Increment count by 1 count++; } } // Print the answer cout << count; } // Driver Code int main() { string S = "abacaba" ; int M = 3, P = 2; // Function Call findCount(S, M, P); return 0; } |
Java
// Java Program to implement // the above approach import java.io.*; import java.util.*; class GFG { // Function to compute the LPS array static void computeLPSArray(String pat, int M, int lps[]) { // Length of the previous // longest prefix suffix int len = 0 ; int i = 1 ; lps[ 0 ] = 0 ; // Iterate from [1, M - 1] to find lps[i] while (i < M) { // If the characters match if (pat.charAt(i) == pat.charAt(len)) { len++; lps[i] = len; i++; } // If pat[i] != pat[len] else { // If length is non-zero if (len != 0 ) { len = lps[len - 1 ]; // Also, note that i is // not incremented here } // Otherwise else { lps[i] = len; i++; } } } } // Function to find the frequency of // pat in the string txt static int KMPSearch(String pat, String txt) { // Stores length of both strings int M = pat.length(); int N = txt.length(); // Initialize lps[] to store the // longest prefix suffix values // for the string pattern int lps[] = new int [M]; // Store the index for pat[] int j = 0 ; // Preprocess the pattern // (calculate lps[] array) computeLPSArray(pat, M, lps); // Store the index for txt[] int i = 0 ; int res = 0 ; int next_i = 0 ; while (i < N) { if (pat.charAt(j) == txt.charAt(i)) { j++; i++; } if (j == M) { // If pattern is found the // first time, iterate again // to check for more patterns j = lps[j - 1 ]; res++; // Start i to check for more // than once occurrence // of pattern, reset i to // previous start + 1 if (lps[j] != 0 ) i = ++next_i; j = 0 ; } // Mismatch after j matches else if (i < N && pat.charAt(j) != txt.charAt(i)) { // Do not match lps[0..lps[j-1]] // characters, they will // match anyway if (j != 0 ) j = lps[j - 1 ]; else i = i + 1 ; } } // Return the required frequency return res; } // Function to find count of substrings // of length M occurring exactly P times // in the string, S static void findCount(String S, int M, int P) { // Store all substrings of length M // set<string> vec; TreeSet<String> vec = new TreeSet<>(); // Store the size of the string, S int n = S.length(); // Pick starting point for ( int i = 0 ; i < n; i++) { // Pick ending point for ( int len = 1 ; len <= n - i; len++) { // If the substring is of // length M, insert it in vec String s = S.substring(i, i + len); if (s.length() == M) { vec.add(s); } } } // Initialise count as 0 to store // the required count of substrings int count = 0 ; // Iterate through the set of // substrings for (String it : vec) { // Store its frequency int ans = KMPSearch(it, S); // If frequency is equal to P if (ans == P) { // Increment count by 1 count++; } } // Print the answer System.out.println(count); } // Driver Code public static void main(String[] args) { String S = "abacaba" ; int M = 3 , P = 2 ; // Function Call findCount(S, M, P); } } // This code is contributed by kingash. |
Python3
# Python 3 program for the above approach # Function to compute the LPS array def computeLPSArray(pat, M, lps): # Length of the previous # longest prefix suffix len1 = 0 i = 1 lps[ 0 ] = 0 # Iterate from [1, M - 1] to find lps[i] while (i < M): # If the characters match if (pat[i] = = pat[len1]): len1 + = 1 lps[i] = len1 i + = 1 # If pat[i] != pat[len] else : # If length is non-zero if (len1 ! = 0 ): len1 = lps[len1 - 1 ] # Also, note that i is # not incremented here # Otherwise else : lps[i] = len1 i + = 1 # Function to find the frequency of # pat in the string txt def KMPSearch(pat, txt): # Stores length of both strings M = len (pat) N = len (txt) # Initialize lps[] to store the # longest prefix suffix values # for the string pattern lps = [ 0 for i in range (M)] # Store the index for pat[] j = 0 # Preprocess the pattern # (calculate lps[] array) computeLPSArray(pat, M, lps) # Store the index for txt[] i = 0 res = 0 next_i = 0 while (i < N): if (pat[j] = = txt[i]): j + = 1 i + = 1 if (j = = M): # If pattern is found the # first time, iterate again # to check for more patterns j = lps[j - 1 ] res + = 1 # Start i to check for more # than once occurrence # of pattern, reset i to # previous start + 1 if (lps[j] ! = 0 ): next_i + = 1 i = next_i j = 0 # Mismatch after j matches elif (i < N and pat[j] ! = txt[i]): # Do not match lps[0..lps[j-1]] # characters, they will # match anyway if (j ! = 0 ): j = lps[j - 1 ] else : i = i + 1 # Return the required frequency return res # Function to find count of substrings # of length M occurring exactly P times # in the string, S def findCount(S, M, P): # Store all substrings of length M vec = set () # Store the size of the string, S n = len (S) # Pick starting point for i in range (n): # Pick ending point for len1 in range (n - i + 1 ): # If the substring is of # length M, insert it in vec s = S[i:len1] # if (len1(s) == M): # vec.add(s) # Initialise count as 0 to store # the required count of substrings count = 1 # Iterate through the set of # substrings for it in vec: # Store its frequency ans = KMPSearch(it, S) # If frequency is equal to P if (ans = = P): # Increment count by 1 count + = 1 # Print the answer print (count) # Driver Code if __name__ = = '__main__' : S = "abacaba" M = 3 P = 2 # Function Call findCount(S, M, P) # This code is contributed by ipg2016107. |
C#
// C# program for the above approach using System; using System.Collections.Generic; class GFG { // Function to compute the LPS array static void computeLPSArray( string pat, int M, int [] lps) { // Length of the previous // longest prefix suffix int len = 0; int i = 1; lps[0] = 0; // Iterate from [1, M - 1] to find lps[i] while (i < M) { // If the characters match if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } // If pat[i] != pat[len] else { // If length is non-zero if (len != 0) { len = lps[len - 1]; // Also, note that i is // not incremented here } // Otherwise else { lps[i] = len; i++; } } } } // Function to find the frequency of // pat in the string txt static int KMPSearch( string pat, string txt) { // Stores length of both strings int M = pat.Length; int N = txt.Length; // Initialize lps[] to store the // longest prefix suffix values // for the string pattern int [] lps = new int [M]; // Store the index for pat[] int j = 0; // Preprocess the pattern // (calculate lps[] array) computeLPSArray(pat, M, lps); // Store the index for txt[] int i = 0; int res = 0; int next_i = 0; while (i < N) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { // If pattern is found the // first time, iterate again // to check for more patterns j = lps[j - 1]; res++; // Start i to check for more // than once occurrence // of pattern, reset i to // previous start + 1 if (lps[j] != 0) i = ++next_i; j = 0; } // Mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] // characters, they will // match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } // Return the required frequency return res; } // Function to find count of substrings // of length M occurring exactly P times // in the string, S static void findCount( string S, int M, int P) { // Store all substrings of length M HashSet< string > vec = new HashSet< string >(); // Store the size of the string, S int n = S.Length; // Pick starting point for ( int i = 0; i < n; i++) { // Pick ending point for ( int len = 1; len <= n - i; len++) { // If the substring is of // length M, insert it in vec string s = S.Substring(i, len); if (s.Length == M) { vec.Add(s); } } } // Initialise count as 0 to store // the required count of substrings int count = 0; // Iterate through the set of // substrings foreach ( string it in vec) { // Store its frequency int ans = KMPSearch(it, S); // If frequency is equal to P if (ans == P) { // Increment count by 1 count++; } } // Print the answer Console.WriteLine(count); } // Driver code static void Main() { string S = "abacaba" ; int M = 3, P = 2; // Function Call findCount(S, M, P); } } // This code is contributed by divyeshrabadiya07. |
Javascript
<script> //Javascript implementation of the approach // Function to compute the LPS array function computeLPSArray(pat, M, lps) { // Length of the previous // longest prefix suffix var len = 0; var i = 1; lps[0] = 0; // Iterate from [1, M - 1] to find lps[i] while (i < M) { // If the characters match if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } // If pat[i] != pat[len] else { // If length is non-zero if (len != 0) { len = lps[len - 1]; // Also, note that i is // not incremented here } // Otherwise else { lps[i] = len; i++; } } } } // Function to find the frequency of // pat in the string txt function KMPSearch(pat, txt) { // Stores length of both strings var M = pat.length; var N = txt.length; // Initialize lps[] to store the // longest prefix suffix values // for the string pattern var lps = new Array(M); // Store the index for pat[] var j = 0; // Preprocess the pattern // (calculate lps[] array) computeLPSArray(pat, M, lps); // Store the index for txt[] var i = 0; var res = 0; var next_i = 0; while (i < N) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { // If pattern is found the // first time, iterate again // to check for more patterns j = lps[j - 1]; res++; // Start i to check for more // than once occurrence // of pattern, reset i to // previous start + 1 if (lps[j] != 0) i = ++next_i; j = 0; } // Mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] // characters, they will // match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } // Return the required frequency return res; } // Function to find count of substrings // of length M occurring exactly P times // in the string, S function findCount( S, M, P) { // Store all substrings of length M var vec = new Set(); // Store the size of the string, S var n = S.length; // Pick starting point for ( var i = 0; i < n; i++) { // Pick ending point for ( var len = 1; len <= n - i; len++) { // If the substring is of // length M, insert it in vec var s = S.substring(i, len); if (s.length == M) { vec.add(s); } } } // Initialise count as 0 to store // the required count of substrings var count = 0; // Iterate through the set of // substrings for (const it of vec){ // Store its frequency var ans = KMPSearch(it, S); // If frequency is equal to P if (ans == P) { // Increment count by 1 count++; } } // Print the answer document.write( count); } var S = "abacaba" ; var M = 3, P = 2; // Function Call findCount(S, M, P); // This code is contributed by SoumikMondal </script> |
1
Time Complexity: O((N*M) + (N2 – M2))
Auxiliary Space: O(N – M)