Count M-length substrings occurring exactly K times in a string
Last Updated :
27 Jan, 2023
Given a string S of length N and two integers M and K, the task is to count the number of substrings of length M occurring exactly K times in the string S.
Examples:
Input: S = “abacaba”, M = 3, K = 2
Output: 1
Explanation: All distinct substrings of length 3 are “aba”, “bac”, “aca”, “cab”.
Out of all these substrings, only “aba” occurs twice in the string S.
Therefore, the count is 1.
Input: S = “geeksforgeeks”, M = 2, K = 1
Output: 4
Explanation:
All distinct substrings of length 2 are “ge”, “ee”, “ek”, “ks”, “sf”, “fo”, “or”, “rg”.
Out of all these substrings, “sf”, “fo”, “or” and “rg” each occur exactly once in the string S.
Therefore, the count is 4.
Naive Approach: The simplest approach is to generate all substrings of length M and store the frequency of each substring in the string S in a Map. Now, traverse the Map and if the frequency is equal to K, then increment count by 1. After completing the above steps, print count as the result.
C++
#include <bits/stdc++.h>
using namespace std;
// Prints the number of distinct substrings of length M that occur exactly
// K times in S. Every start position is a separate occurrence, so
// overlapping occurrences are all counted.
//
// S: text to scan.
// M: length of the substrings to consider.
// K: required number of occurrences.
//
// Prints 0 when M is not positive or is larger than S, because no window of
// that length exists.
void findCount(std::string& S, int M, int K)
{
    // Guard before doing any arithmetic on S.size(): it is unsigned, so
    // "S.size() - M" would wrap to a huge value whenever M > S.size().
    if (M <= 0 || static_cast<std::size_t>(M) > S.size()) {
        std::cout << 0;
        return;
    }
    const std::size_t width = static_cast<std::size_t>(M);

    // Frequency of every window of length M.
    std::unordered_map<std::string, int> freq;
    for (std::size_t start = 0; start + width <= S.size(); ++start) {
        // The window length is M; K is only the target frequency.
        ++freq[S.substr(start, width)];
    }

    int count = 0;
    for (const auto& entry : freq) {
        if (entry.second == K)
            ++count;
    }
    std::cout << count;
}
// Driver: feeds the article's sample input to findCount.
int main()
{
    std::string text("geeksforgeeks");
    const int M = 2;
    const int K = 1;
    findCount(text, M, K);
    return 0;
}
|
Java
import java.util.*;
class GFG {

    /**
     * Prints the number of distinct substrings of length {@code M} that occur
     * exactly {@code K} times in {@code S}. Every start position counts as a
     * separate occurrence, so overlapping occurrences are all counted.
     *
     * Prints 0 when {@code M} is not positive or is longer than {@code S}.
     *
     * @param S text to scan
     * @param M length of the substrings to consider
     * @param K required number of occurrences
     */
    static void findCount(String S, int M, int K) {
        HashMap<String, Integer> freq = new HashMap<String, Integer>();

        if (M > 0) {
            for (int start = 0; start + M <= S.length(); start++) {
                // The window length is M; K is only the target frequency.
                String window = S.substring(start, start + M);
                Integer seen = freq.get(window);
                freq.put(window, seen == null ? 1 : seen + 1);
            }
        }

        int count = 0;
        for (int occurrences : freq.values()) {
            if (occurrences == K) {
                count++;
            }
        }
        System.out.println(count);
    }

    /** Driver: runs the article's sample input. */
    public static void main(String[] args) {
        String S = "geeksforgeeks";
        int M = 2, K = 1;
        findCount(S, M, K);
    }
}
|
Python3
def find_count(s: str, m: int, k: int) -> int:
    """Count the distinct substrings of length ``m`` occurring exactly ``k`` times.

    Every start position counts as a separate occurrence, so overlapping
    occurrences are all counted.

    Args:
        s: Text to scan.
        m: Length of the substrings to consider.
        k: Required number of occurrences.

    Returns:
        The number of distinct length-``m`` substrings of ``s`` whose
        frequency equals ``k``; 0 when ``m`` is not positive or exceeds
        ``len(s)``.
    """
    if m <= 0:
        return 0

    freq = {}
    for start in range(len(s) - m + 1):
        # The window length is m; k is only the target frequency.
        window = s[start:start + m]
        freq[window] = freq.get(window, 0) + 1

    return sum(1 for occurrences in freq.values() if occurrences == k)
# Driver: run the article's sample input and print the resulting count.
S, M, K = "geeksforgeeks", 2, 1
print(find_count(S, M, K))
|
Javascript
/**
 * Logs the number of distinct substrings of length M that occur exactly K
 * times in S. Every start position counts as a separate occurrence, so
 * overlapping occurrences are all counted.
 *
 * Logs 0 when M is not positive or is longer than S.
 *
 * @param {string} S text to scan
 * @param {number} M length of the substrings to consider
 * @param {number} K required number of occurrences
 */
function findCount(S, M, K) {
  // A Map keeps substrings such as "constructor" from colliding with
  // properties inherited by a plain object.
  const freq = new Map();

  if (M > 0) {
    for (let start = 0; start + M <= S.length; start++) {
      // The window length is M; K is only the target frequency.
      const piece = S.substring(start, start + M);
      freq.set(piece, (freq.get(piece) || 0) + 1);
    }
  }

  let count = 0;
  for (const occurrences of freq.values()) {
    if (occurrences === K) count++;
  }
  console.log(count);
}
// Driver: immediately runs findCount on the article's sample input.
(function main() {
  const [S, M, K] = ["geeksforgeeks", 2, 1];
  findCount(S, M, K);
})();
|
C#
using System;
using System.Linq;
using System.Collections.Generic;
class Program {
    /// <summary>
    /// Prints the number of distinct substrings of length <paramref name="M"/>
    /// that occur exactly <paramref name="K"/> times in <paramref name="S"/>.
    /// Every start position counts as a separate occurrence, so overlapping
    /// occurrences are all counted. Prints 0 when M is not positive or is
    /// longer than S.
    /// </summary>
    /// <param name="S">Text to scan.</param>
    /// <param name="M">Length of the substrings to consider.</param>
    /// <param name="K">Required number of occurrences.</param>
    static void FindCount(string S, int M, int K)
    {
        Dictionary<string, int> freq = new Dictionary<string, int>();

        if (M > 0) {
            for (int start = 0; start + M <= S.Length; start++) {
                // The window length is M; K is only the target frequency.
                string piece = S.Substring(start, M);
                int seen;
                // TryGetValue leaves seen at 0 when the key is absent.
                freq.TryGetValue(piece, out seen);
                freq[piece] = seen + 1;
            }
        }

        int count = 0;
        foreach (KeyValuePair<string, int> entry in freq) {
            if (entry.Value == K)
                count++;
        }
        Console.WriteLine(count);
    }

    /// <summary>Driver: runs the article's sample input.</summary>
    static void Main(string[] args)
    {
        string S = "geeksforgeeks";
        int M = 2, K = 1;
        FindCount(S, M, K);
    }
}
|
Time Complexity: O(N*M), where N and M are the length of the given string and the length of the substring needed respectively.
Auxiliary Space: O(N)
Efficient Approach: The above approach can be optimized by using the KMP algorithm for finding the frequency of a substring in the string. Follow the steps to solve the problem:
- Initialize a variable, say count as 0, to store the number of the required substring.
- Generate all substrings of length M from the string S and insert them in an array, say arr[].
- Traverse the array arr[] and for each string in the array, calculate its frequency in the string S using KMP algorithm.
- If the frequency of the string is equal to K (the code below names this parameter P), then increment the count by 1.
- After completing the above steps, print the value of count as the resultant count of substrings.
Below is the implementation of the above approach:
C++
#include <bits/stdc++.h>
using namespace std;
// Fills lps[0..M-1] with the KMP failure table of pat: lps[i] is the length
// of the longest proper prefix of pat[0..i] that is also a suffix of it.
//
// pat: pattern to analyse.
// M:   number of leading characters of pat to process; lps must have room
//      for M entries.
// lps: output table.
//
// Does nothing when M <= 0, so a zero-length table is never written to.
void computeLPSArray(const std::string& pat, int M, int lps[])
{
    if (M <= 0)
        return;

    lps[0] = 0;
    int len = 0; // length of the border currently being extended
    int i = 1;
    while (i < M) {
        if (pat[i] == pat[len]) {
            lps[i++] = ++len;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry position i.
            len = lps[len - 1];
        }
        else {
            lps[i++] = 0;
        }
    }
}

// Returns how many times pat occurs in txt. Overlapping occurrences are all
// counted, e.g. "aa" occurs twice in "aaa".
//
// Returns 0 for an empty pattern or a pattern longer than the text.
int KMPSearch(const std::string& pat, const std::string& txt)
{
    const int M = static_cast<int>(pat.length());
    const int N = static_cast<int>(txt.length());
    if (M == 0 || M > N)
        return 0;

    std::vector<int> lps(M);
    computeLPSArray(pat, M, lps.data());

    int res = 0;
    int i = 0; // index into txt; never moves backwards
    int j = 0; // number of pattern characters currently matched
    while (i < N) {
        if (pat[j] == txt[i]) {
            ++i;
            ++j;
            if (j == M) {
                ++res;
                // Keep the longest border matched so an occurrence that
                // overlaps this one is still found.
                j = lps[j - 1];
            }
        }
        else if (j != 0) {
            j = lps[j - 1];
        }
        else {
            ++i;
        }
    }
    return res;
}
// Prints the number of distinct substrings of length M whose occurrence
// count in S, as reported by KMPSearch, equals P.
//
// S: text to scan.
// M: length of the substrings to consider.
// P: required number of occurrences.
//
// Prints 0 when M is not positive or is larger than S.
void findCount(std::string& S, int M, int P)
{
    // Distinct candidates. Only the windows of length M are generated;
    // substrings of other lengths can never qualify.
    std::set<std::string> vec;
    const int n = static_cast<int>(S.length());
    if (M > 0) {
        for (int i = 0; i + M <= n; i++) {
            vec.insert(S.substr(i, M));
        }
    }

    int count = 0;
    for (const auto& it : vec) {
        if (KMPSearch(it, S) == P) {
            count++;
        }
    }
    std::cout << count;
}
// Driver: feeds the article's sample input to findCount.
int main()
{
    std::string text("abacaba");
    const int M = 3;
    const int P = 2;
    findCount(text, M, P);
    return 0;
}
|
Java
import java.io.*;
import java.util.*;
class GFG {

    /**
     * Fills {@code lps[0..M-1]} with the KMP failure table of {@code pat}:
     * {@code lps[i]} is the length of the longest proper prefix of
     * {@code pat[0..i]} that is also a suffix of it.
     *
     * Does nothing when {@code M <= 0}, so a zero-length table is never
     * written to.
     *
     * @param pat pattern to analyse
     * @param M   number of leading characters of {@code pat} to process
     * @param lps output table with room for {@code M} entries
     */
    static void computeLPSArray(String pat, int M,
                                int lps[])
    {
        if (M <= 0) {
            return;
        }
        lps[0] = 0;
        int len = 0; // length of the border currently being extended
        int i = 1;
        while (i < M) {
            if (pat.charAt(i) == pat.charAt(len)) {
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0) {
                // Fall back to the next shorter border and retry position i.
                len = lps[len - 1];
            }
            else {
                lps[i] = 0;
                i++;
            }
        }
    }

    /**
     * Returns how many times {@code pat} occurs in {@code txt}. Overlapping
     * occurrences are all counted, e.g. "aa" occurs twice in "aaa".
     *
     * @param pat pattern to look for
     * @param txt text to scan
     * @return the number of occurrences; 0 for an empty pattern or a pattern
     *         longer than the text
     */
    static int KMPSearch(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();
        if (M == 0 || M > N) {
            return 0;
        }

        int lps[] = new int[M];
        computeLPSArray(pat, M, lps);

        int res = 0;
        int i = 0; // index into txt; never moves backwards
        int j = 0; // number of pattern characters currently matched
        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                i++;
                j++;
                if (j == M) {
                    res++;
                    // Keep the longest border matched so an occurrence that
                    // overlaps this one is still found.
                    j = lps[j - 1];
                }
            }
            else if (j != 0) {
                j = lps[j - 1];
            }
            else {
                i++;
            }
        }
        return res;
    }

    /**
     * Prints the number of distinct substrings of length {@code M} that occur
     * exactly {@code P} times in {@code S}.
     *
     * Prints 0 when {@code M} is not positive or is longer than {@code S}.
     *
     * @param S text to scan
     * @param M length of the substrings to consider
     * @param P required number of occurrences
     */
    static void findCount(String S, int M, int P)
    {
        // Distinct candidates. Only the windows of length M are generated;
        // substrings of other lengths can never qualify.
        TreeSet<String> vec = new TreeSet<>();
        int n = S.length();
        if (M > 0) {
            for (int i = 0; i + M <= n; i++) {
                vec.add(S.substring(i, i + M));
            }
        }

        int count = 0;
        for (String it : vec) {
            if (KMPSearch(it, S) == P) {
                count++;
            }
        }
        System.out.println(count);
    }

    /** Driver: runs the article's sample input. */
    public static void main(String[] args)
    {
        String S = "abacaba";
        int M = 3, P = 2;
        findCount(S, M, P);
    }
}
|
Python3
def computeLPSArray(pat, M, lps):
    """Fill ``lps[0:M]`` with the KMP failure table of ``pat``.

    ``lps[i]`` is the length of the longest proper prefix of ``pat[:i + 1]``
    that is also a suffix of it.

    Args:
        pat: Pattern to analyse.
        M: Number of leading characters of ``pat`` to process.
        lps: Output list with room for ``M`` entries; modified in place.

    Does nothing when ``M <= 0``, so an empty table is never written to.
    """
    if M <= 0:
        return

    lps[0] = 0
    len1 = 0  # length of the border currently being extended
    i = 1
    while i < M:
        if pat[i] == pat[len1]:
            len1 += 1
            lps[i] = len1
            i += 1
        elif len1 != 0:
            # Fall back to the next shorter border and retry position i.
            len1 = lps[len1 - 1]
        else:
            lps[i] = 0
            i += 1


def KMPSearch(pat, txt):
    """Return how many times ``pat`` occurs in ``txt``.

    Overlapping occurrences are all counted, e.g. ``"aa"`` occurs twice in
    ``"aaa"``. Returns 0 for an empty pattern or a pattern longer than the
    text.
    """
    M = len(pat)
    N = len(txt)
    if M == 0 or M > N:
        return 0

    lps = [0] * M
    computeLPSArray(pat, M, lps)

    res = 0
    i = 0  # index into txt; never moves backwards
    j = 0  # number of pattern characters currently matched
    while i < N:
        if pat[j] == txt[i]:
            i += 1
            j += 1
            if j == M:
                res += 1
                # Keep the longest border matched so an occurrence that
                # overlaps this one is still found.
                j = lps[j - 1]
        elif j != 0:
            j = lps[j - 1]
        else:
            i += 1
    return res
def findCount(S, M, P):
    """Print how many distinct length-``M`` substrings of ``S`` occur ``P`` times.

    The occurrence count of each candidate is obtained from ``KMPSearch``.

    Args:
        S: Text to scan.
        M: Length of the substrings to consider.
        P: Required number of occurrences.

    Prints 0 when ``M`` is not positive or exceeds ``len(S)``.
    """
    n = len(S)

    # Distinct candidates. Only the windows of length M are generated;
    # substrings of other lengths can never qualify.
    vec = set()
    if M > 0:
        for i in range(n - M + 1):
            vec.add(S[i:i + M])

    # Start from zero so that only matching candidates are counted.
    count = 0
    for it in vec:
        ans = KMPSearch(it, S)
        if ans == P:
            count += 1
    print(count)
# Driver: run the article's sample input when executed as a script.
if __name__ == '__main__':
    S, M, P = "abacaba", 3, 2
    findCount(S, M, P)
|
C#
using System;
using System.Collections.Generic;
class GFG
{
    /// <summary>
    /// Fills lps[0..M-1] with the KMP failure table of <paramref name="pat"/>:
    /// lps[i] is the length of the longest proper prefix of pat[0..i] that is
    /// also a suffix of it. Does nothing when M is not positive, so a
    /// zero-length table is never written to.
    /// </summary>
    /// <param name="pat">Pattern to analyse.</param>
    /// <param name="M">Number of leading characters of pat to process.</param>
    /// <param name="lps">Output table with room for M entries.</param>
    static void computeLPSArray(string pat, int M, int[] lps)
    {
        if (M <= 0) {
            return;
        }
        lps[0] = 0;
        int len = 0; // length of the border currently being extended
        int i = 1;
        while (i < M) {
            if (pat[i] == pat[len]) {
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0) {
                // Fall back to the next shorter border and retry position i.
                len = lps[len - 1];
            }
            else {
                lps[i] = 0;
                i++;
            }
        }
    }

    /// <summary>
    /// Returns how many times <paramref name="pat"/> occurs in
    /// <paramref name="txt"/>. Overlapping occurrences are all counted.
    /// Returns 0 for an empty pattern or a pattern longer than the text.
    /// </summary>
    static int KMPSearch(string pat, string txt)
    {
        int M = pat.Length;
        int N = txt.Length;
        if (M == 0 || M > N) {
            return 0;
        }

        int[] lps = new int[M];
        computeLPSArray(pat, M, lps);

        int res = 0;
        int i = 0; // index into txt; never moves backwards
        int j = 0; // number of pattern characters currently matched
        while (i < N) {
            if (pat[j] == txt[i]) {
                i++;
                j++;
                if (j == M) {
                    res++;
                    // Keep the longest border matched so an occurrence that
                    // overlaps this one is still found.
                    j = lps[j - 1];
                }
            }
            else if (j != 0) {
                j = lps[j - 1];
            }
            else {
                i++;
            }
        }
        return res;
    }

    /// <summary>
    /// Prints the number of distinct substrings of length
    /// <paramref name="M"/> that occur exactly <paramref name="P"/> times in
    /// <paramref name="S"/>. Prints 0 when M is not positive or is longer
    /// than S.
    /// </summary>
    static void findCount(string S, int M, int P)
    {
        // Distinct candidates. Only the windows of length M are generated;
        // substrings of other lengths can never qualify.
        HashSet<string> vec = new HashSet<string>();
        int n = S.Length;
        if (M > 0) {
            for (int i = 0; i + M <= n; i++) {
                vec.Add(S.Substring(i, M));
            }
        }

        int count = 0;
        foreach (string it in vec) {
            if (KMPSearch(it, S) == P) {
                count++;
            }
        }
        Console.WriteLine(count);
    }

    /// <summary>Driver: runs the article's sample input.</summary>
    static void Main() {
        string S = "abacaba";
        int M = 3, P = 2;
        findCount(S, M, P);
    }
}
|
Javascript
<script>
/**
 * Fills lps[0..M-1] with the KMP failure table of pat: lps[i] is the length
 * of the longest proper prefix of pat[0..i] that is also a suffix of it.
 * Does nothing when M is not positive.
 *
 * @param {string} pat pattern to analyse
 * @param {number} M number of leading characters of pat to process
 * @param {number[]} lps output table, modified in place
 */
function computeLPSArray(pat, M, lps)
{
    if (M <= 0) {
        return;
    }
    lps[0] = 0;
    let len = 0; // length of the border currently being extended
    let i = 1;
    while (i < M) {
        if (pat[i] == pat[len]) {
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry position i.
            len = lps[len - 1];
        }
        else {
            lps[i] = 0;
            i++;
        }
    }
}

/**
 * Returns how many times pat occurs in txt. Overlapping occurrences are all
 * counted, e.g. "aa" occurs twice in "aaa". Returns 0 for an empty pattern
 * or a pattern longer than the text.
 *
 * @param {string} pat pattern to look for
 * @param {string} txt text to scan
 * @returns {number} number of occurrences
 */
function KMPSearch(pat, txt)
{
    const M = pat.length;
    const N = txt.length;
    if (M === 0 || M > N) {
        return 0;
    }

    const lps = new Array(M).fill(0);
    computeLPSArray(pat, M, lps);

    let res = 0;
    let i = 0; // index into txt; never moves backwards
    let j = 0; // number of pattern characters currently matched
    while (i < N) {
        if (pat[j] == txt[i]) {
            i++;
            j++;
            if (j == M) {
                res++;
                // Keep the longest border matched so an occurrence that
                // overlaps this one is still found.
                j = lps[j - 1];
            }
        }
        else if (j != 0) {
            j = lps[j - 1];
        }
        else {
            i++;
        }
    }
    return res;
}
/**
 * Writes to the document the number of distinct substrings of length M that
 * occur exactly P times in S. The occurrence count of each candidate comes
 * from KMPSearch. Writes 0 when M is not positive or is longer than S.
 *
 * @param {string} S text to scan
 * @param {number} M length of the substrings to consider
 * @param {number} P required number of occurrences
 */
function findCount( S, M, P)
{
    // Distinct candidates. Only the windows of length M are generated;
    // substrings of other lengths can never qualify.
    const vec = new Set();
    const n = S.length;
    const width = Number(M);
    if (width > 0) {
        for (let i = 0; i + width <= n; i++) {
            // substring takes (start, end), so the end index is i + width.
            vec.add(S.substring(i, i + width));
        }
    }

    let count = 0;
    for (const it of vec) {
        const ans = KMPSearch(it, S);
        if (ans == P) {
            count++;
        }
    }
    document.write( count);
}
// Driver: run the article's sample input.
var S = "abacaba", M = 3, P = 2;
findCount(S, M, P);
</script>
|
Time Complexity: O((N*M) + (N² – M²))
Auxiliary Space: O(N – M)
Share your thoughts in the comments
Please Login to comment...