Count of occurrences of each prefix in a string using modified KMP algorithm
Last Updated :
13 Dec, 2021
Given a string S of size N, the task is to count the occurrences of all the prefixes of the given string S.
Examples:
Input: S = “AAAA”
Output:
A occurs 4 times
AA occurs 3 times.
AAA occurs 2 times.
AAAA occurs 1 times.
Explanation:
Below is the illustration of all the prefixes:
Input: S = “ABACABA”
Output:
A occurs 4 times
AB occurs 2 times
ABA occurs 2 times
ABAC occurs 1 times
ABACA occurs 1 times
ABACAB occurs 1 times
ABACABA occurs 1 times
Naive Approach:
- Let P be the set of all prefixes of S. Traverse over every prefix x in P.
- Do a sliding window approach of size |x|.
- Check if the current sliding window on S is equal to x. If yes then increase the count[x] by 1.
Time complexity: O(N^3)
Auxiliary Space: O(N)
Efficient Approach:
Use the LPS array (also called prefix_function) from the KMP algorithm.
The prefix function for this string is defined as an array LPS of length N, where LPS[i] is the length of the longest proper prefix of the substring S[0…i] which is also a suffix of this substring. Let occ[i] denote the number of occurrences of the prefix of length i.
Below are the steps to implement this approach:
- Compute the LPS array or prefix_function.
- For each value of the prefix function, first, count how many times it occurs in the LPS array.
- Process the prefix lengths from longest to shortest: if the prefix of length i has been counted occ[i] times so far, then occ[i] must be added to the count of its longest proper suffix that is also a prefix, i.e. to occ[LPS[i - 1]].
- In the end, add 1 to all the values of occ array, because of the original prefix that should be counted as well.
For example:
LPS[i] denotes that in position i, a prefix of length = LPS[i] appears. And this is the longest prefix possible. But shorter prefixes can occur.
For String S = “AAAA”, following are the prefixes:
S[0..0] = A
S[0..1] = AA
S[0..2] = AAA
S[0..3] = AAAA
Initially:
occ[A] = 0
occ[AA] = 0
occ[AAA] = 0
occ[AAAA] = 0
Step1: LPS Array of the following string denotes the length of the longest prefix which is also a suffix:
LPS[1] denotes in string AA, A is a suffix and also a prefix as LPS[1] = 1
LPS[2] denotes in string AAA, AA is a suffix and also a prefix as LPS[2] = 2
LPS[3] denotes in string AAAA, AAA is a suffix and also a prefix as LPS[3] = 3
Step 2:Add these occurrences of prefixes as suffixes to the answer in the occ[] array:
Values : Counted substrings
occ[A] = 1 : S[1]
occ[AA] = 1 : S[1..2]
occ[AAA] = 1 : S[1..3]
occ[AAAA] = 0 : NULL (as “AAAA” is the whole string, it is not a proper prefix that is also a suffix).
Step 3: Now traverse the string in reverse order starting from “AAA” (as the last value will always be 0 since the complete string is not a proper prefix).
Since, string “AAA” S[1..3] contains “AA” S[2..3] as well, which was not counted yet, therefore increment the occurrence of string “AA” in occ[“AA”] as occ[“AA”] += occ[“AAA”]. Below is the count for the same:
Values : Counted substrings
occ[A] = 1 : S[1]
occ[AA] = 2 : S[1..2], S[2..3]
occ[AAA] = 1 : S[1..3]
occ[AAAA] = 0 : NULL
Now string “AA” contains “A” as well, which was not counted yet, therefore increment the occurrence of string “A” in occ[“A”] as occ[“A”] += occ[“AA”]. Below is the count for the same:
Values : Counted substrings
occ[A] = 3 : S[1], S[2], S[3]
occ[AA] = 2 : S[1..2], S[2..3]
occ[AAA] = 1 : S[1..3]
occ[AAAA] = 0 : NULL
Step 4: At last add one to all occurrences for the original prefixes, which are not counted yet.
Values : Counted substrings
occ[A] = 4 : S[1], S[2], S[3], S[0]
occ[AA] = 3 : S[1..2], S[2..3], S[0..1]
occ[AAA] = 2 : S[1..3], S[0..2]
occ[AAAA] = 1 : S[0..3]
Below is the implementation of the above approach:
C++
#include <bits/stdc++.h>
using namespace std;
// Prints one line per prefix of s, for prefix lengths 1..s.size():
//     "<prefix> occurs <count> times."
// where <count> is occ[length].
//
// occ : occurrence counts indexed by prefix length; must hold at least
//       s.size() + 1 entries (index 0 is never read).
// s   : the string whose prefixes are printed.
//
// Both arguments are only read, so they are taken by const reference; this
// also lets callers pass const objects or temporaries.
void print(const std::vector<int>& occ, const std::string& s)
{
    for (std::size_t len = 1; len <= s.size(); ++len) {
        // '\n' instead of std::endl: no need to flush the stream per line.
        std::cout << s.substr(0, len) << " occurs " << occ[len]
                  << " times." << '\n';
    }
}
// Computes the KMP prefix function (LPS array) of s.
//
// Returns a vector LPS with s.size() entries, where LPS[i] is the length of
// the longest proper prefix of s[0..i] that is also a suffix of s[0..i].
// An empty string yields an empty vector.
std::vector<int> prefix_function(const std::string& s)
{
    const int n = static_cast<int>(s.size());

    // Elements are value-initialised to 0, so LPS[0] is already correct and
    // no explicit store is needed (one would be out of bounds for empty s).
    std::vector<int> LPS(n);

    for (int i = 1; i < n; ++i) {
        // Start from the border of s[0..i-1] and fall back through shorter
        // borders until one can be extended by s[i] or none is left.
        int j = LPS[i - 1];
        while (j > 0 && s[i] != s[j]) {
            j = LPS[j - 1];
        }
        // After the loop either s[i] matches s[j] (extend the border) or
        // j == 0 with a mismatch (no border at all).
        if (s[i] == s[j]) {
            ++j;
        }
        LPS[i] = j;
    }
    return LPS;
}
// Prints, for every prefix of s, how many times that prefix occurs in s as a
// substring. The counts are derived from the prefix function (LPS array) and
// handed to print() for output. An empty string produces no output.
void count_occurrence(std::string& s)
{
    // An empty string has no prefixes to report; returning here also keeps
    // zero-length input away from the helpers.
    if (s.empty()) {
        return;
    }

    const int n = static_cast<int>(s.size());
    const std::vector<int> LPS = prefix_function(s);

    // occ[len] accumulates the number of occurrences of the prefix of
    // length len; index 0 stands for the empty prefix.
    std::vector<int> occ(n + 1, 0);

    // Every position i ends an occurrence of the prefix of length LPS[i].
    for (int border : LPS) {
        ++occ[border];
    }

    // Each counted occurrence of the prefix of length len also ends an
    // occurrence of that prefix's own longest border, so push the counts
    // down from longer prefixes to shorter ones.
    for (int len = n - 1; len > 0; --len) {
        occ[LPS[len - 1]] += occ[len];
    }

    // The occurrence that starts at index 0 (the prefix itself) has not been
    // counted yet.
    for (int& count : occ) {
        ++count;
    }

    print(occ, s);
}
// Driver: reports the occurrence count of every prefix of a sample string.
int main()
{
    std::string sample{"ABACABA"};
    count_occurrence(sample);
    return 0;
}
|
Java
import java.util.*;
/**
 * Counts how many times every prefix of a string occurs inside that string,
 * using the prefix function (LPS array) known from the KMP algorithm.
 */
class GFG{

    /**
     * Prints one line per prefix of {@code s}, from length 1 up to and
     * including the whole string, in the form
     * {@code "<prefix> occurs <count> times."}.
     *
     * @param occ occurrence counts indexed by prefix length; needs at least
     *            {@code s.length() + 1} entries (index 0 is not read)
     * @param s   the string whose prefixes are reported
     */
    static void print(int[] occ, String s)
    {
        // Inclusive bound: the complete string is a prefix of itself and
        // must be reported as well.
        for (int len = 1; len <= s.length(); len++)
        {
            System.out.print(s.substring(0, len) +
                             " occurs " + occ[len] +
                             " times." + "\n");
        }
    }

    /**
     * Computes the prefix function of {@code s}.
     *
     * @param s input string (may be empty)
     * @return array {@code LPS} of length {@code s.length()} where
     *         {@code LPS[i]} is the length of the longest proper prefix of
     *         {@code s[0..i]} that is also a suffix of it
     */
    static int[] prefix_function(String s)
    {
        // New int arrays are zero-filled, so LPS[0] is already 0; an explicit
        // store would throw for an empty string.
        int[] LPS = new int[s.length()];

        for (int i = 1; i < s.length(); i++)
        {
            // Fall back through shorter borders until one can be extended
            // by s[i] or none is left.
            int j = LPS[i - 1];
            while (j > 0 && s.charAt(i) != s.charAt(j))
            {
                j = LPS[j - 1];
            }
            if (s.charAt(i) == s.charAt(j))
            {
                j++;
            }
            LPS[i] = j;
        }
        return LPS;
    }

    /**
     * Computes the number of occurrences of every prefix of {@code s} and
     * prints them via {@link #print(int[], String)}.
     *
     * @param s the string to analyse
     */
    static void count_occurrence(String s)
    {
        int n = s.length();
        int[] LPS = prefix_function(s);

        // occ[len] = occurrences of the prefix of length len.
        int[] occ = new int[n + 1];

        // Every position i ends an occurrence of the prefix of length LPS[i].
        for (int i = 0; i < n; i++)
        {
            occ[LPS[i]]++;
        }

        // Propagate counts from each prefix to its longest border, going
        // from longer prefixes to shorter ones.
        for (int len = n - 1; len > 0; len--)
        {
            occ[LPS[len - 1]] += occ[len];
        }

        // Add the occurrence that starts at index 0 (the prefix itself).
        for (int len = 0; len <= n; len++)
        {
            occ[len]++;
        }

        print(occ, s);
    }

    /** Driver: runs the computation on a sample string. */
    public static void main(String[] args)
    {
        String A = "ABACABA";
        count_occurrence(A);
    }
}
|
Python3
def Print(occ, s):
    """Print one line per prefix of ``s``: ``<prefix> occurs <count> times.``

    ``occ`` is indexed by prefix length and is read at indices
    ``1 .. len(s)``; index 0 is not used.
    """
    for length in range(1, len(s) + 1):
        print(s[0:length], "occurs", occ[length], "times.")
def prefix_function(s):
    """Return the KMP prefix function (LPS array) of ``s``.

    The result has ``len(s)`` entries; entry ``i`` is the length of the
    longest proper prefix of ``s[0..i]`` that is also a suffix of it.
    An empty string gives an empty list.
    """
    LPS = [0] * len(s)
    for i in range(1, len(s)):
        # Fall back through shorter borders until one can be extended by
        # s[i] or none is left.
        j = LPS[i - 1]
        while j > 0 and s[i] != s[j]:
            j = LPS[j - 1]
        if s[i] == s[j]:
            j += 1
        LPS[i] = j
    return LPS
def count_occurrence(s):
    """Count the occurrences of every prefix of ``s`` and print them.

    The counts are derived from ``prefix_function(s)`` and passed to
    ``Print`` for output.
    """
    n = len(s)
    LPS = prefix_function(s)

    # occ[length] = occurrences of the prefix of that length.
    occ = [0] * (n + 1)

    # Every position i ends an occurrence of the prefix of length LPS[i].
    for i in range(n):
        occ[LPS[i]] += 1

    # Propagate counts from each prefix to its longest border, going from
    # longer prefixes to shorter ones.
    for length in range(n - 1, 0, -1):
        occ[LPS[length - 1]] += occ[length]

    # Add the occurrence that starts at index 0 (the prefix itself).
    for length in range(n + 1):
        occ[length] += 1

    Print(occ, s)
# Driver code: print the occurrence count of every prefix of the sample string.
A = "ABACABA"
count_occurrence(A)
|
C#
using System;
/// <summary>
/// Counts how many times every prefix of a string occurs inside that string,
/// using the prefix function (LPS array) known from the KMP algorithm.
/// </summary>
class GFG{

    /// <summary>
    /// Writes one line per prefix of <paramref name="s"/>, from length 1 up
    /// to and including the whole string, in the form
    /// "&lt;prefix&gt; occurs &lt;count&gt; times.".
    /// </summary>
    /// <param name="occ">Occurrence counts indexed by prefix length; needs at
    /// least s.Length + 1 entries (index 0 is not read).</param>
    /// <param name="s">The string whose prefixes are reported.</param>
    static void print(int[] occ, String s)
    {
        // Inclusive bound: the complete string is a prefix of itself and
        // must be reported as well.
        for (int len = 1; len <= s.Length; len++)
        {
            Console.Write(s.Substring(0, len) +
                          " occurs " + occ[len] +
                          " times." + "\n");
        }
    }

    /// <summary>
    /// Computes the prefix function of <paramref name="s"/>: element i of
    /// the result is the length of the longest proper prefix of s[0..i]
    /// that is also a suffix of it.
    /// </summary>
    /// <param name="s">Input string (may be empty).</param>
    /// <returns>An array with s.Length entries.</returns>
    static int[] prefix_function(String s)
    {
        // New int arrays are zero-filled, so LPS[0] is already 0; an explicit
        // store would throw for an empty string.
        int[] LPS = new int[s.Length];

        for (int i = 1; i < s.Length; i++)
        {
            // Fall back through shorter borders until one can be extended
            // by s[i] or none is left.
            int j = LPS[i - 1];
            while (j > 0 && s[i] != s[j])
            {
                j = LPS[j - 1];
            }
            if (s[i] == s[j])
            {
                j++;
            }
            LPS[i] = j;
        }
        return LPS;
    }

    /// <summary>
    /// Computes the number of occurrences of every prefix of
    /// <paramref name="s"/> and prints them via print().
    /// </summary>
    /// <param name="s">The string to analyse.</param>
    static void count_occurrence(String s)
    {
        int n = s.Length;
        int[] LPS = prefix_function(s);

        // occ[len] = occurrences of the prefix of length len.
        int[] occ = new int[n + 1];

        // Every position i ends an occurrence of the prefix of length LPS[i].
        for (int i = 0; i < n; i++)
        {
            occ[LPS[i]]++;
        }

        // Propagate counts from each prefix to its longest border, going
        // from longer prefixes to shorter ones.
        for (int len = n - 1; len > 0; len--)
        {
            occ[LPS[len - 1]] += occ[len];
        }

        // Add the occurrence that starts at index 0 (the prefix itself).
        for (int len = 0; len <= n; len++)
        {
            occ[len]++;
        }

        print(occ, s);
    }

    /// <summary>Driver: runs the computation on a sample string.</summary>
    public static void Main(String[] args)
    {
        String A = "ABACABA";
        count_occurrence(A);
    }
}
|
Javascript
<script>
// Writes one line per prefix of s (lengths 1..s.length) to the document:
// "<prefix> occurs <count> times.", where <count> is occ[length].
const print = (occ, s) => {
    for (let len = 1; len <= s.length; len += 1) {
        const prefix = s.slice(0, len);
        const count = occ[len];
        document.write(`${prefix} occurs ${count} times.<br/>`);
    }
}
// Computes the KMP prefix function (LPS array) of s.
// The result has s.length entries; entry i is the length of the longest
// proper prefix of s[0..i] that is also a suffix of it.
// An empty string gives an empty array.
const prefix_function = (s) => {
    // Already zero-filled, so LPS[0] needs no explicit store; writing it
    // unconditionally would grow the array to length 1 for an empty string.
    const LPS = new Array(s.length).fill(0);

    for (let i = 1; i < s.length; i++) {
        // Fall back through shorter borders until one can be extended by
        // s[i] or none is left.
        let j = LPS[i - 1];
        while (j > 0 && s[i] != s[j]) {
            j = LPS[j - 1];
        }
        if (s[i] == s[j]) {
            j += 1;
        }
        LPS[i] = j;
    }
    return LPS;
}
// Counts the occurrences of every prefix of s and hands the counts to
// print() for output.
const count_occurrence = (s) => {
    const n = s.length;
    const LPS = prefix_function(s);

    // occ[len] = occurrences of the prefix of length len.
    const occ = new Array(n + 1).fill(0);

    // Every position ends an occurrence of the prefix of length LPS[pos].
    for (let pos = 0; pos < n; pos += 1) {
        occ[LPS[pos]] += 1;
    }

    // Propagate counts from each prefix to its longest border, going from
    // longer prefixes to shorter ones.
    for (let len = n - 1; len >= 1; len -= 1) {
        occ[LPS[len - 1]] += occ[len];
    }

    // Add the occurrence that starts at index 0 (the prefix itself).
    for (let len = 0; len <= n; len += 1) {
        occ[len] += 1;
    }

    print(occ, s);
}
// Driver code: print the occurrence count of every prefix of the sample string.
let A = "ABACABA" ;
count_occurrence(A);
</script>
|
Output:
A occurs 4 times.
AB occurs 2 times.
ABA occurs 2 times.
ABAC occurs 1 times.
ABACA occurs 1 times.
ABACAB occurs 1 times.
ABACABA occurs 1 times.
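Time Complexity: O(N^2) — building the LPS and occ arrays takes O(N); printing all N prefixes, whose total length is N(N+1)/2, dominates.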
Auxiliary Space: O(N)
Like Article
Suggest improvement
Share your thoughts in the comments
Please Login to comment...