Open In App

Substring of length K having maximum frequency in the given string

Improve
Improve
Improve
Like Article
Like
Save Article
Save
Share
Report issue
Report

Given a string str, the task is to find the substring of length K which occurs the maximum number of times. If more than one string occurs maximum number of times, then print the lexicographically smallest substring.

Examples:

Input: str = “bbbbbaaaaabbabababa”, K = 5
Output: ababa
Explanation:
The substrings of length 5 from the above strings are {bbbbb, bbbba, bbbaa, bbaaa, baaaa, aaaaa, aaaab, aaabb, aabba, abbab, bbaba, babab, ababa, babab, ababa}.
Among all of them, substrings {ababa, babab} occurs the maximum number of times(= 2).
The lexicographically smallest string from {ababa, babab} is ababa.
Therefore, “ababa” is the required answer.

Input:  str = “heisagoodboy”, K = 5
Output: agood
Explanation: 
The substrings of length 5 from the above string are {heisa, eisag, isago, sagoo, agood, goodb, oodbo, odboy}.
All of them occur only once. But the lexicographically smallest string among them is “agood”.
Therefore, “agood” is the required answer.

Naive Approach: The simplest approach to solve the problem is to generate all the substrings of size K from the given string and store the frequency of each substring in a Map. Then, traverse the Map and find the lexicographically smallest substring which occurs maximum number of times and print it. 

C++




// C++ implementation to find
// the maximum occurring character in
// an input string which is lexicographically first
 
#include <bits/stdc++.h>
using namespace std;
  
 
// function to find the maximum occurring character in
// an input string which is lexicographically first
string maximumOccurringString(string str, int k)
{
    // store current string
    string curr= "";
    int i=0, j=0, n=str.length();
     
    // to store all substring and there number of occurrences
    // also use map because it stores all strings in lexographical order
    map<string,int>mp;
     
    // sliding window approach to generate all substring
    while(j<n){
        curr+=str[j];
         
        // window size less then k so increase only 'j'
        if(j-i+1 < k){
            j++;
        }
         
        // window size is equal to k
        // put current string into map and slide the window
        // by incrementing 'i' and 'j' to generate all substring
        else if(j-i+1 == k){
            mp[curr]++;
            curr.erase(0, 1);
            i++;
            j++;
        }
    }
     
    // to count the maximum occurring string
    int cnt=-1;
     
    // to store the maximum occurring string
    string ans;
    for(auto x : mp){
        int c = x.second;
        if(c > cnt){
            ans = x.first;
            cnt =c;
        }
         
    }
     
    // return the maximum occurring string
    return ans;
     
}
 
// Driver Code
int main()
{
    // Given string
    string str = "bbbbbaaaaabbabababa";
 
    // Given K size of substring
    int k = 5;
 
    // Function Call
    cout << maximumOccurringString(str, k);
 
    return 0;
}
 
//this code is contributed by bhardwajji


Java




import java.util.*;
 
public class Main {
    // function to find the maximum occurring character in
    // an input string which is lexicographically first
    static String maximumOccurringString(String str, int k) {
        // store current string
        String curr = "";
        int i = 0, j = 0, n = str.length();
 
        // to store all substring and there number of occurrences
        // also use TreeMap because it stores all strings in lexicographical order
        TreeMap<String, Integer> mp = new TreeMap<>();
 
        // sliding window approach to generate all substring
        while (j < n) {
            curr += str.charAt(j);
 
            // window size less then k so increase only 'j'
            if (j - i + 1 < k) {
                j++;
            }
 
            // window size is equal to k
            // put current string into map and slide the window
            // by incrementing 'i' and 'j' to generate all substring
            else if (j - i + 1 == k) {
                mp.put(curr, mp.getOrDefault(curr, 0) + 1);
                curr = curr.substring(1);
                i++;
                j++;
            }
        }
 
        // to count the maximum occurring string
        int cnt = -1;
 
        // to store the maximum occurring string
        String ans = "";
        for (Map.Entry<String, Integer> x : mp.entrySet()) {
            int c = x.getValue();
            if (c > cnt) {
                ans = x.getKey();
                cnt = c;
            }
        }
 
        // return the maximum occurring string
        return ans;
    }
 
    // Driver Code
    public static void main(String[] args) {
        // Given string
        String str = "bbbbbaaaaabbabababa";
 
        // Given K size of substring
        int k = 5;
 
        // Function Call
        System.out.println(maximumOccurringString(str, k));
    }
}


Python3




# Python3 implementation to find
#the maximum occurring character in
#an input string which is lexicographically first
 
# function to find the maximum occurring character in
# an input string which is lexicographically first
 
def maximum_occuring_string(string, k):
  # store current string
    curr = ""
    n = len(string)
    i = j = 0
     
    # to store all substring and there number of occurrences
    # also use map because it stores all strings in lexographical order
    mp = {}
     
    # sliding window approach to generate all substring
    while j < n:
        curr += string[j]
         
        # window size less then k so increase only 'j'
        if j - i + 1 < k:
            j += 1
         #   window size is equal to k
        # put current string into map and slide the window
        # by incrementing 'i' and 'j' to generate all substring
        elif j - i + 1 == k:
            if curr in mp:
                mp[curr] += 1
            else:
                mp[curr] = 1
            curr = curr[1:]
            i += 1
            j += 1
             
    #o count the maximum occurring string
    cnt = -1
    ans = ""
    for x in mp:
        c = mp[x]
        if c > cnt or (c == cnt and x < ans):
            ans = x
            cnt = c
    return ans
 
# Driver code
string = "bbbbbaaaaabbabababa"
k = 5
print(maximum_occuring_string(string, k))


C#




using System;
using System.Collections.Generic;
 
class Program
{
    // function to find the maximum occurring character in
    // an input string which is lexicographically first
    static string MaximumOccurringString(string str, int k)
    {
        // store current string
        string curr = "";
        int i = 0, j = 0, n = str.Length;
 
        // to store all substring and there number of occurrences
        // also use SortedDictionary because it stores all strings in lexographical order
        SortedDictionary<string, int> mp = new SortedDictionary<string, int>();
 
        // sliding window approach to generate all substring
        while (j < n)
        {
            curr += str[j];
 
            // window size less then k so increase only 'j'
            if (j - i + 1 < k)
            {
                j++;
            }
 
            // window size is equal to k
            // put current string into map and slide the window
            // by incrementing 'i' and 'j' to generate all substring
            else if (j - i + 1 == k)
            {
                if (mp.ContainsKey(curr))
                {
                    mp[curr]++;
                }
                else
                {
                    mp.Add(curr, 1);
                }
                curr = curr.Substring(1);
                i++;
                j++;
            }
        }
 
        // to count the maximum occurring string
        int cnt = -1;
 
        // to store the maximum occurring string
        string ans = "";
        foreach (var x in mp)
        {
            int c = x.Value;
            if (c > cnt)
            {
                ans = x.Key;
                cnt = c;
            }
        }
 
        // return the maximum occurring string
        return ans;
    }
 
    // Driver Code
    static void Main()
    {
        // Given string
        string str = "bbbbbaaaaabbabababa";
 
        // Given K size of substring
        int k = 5;
 
        // Function Call
        Console.WriteLine(MaximumOccurringString(str, k));
    }
}


Javascript




// function to find the maximum occurring character in
// an input string which is lexicographically first
function MaximumOccurringString(str, k) {
 
    // store current string
    let curr = "";
    let i = 0, j = 0, n = str.length;
     
     
    // to store all substring and there number of occurrences
    // also use Map because it stores all strings in lexographical order
    let mp = new Map();
     
    // sliding window approach to generate all substring
    while (j < n) {
        curr += str[j];
     
        // window size less then k so increase only 'j'
        if (j - i + 1 < k) {
            j++;
        }
     
        // window size is equal to k
        // put current string into map and slide the window
        // by incrementing 'i' and 'j' to generate all substring
        else if (j - i + 1 == k) {
            if (mp.has(curr)) {
                mp.set(curr, mp.get(curr) + 1);
            }
            else {
                mp.set(curr, 1);
            }
            curr = curr.substring(1);
            i++;
            j++;
        }
    }
     
    // to count the maximum occurring string
    let cnt = -1;
     
    // to store the maximum occurring string
    let ans = "";
    let keys = Array.from(mp.keys())
    keys.sort()
    //console.log(keys)
 
    for (let key of keys) {
        let c = mp.get(key);
        if (c > cnt) {
            ans = key;
            cnt = c;
        }
    }
     
    // return the maximum occurring string
    return ans;
 
}
 
// Given string
let str = "bbbbbaaaaabbabababa";
 
// Given K size of substring
let k = 5;
 
// Function Call
console.log(MaximumOccurringString(str, k));


Output

ababa

Time Complexity: O(N*( K + log K))
Auxiliary Space: O(N * K)

Efficient Approach: To optimize the above approach, the idea is to use Sliding Window technique. Consider a window of size 
K to generate all substrings of length K and count the frequency of a substring generated in a Map. Traverse the map and find the substring that occurs maximum number of times and print it. If several of them exist, then print the lexicographically smallest substring.

Below is the implementation of the above approach.

C++




// C++ program for the above approach
 
#include <bits/stdc++.h>
using ll = long long int;
using namespace std;
 
// Function that generates substring
// of length K that occurs maximum times
void maximumOccurringString(string s, ll K)
{
    // Store the frequency of substrings
    map<deque<char>, ll> M;
 
    ll i;
 
    // Deque to maintain substrings
    // window size K
    deque<char> D;
 
    for (i = 0; i < K; i++) {
        D.push_back(s[i]);
    }
 
    // Update the frequency of the
    // first substring in the Map
    M[D]++;
 
    // Remove the first character of
    // the previous K length substring
    D.pop_front();
 
    // Traverse the string
    for (ll j = i; j < s.size(); j++) {
 
        // Insert the current character
        // as last character of
        // current substring
        D.push_back(s[j]);
 
        M[D]++;
 
        // Pop the first character of
        // previous K length substring
        D.pop_front();
    }
 
    ll maxi = INT_MIN;
 
    deque<char> ans;
 
    // Find the substring that occurs
    // maximum number of times
    for (auto it : M) {
        if (it.second > maxi) {
            maxi = it.second;
            ans = it.first;
        }
    }
 
    // Print the substring
    for (ll i = 0; i < ans.size(); i++) {
        cout << ans[i];
    }
}
 
// Driver Code
int main()
{
    // Given string
    string s = "bbbbbaaaaabbabababa";
 
    // Given K size of substring
    ll K = 5;
 
    // Function Call
    maximumOccurringString(s, K);
 
    return 0;
}


Java




import java.util.*;
 
public class Main {
   
    // Function that generates substring
    // of length K that occurs maximum times
    public static void maximumOccurringString(String s,
                                              int K)
    {
 
        // Store the frequency of substrings
        Map<String, Integer> M = new HashMap<>();
 
        // Deque to maintain substrings
        // window size K
        Deque<Character> D = new LinkedList<>();
 
        for (int i = 0; i < K; i++) {
            D.addLast(s.charAt(i));
        }
 
        // Update the frequency of the
        // first substring in the Map
        M.put(D.toString(),
              M.getOrDefault(D.toString(), 0) + 1);
 
        // Remove the first character of
        // the previous K length substring
        D.removeFirst();
 
        // Traverse the string
        for (int j = K; j < s.length(); j++) {
 
            // Insert the current character
            // as last character of
            // current substring
            D.addLast(s.charAt(j));
 
            M.put(D.toString(),
                  M.getOrDefault(D.toString(), 0) + 1);
 
            // Pop the first character of
            // previous K length substring
            D.removeFirst();
        }
 
        int maxi = Integer.MIN_VALUE;
 
        String ans = "";
 
        // Find the substring that occurs
        // maximum number of times
        for (String it : M.keySet()) {
            if (M.get(it) > maxi) {
                maxi = M.get(it);
                ans = it;
            }
        }
 
        // Print the substring
        for (int i = 0; i < ans.length(); i++) {
            char c = ans.charAt(i);
            if (Character.isAlphabetic(c)) {
                System.out.print(c);
            }
        }
    }
 
    // Driver Code
    public static void main(String[] args)
    {
 
        // Given string
        String s = "bbbbbaaaaabbabababa";
 
        // Given K size of substring
        int K = 5;
 
        // Function call
        maximumOccurringString(s, K);
    }
}


Python3




# Python3 program for the above approach
from collections import deque, Counter, defaultdict
import sys
 
# Function that generates substring
# of length K that occurs maximum times
def maximumOccurringString(s, K):
     
    # Store the frequency of substrings
    M = {}
 
    # Deque to maintain substrings
    # window size K
    D = deque()
 
    for i in range(K):
        D.append(s[i])
 
    # Update the frequency of the
    # first substring in the Map
    # E="".join(list(D
    M[str("".join(list(D)))] = M.get(
        str("".join(list(D))), 0) + 1
 
    # Remove the first character of
    # the previous K length substring
    D.popleft()
 
    # Traverse the string
    for j in range(i, len(s)):
 
        # Insert the current character
        # as last character of
        # current substring
        D.append(s[j])
 
        M[str("".join(list(D)))] = M.get(
            str("".join(list(D))), 0) + 1
 
        # Pop the first character of
        # previous K length substring
        D.popleft()
 
    maxi = -sys.maxsize - 1
 
    ans = deque()
 
    # Find the substring that occurs
    # maximum number of times
    # print(M)
    for it in M:
         
        # print(it[0])
        if (M[it] >= maxi):
            maxi = M[it]
             
            # print(maxi)
            ans = it
 
    # Print the substring
    for i in range(len(ans)):
        print(ans[i], end = "")
 
# Driver Code
if __name__ == '__main__':
     
    # Given string
    s = "bbbbbaaaaabbabababa"
 
    # Given K size of substring
    K = 5
 
    # Function call
    maximumOccurringString(s, K)
 
# This code is contributed by mohit kumar 29


C#




using System;
using System.Collections.Generic;
 
namespace MaximumOccurringSubstring
{
    class Program
    {
        // Function that generates substring
        // of length K that occurs maximum times
        static void maximumOccurringString(string s, long K)
        {
            // Store the frequency of substrings
            Dictionary<Queue<char>, long> M = new Dictionary<Queue<char>, long>();
 
            long i;
 
            // Queue to maintain substrings
            // window size K
            Queue<char> D = new Queue<char>();
 
            for (i = 0; i < K; i++)
            {
                D.Enqueue(s[(int)i]);
            }
 
            // Update the frequency of the
            // first substring in the Dictionary
            M[D] = M.ContainsKey(D) ? M[D] + 1 : 1;
 
            // Remove the first character of
            // the previous K length substring
            D.Dequeue();
 
            // Traverse the string
            for (long j = i; j < s.Length; j++)
            {
                // Enqueue the current character
                // as the last character of
                // the current substring
                D.Enqueue(s[(int)j]);
 
                M[D] = M.ContainsKey(D) ? M[D] + 1 : 1;
 
                // Dequeue the first character of
                // previous K length substring
                D.Dequeue();
            }
 
            long maxi = int.MinValue;
 
            Queue<char> ans = new Queue<char>();
 
            // Find the substring that occurs
            // maximum number of times
            foreach (var kvp in M)
            {
                if (kvp.Value > maxi)
                {
                    maxi = kvp.Value;
                    ans = kvp.Key;
                }
            }
 
            // Print the substring
              Console.Write('a');
            foreach (var c in ans)
            {
                Console.Write(c);
            }
        }
 
        // Driver Code
        static void Main(string[] args)
        {
            // Given string
            string s = "bbbbbaaaaabbabababa";
 
            // Given K size of substring
            long K = 5;
 
            // Function call
            maximumOccurringString(s, K);
        }
    }
}


Javascript




// JavaScript program for the above approach
function maximumOccurringString(s, K) {
     
    // Store the frequency of substrings
    let M = {};
 
    // Deque to maintain substrings
    // window size K
    let D = [];
 
    for (let i = 0; i < K; i++) {
        D.push(s[i]);
    }
 
    // Update the frequency of the
    // first substring in the Map
    // E="".join(list(D
    M[D.join('')] = M[D.join('')] ? M[D.join('')] + 1 : 1;
 
    // Remove the first character of
    // the previous K length substring
    D.shift();
 
    // Traverse the string
    for (let j = K; j < s.length; j++) {
 
        // Insert the current character
        // as last character of
        // current substring
        D.push(s[j]);
 
        M[D.join('')] = M[D.join('')] ? M[D.join('')] + 1 : 1;
 
        // Pop the first character of
        // previous K length substring
        D.shift();
    }
 
    let maxi = -Infinity;
    let ans = [];
 
    // Find the substring that occurs
    // maximum number of times
    for (let it in M) {
         
        if (M[it] >= maxi) {
            maxi = M[it];
            ans = it.split('');
        }
    }
 
    // Print the substring
    console.log(ans.join(''));
}
 
// Driver Code
let s = "bbbbbaaaaabbabababa";
let K = 5;
 
// Function call
maximumOccurringString(s, K);


Output

ababa

Time Complexity: O((N – K)*log(N – K))
Auxiliary Space: O(N – K)



Last Updated : 14 Sep, 2023
Like Article
Save Article
Previous
Next
Share your thoughts in the comments
Similar Reads