Open In App

Distinct Substring frequency counter

Given a string S of length N containing only lowercase English letters and an integer K, the task is to find all distinct substrings of S of length K and, for each one, count the number of times it occurs in S (occurrences may overlap). Return a list of all the distinct substrings along with their frequency of occurrence in S.

Examples:



Input: S = “abcabcabc”, K = 3
Output: [(abc, 3), (bca, 2), (cab, 2)]

Input: S = “aabaaaba”, K = 2
Output: [(aa, 3), (ab, 2), (ba, 2)]



Approach: This can be solved by the following idea:

Use a sliding window of fixed length K: keep two pointers, one at index 0 and the other K positions ahead (0-indexed), and take the substring that lies between them. Record that substring in a hash map to maintain its frequency, then advance both pointers by one position and repeat until the window reaches the end of the string.

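Steps involved in the implementation of code:

- Create a hash map that maps each substring to its frequency.
- For every starting index i from 0 to N - K, extract the substring of length K that starts at i.
- Increment the count stored in the map for that substring.
- After the loop, print every (substring, frequency) pair held in the map.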

Below is the implementation for the above approach:




// C++ code for the above approach
#include <bits/stdc++.h>
using namespace std;
 
// Prints every distinct substring of s that has length K together with the
// number of (possibly overlapping) times it occurs in s, one "(substring,
// count)" pair per line. The order of the lines is unspecified because the
// counts are kept in an unordered_map.
//
// s: the text to scan.
// K: the substring length. Nothing is printed when K is not in the range
//    [1, s.length()], since no substring of that length exists.
void getAll(const std::string& s, int K)
{
    // Validate K before doing any size arithmetic: s.length() is unsigned,
    // so "s.length() - K + 1" wraps around to a huge value when K is
    // negative or larger than s.length() + 1.
    if (K <= 0 || static_cast<std::size_t>(K) > s.length()) {
        return;
    }

    const std::size_t width = static_cast<std::size_t>(K);
    const std::size_t windowCount = s.length() - width + 1;

    // To store frequencies
    std::unordered_map<std::string, int> m;

    // Slide a window of length K over s, one position at a time
    for (std::size_t i = 0; i < windowCount; ++i) {
        ++m[s.substr(i, width)];
    }

    // Printing the substrings formed; iterate by reference to avoid
    // copying every (string, int) pair
    for (const auto& entry : m) {
        std::cout << '(' << entry.first << ", " << entry.second << ')'
                  << '\n';
    }
}
 
// Driver code
int main()
{
    string s = "abcabcabc";
    int K = 3;
 
    // Function call
    getAll(s, K);
    return 0;
}




import java.util.HashMap;
import java.util.Map;
 
/**
 * Counts how often each distinct substring of a fixed length occurs in a
 * string and prints the result.
 */
public class Main {
    public static void main(String[] args)
    {
        String s = "abcabcabc";
        int K = 3;

        // Function call
        getAll(s, K);
    }

    /**
     * Prints every distinct substring of {@code s} that has length
     * {@code K}, together with the number of (possibly overlapping) times
     * it occurs in {@code s}, as one {@code (substring, count)} pair per
     * line. The order of the lines follows {@link HashMap} iteration order
     * and is therefore unspecified.
     *
     * @param s the text to scan
     * @param K the substring length; nothing is printed when it is not in
     *          the range {@code [1, s.length()]}
     */
    public static void getAll(String s, int K)
    {
        // No substring of length K exists outside this range. Without the
        // guard, K == 0 counts the empty string and K < 0 makes substring()
        // throw StringIndexOutOfBoundsException.
        if (K <= 0 || K > s.length()) {
            return;
        }

        // To store frequencies
        Map<String, Integer> m = new HashMap<>();

        // Slide a window of length K over s, one position at a time
        for (int i = 0; i + K <= s.length(); i++) {
            m.merge(s.substring(i, i + K), 1, Integer::sum);
        }

        // Printing the substring formed
        for (Map.Entry<String, Integer> entry : m.entrySet()) {
            System.out.println("(" + entry.getKey() + ", "
                               + entry.getValue() + ")");
        }
    }
}




# Python3 code for the above approach
def get_all(s: str, K: int) -> None:
    """Print each distinct length-K substring of s with its frequency.

    One "(substring, count)" line is printed per distinct substring, in
    order of first occurrence. Overlapping occurrences are all counted.

    Args:
        s: The text to scan.
        K: The substring length. Nothing is printed when K is not in the
            range [1, len(s)], because no substring of that length exists.
    """
    # Without this guard K == 0 counts the empty string len(s) + 1 times and
    # a negative K slices with a negative end index, yielding garbage keys.
    if K <= 0 or K > len(s):
        return

    # To store frequencies
    m = {}

    # Slide a window of length K over s, one position at a time
    for i in range(len(s) - K + 1):
        formed = s[i:i + K]
        m[formed] = m.get(formed, 0) + 1

    # Printing the substring formed
    for key, value in m.items():
        print(f"({key}, {value})")
 
 
# Driver code
if __name__ == '__main__':
    # Sample input taken from the first example
    s, K = "abcabcabc", 3

    # Print every distinct substring of length K with its count
    get_all(s, K)




// JavaScript code for the above approach
 
/**
 * Prints each distinct substring of `s` that has length `K` together with
 * the number of (possibly overlapping) times it occurs in `s`, as one
 * "(substring, count)" line per distinct substring, in order of first
 * occurrence.
 *
 * @param {string} s - The text to scan.
 * @param {number} K - The substring length. Nothing is printed unless K is
 *     an integer in the range [1, s.length], because no substring of that
 *     length exists otherwise.
 */
function getAll(s, K) {

    // Without this guard K <= 0 counts the empty string over and over
    if (!Number.isInteger(K) || K < 1 || K > s.length) {
        return;
    }

    // To store frequencies
    const m = new Map();

    // Slide a window of length K over s, one position at a time.
    // slice() replaces the deprecated String.prototype.substr().
    for (let i = 0; i + K <= s.length; i++) {
        const formed = s.slice(i, i + K);
        m.set(formed, (m.get(formed) || 0) + 1);
    }

    // Printing the substring formed
    for (const [key, value] of m) {
        console.log(`(${key}, ${value})`);
    }
}
 
// Driver code: sample input taken from the first example
const [s, K] = ["abcabcabc", 3];

// Print every distinct substring of length K with its count
getAll(s, K);




using System;
using System.Collections.Generic;
 
// Counts how often each distinct substring of a fixed length occurs in a
// string and prints the result.
class MainClass {
    // Prints every distinct substring of s that has length K together with
    // the number of (possibly overlapping) times it occurs in s, as one
    // "(substring, count)" line per distinct substring.
    //
    // s: the text to scan.
    // K: the substring length. Nothing is printed when K is not in the
    //    range [1, s.Length], because no substring of that length exists.
    static void GetAll(string s, int K) {

        // Without this guard K == 0 counts the empty string and K < 0 makes
        // Substring() throw ArgumentOutOfRangeException.
        if (K <= 0 || K > s.Length) {
            return;
        }

        // To store frequencies
        Dictionary<string, int> m = new Dictionary<string, int>();

        // Slide a window of length K over s, one position at a time
        for (int i = 0; i + K <= s.Length; i++) {
            string formed = s.Substring(i, K);

            // TryGetValue leaves count at 0 for an unseen key, so a single
            // lookup covers both the first and the repeated occurrence
            int count;
            m.TryGetValue(formed, out count);
            m[formed] = count + 1;
        }

        // Printing the substring formed
        foreach (KeyValuePair<string, int> entry in m) {
            Console.WriteLine("({0}, {1})", entry.Key, entry.Value);
        }
    }

    // Driver code
    static void Main() {
        string s = "abcabcabc";
        int K = 3;

        // Function call
        GetAll(s, K);
    }
}

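Output (the order of the lines may vary between languages and runs, because hash-map iteration order is not specified)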
(bca, 2)
(cab, 2)
(abc, 3)

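Time complexity: O(N*K) on average, since there are N-K+1 windows and extracting and hashing each substring of length K takes O(K) time.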
Auxiliary space: O((N-K+1)*K) in the worst case, because the map may have to store up to N-K+1 distinct substrings of length K each; in the best case (all windows identical) it stores a single substring, i.e. O(K).


Article Tags :