Open In App

CSES Solutions – Substring Order I

Last Updated : 30 Apr, 2024
Improve
Improve
Like Article
Like
Save
Share
Report

Given a string of length N and an integer k, find the kth smallest distinct substring when all of the string's distinct substrings are ordered lexicographically.

Examples:

Input: String = “AGCTAGCTAGCT” , k = 7
Output: AGCTAGC
Explanation: The 7 smallest distinct substrings in order are A, AG, AGC, AGCT, AGCTA, AGCTAG and AGCTAGC.

Input:  String = “BACAABC” , k = 5
Output: AB
Explanation:  The 5 smallest distinct substrings in order are A, AA, AAB, AABC and AB.

Approach: To solve the problem, follow the below idea:

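The problem can be solved by constructing a suffix automaton of the string (the steps and code comments below call it a "suffix tree", but the length / suffix-link / clone construction used in the code is the suffix-automaton one). Every path that starts at the initial state spells one distinct substring. Then, we perform a Depth-First Search (DFS) traversal from the initial state, always trying the outgoing characters in increasing order, to find the kth smallest distinct substring.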

Step-by-step algorithm:

  • Initialize a suffix tree to represent all suffixes of the input string.
  • Extend the suffix tree by adding each character of the input string.
  • Calculate the number of distinct substrings in each subtree of the suffix tree.
  • Perform a Depth-First Search (DFS) traversal of the suffix tree to find the kth smallest distinct substring:
    • At each node of the suffix tree, iterate through its next nodes (child nodes).
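    • For each next node (taken in increasing order of its character), if the count of distinct substrings in its subtree is less than or equal to k, decrement k by that count and continue with the next sibling. Here k is zero-based: the implementations start the traversal with k − 1.
    • If the count of distinct substrings in the subtree of a next node is greater than k, the answer lies in that subtree: add the corresponding character to the result list, decrement k by 1 (for the substring that ends at that node), and recursively traverse from that next node.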
    • Repeat this process until k becomes negative or the entire suffix tree is traversed.
  • Output the characters stored in the result list as the kth smallest distinct substring.

Below is the implementation of the algorithm:

C++
#include <iostream>
#include <map>
#include <vector>

using namespace std;

// Longest input (in characters) the fixed-size state pool below is meant for.
const int MAX_N = 100005;

// One state of the structure built by extend(). The length / link / clone
// bookkeeping is that of a suffix automaton (the surrounding article calls it
// a suffix tree).
struct Node {
    // Written by calculate(): 1 for this state itself plus the counts of all
    // states reached through nextNodes. Stays 0 until it has been computed.
    long long count = 0;
    // Written by extend(): last.length + 1 for a fresh state,
    // previous.length + 1 for a clone.
    int length = 0;
    // Suffix link; -1 marks the initial state, which has none.
    int link = -1;
    // Outgoing transitions: character -> index into `nodes`. std::map
    // iterates them in ascending character order.
    std::map<char, int> nextNodes;
};

// Global variables
// State pool. extend() consumes one or two entries per character and does
// no bounds checking, so inputs must not be longer than MAX_N characters.
Node nodes[2 * MAX_N];
// size: number of pool entries in use; last: state reached by the whole text
// read so far.
// NOTE: with `using namespace std;` in effect, an unqualified `size` is
// ambiguous with std::size from C++17 on, so every use below is spelled
// `::size`.
int size, last;
// Characters of the answer, appended by dfs().
std::vector<char> result;

// Resets all shared state so that a new string can be fed to extend().
// States used by an earlier build are returned to their default value
// (transitions and cached counts would otherwise leak into the next build),
// and `result` is emptied.
void init()
{
    for (int i = 0; i < ::size; ++i) {
        nodes[i] = Node();
    }
    result.clear();

    nodes[0].length = 0;
    nodes[0].link = -1;
    ::size = 1;
    last = 0;
}

// Appends `character` to the text represented by the structure.
// Must be called after init(); updates `::size` and `last`.
void extend(char character)
{
    const int current = ::size++;
    nodes[current].length = nodes[last].length + 1;

    // Walk the suffix links and add the missing transition to `current`.
    // insert() leaves an existing transition untouched and reports false,
    // which is exactly the stop condition.
    int previous = last;
    while (previous != -1
           && nodes[previous]
                  .nextNodes.insert(std::make_pair(character, current))
                  .second) {
        previous = nodes[previous].link;
    }

    if (previous == -1) {
        // No state on the link path had the transition: link to the root.
        nodes[current].link = 0;
    }
    else {
        const int target = nodes[previous].nextNodes.find(character)->second;
        if (nodes[previous].length + 1 == nodes[target].length) {
            nodes[current].link = target;
        }
        else {
            // `target` also represents longer strings: split off a clone
            // that keeps only the ones of length previous.length + 1.
            const int clone = ::size++;
            nodes[clone].length = nodes[previous].length + 1;
            nodes[clone].nextNodes = nodes[target].nextNodes;
            nodes[clone].link = nodes[target].link;

            // Redirect every transition on the link path that still points
            // at `target`. find() is used so that a lookup never inserts.
            while (previous != -1) {
                std::map<char, int>::iterator edge
                    = nodes[previous].nextNodes.find(character);
                if (edge == nodes[previous].nextNodes.end()
                    || edge->second != target) {
                    break;
                }
                edge->second = clone;
                previous = nodes[previous].link;
            }
            nodes[target].link = nodes[current].link = clone;
        }
    }
    last = current;
}

// Fills in Node::count for nodeIndex and every state reachable from it:
// count = 1 (the path that stops at the state) + sum of the counts of the
// states its transitions lead to. A count of 0 is treated as "not computed
// yet", so all counts must be 0 before the first call.
//
// The traversal keeps its own stack instead of recursing: the longest path
// has as many states as the input has characters, which is too deep for the
// call stack on long inputs.
void calculate(int nodeIndex)
{
    using EdgeIt = std::map<char, int>::iterator;
    // Each entry: a state whose count is still being accumulated and the
    // next of its transitions to look at.
    std::vector<std::pair<int, EdgeIt> > pending;

    nodes[nodeIndex].count = 1;
    pending.push_back(
        std::make_pair(nodeIndex, nodes[nodeIndex].nextNodes.begin()));

    while (!pending.empty()) {
        const int state = pending.back().first;
        EdgeIt& edge = pending.back().second;

        if (edge == nodes[state].nextNodes.end()) {
            // All transitions handled: pass the finished total up to the
            // state that descended into this one.
            const long long finished = nodes[state].count;
            pending.pop_back();
            if (!pending.empty()) {
                nodes[pending.back().first].count += finished;
            }
            continue;
        }

        const int child = edge->second;
        // Advance before any push_back: growing the vector invalidates the
        // `edge` reference.
        ++edge;
        if (nodes[child].count == 0) {
            nodes[child].count = 1;
            pending.push_back(
                std::make_pair(child, nodes[child].nextNodes.begin()));
        }
        else {
            // Already computed through another path: reuse it.
            nodes[state].count += nodes[child].count;
        }
    }
}

// Appends to `result` the characters of the substring with zero-based rank k
// among all paths that leave nodeIndex, taking transitions in ascending
// character order. Requires the counts written by calculate().
//
// A negative k appends nothing. If k is not smaller than the number of paths
// available, the walk stops at the first state where no transition can
// contain the rank and leaves whatever was appended so far.
//
// Written as a loop: the original tail recursion went one call deeper per
// answer character.
void dfs(int nodeIndex, long long k)
{
    while (k >= 0) {
        bool descended = false;
        for (const auto& entry : nodes[nodeIndex].nextNodes) {
            const long long below = nodes[entry.second].count;
            if (below <= k) {
                // The whole branch ranks before the target: skip it.
                k -= below;
                continue;
            }
            result.push_back(entry.first);
            nodeIndex = entry.second;
            // One rank is used up by the path that ends at the new state.
            --k;
            descended = true;
            break;
        }
        if (!descended) {
            return;
        }
    }
}

// Demo driver: prints the 10th smallest distinct substring of a fixed string.
int main()
{
    const std::string inputString = "babaacbaab";
    const long long kthSmallest = 10;

    // Build the structure one character at a time.
    init();
    for (std::size_t i = 0; i < inputString.size(); ++i) {
        extend(inputString[i]);
    }

    // Count paths, then walk to the requested rank (dfs expects it
    // zero-based).
    calculate(0);
    dfs(0, kthSmallest - 1);

    std::cout << std::string(result.begin(), result.end()) << std::endl;

    return 0;
}

// This code is contributed by shivamgupta310570
Java
import java.util.*;

// One state of the structure built by SuffixTree.extend(). Every field
// starts at the value given by its initializer.
class Node {
    // Written by SuffixTree.calculate(int): 1 for this state plus the counts
    // of the states its transitions lead to; 0 until it has been computed
    long count = 0;
    // Written by SuffixTree.extend(char) when the state is created
    int length = 0;
    // Suffix link; -1 means "none"
    int link = -1;
    // Outgoing transitions: character -> index into SuffixTree.nodes
    Map<Character, Integer> nextNodes = new HashMap<>();
}

// Finds the kth smallest distinct substring of a string. The structure built
// by extend() is a suffix automaton (length / suffix link / clone), even
// though the class is named SuffixTree.
public class SuffixTree {
    static final int MAX_N = 100005;
    // Pool of states, allocated by init()
    static Node[] nodes;
    // size: number of pool entries in use; last: state reached by the whole
    // text read so far
    static int size, last;
    // Characters of the answer, appended by dfs()
    static List<Character> result;

    public static void main(String[] args)
    {
        String inputString = "babaacbaab";
        long kthSmallest = 10;

        // Build the structure one character at a time
        init();
        for (int i = 0; i < inputString.length(); i++) {
            extend(inputString.charAt(i));
        }
        // Count the paths leaving every state
        calculate();

        // dfs expects a zero-based rank
        dfs(0, kthSmallest - 1);

        // Output the kth smallest distinct substring
        for (char c : result) {
            System.out.print(c);
        }
        System.out.println();
    }

    // Allocates a fresh pool and result list and makes state 0 the root.
    static void init()
    {
        nodes = new Node[2 * MAX_N];
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = new Node();
        }
        nodes[0].length = 0;
        nodes[0].link = -1;
        size = 1;
        last = 0;
        result = new ArrayList<>();
    }

    // Appends one character to the text represented by the structure.
    static void extend(char character)
    {
        int current = size++;
        nodes[current].length = nodes[last].length + 1;

        // Walk the suffix links, adding the missing transition to `current`
        int previous = last;
        while (previous != -1
               && !nodes[previous].nextNodes.containsKey(
                   character)) {
            nodes[previous].nextNodes.put(character,
                                          current);
            previous = nodes[previous].link;
        }

        if (previous == -1) {
            // No state on the link path had the transition: link to root
            nodes[current].link = 0;
        }
        else {
            int next
                = nodes[previous].nextNodes.get(character);
            if (nodes[previous].length + 1
                == nodes[next].length) {
                nodes[current].link = next;
            }
            else {
                // `next` also represents longer strings: split off a clone
                // of length previous.length + 1
                int clone = size++;
                nodes[clone].length
                    = nodes[previous].length + 1;
                nodes[clone].nextNodes
                    = new HashMap<>(nodes[next].nextNodes);
                nodes[clone].link = nodes[next].link;
                // Redirect transitions on the link path that still point at
                // `next`
                while (previous != -1) {
                    Integer target
                        = nodes[previous].nextNodes.get(
                            character);
                    if (target == null || target != next) {
                        break;
                    }
                    nodes[previous].nextNodes.put(character,
                                                  clone);
                    previous = nodes[previous].link;
                }
                nodes[next].link = nodes[current].link
                    = clone;
            }
        }
        last = current;
    }

    // Computes the counts for every state reachable from the root.
    static void calculate()
    {
        calculate(0);
    }

    // Sets count for nodeIndex and every state reachable from it:
    // count = 1 (the path that stops at the state) + sum of the counts of
    // the states its transitions lead to. A count of 0 means "not computed
    // yet".
    //
    // Uses explicit stacks instead of recursion: the longest path has as
    // many states as the input has characters, which overflows the thread
    // stack on long inputs.
    static void calculate(int nodeIndex)
    {
        // Parallel stacks: a state being accumulated and the iterator over
        // its not yet handled transition targets
        Deque<Integer> states = new ArrayDeque<>();
        Deque<Iterator<Integer>> remaining = new ArrayDeque<>();

        nodes[nodeIndex].count = 1;
        states.push(nodeIndex);
        remaining.push(
            nodes[nodeIndex].nextNodes.values().iterator());

        while (!states.isEmpty()) {
            int state = states.peek();
            Iterator<Integer> children = remaining.peek();

            if (!children.hasNext()) {
                // Finished: pass the total up to the state that descended
                // into this one
                states.pop();
                remaining.pop();
                if (!states.isEmpty()) {
                    nodes[states.peek()].count
                        += nodes[state].count;
                }
                continue;
            }

            int child = children.next();
            if (nodes[child].count == 0) {
                nodes[child].count = 1;
                states.push(child);
                remaining.push(
                    nodes[child].nextNodes.values().iterator());
            }
            else {
                // Already computed through another path: reuse it
                nodes[state].count += nodes[child].count;
            }
        }
    }

    // Appends to `result` the characters of the substring with zero-based
    // rank k among all paths leaving nodeIndex. A negative k appends
    // nothing.
    //
    // Transitions are visited through a TreeMap copy: nextNodes is a
    // HashMap, whose iteration order is unspecified, while the ranking is
    // only correct when characters are tried in ascending order. The walk is
    // a loop rather than a recursion so its depth does not grow with the
    // answer length.
    static void dfs(int nodeIndex, long k)
    {
        int state = nodeIndex;
        while (k >= 0) {
            boolean descended = false;
            Map<Character, Integer> ordered
                = new TreeMap<>(nodes[state].nextNodes);
            for (Map.Entry<Character, Integer> entry :
                 ordered.entrySet()) {
                int nextNodeIndex = entry.getValue();
                long below = nodes[nextNodeIndex].count;
                if (below <= k) {
                    // The whole branch ranks before the target: skip it
                    k -= below;
                    continue;
                }
                result.add(entry.getKey());
                state = nextNodeIndex;
                // One rank is used up by the path ending at the new state
                k--;
                descended = true;
                break;
            }
            if (!descended) {
                return;
            }
        }
    }
}
Python3
from collections import defaultdict

class Node:
    """One state of the structure built by extend()."""

    def __init__(self):
        # count: filled in by calculate_helper(); 0 means "not computed yet".
        # length: set by extend() when the state is created.
        # link: suffix link, -1 meaning "none".
        self.count, self.length, self.link = 0, 0, -1
        # Outgoing transitions: character -> index into the global `nodes`.
        self.nextNodes = dict()

# Longest input (in characters) the preallocated state pool is meant for
MAX_N = 100005

# Shared state used by init(), extend(), calculate_helper() and dfs():
# - nodes:  preallocated pool of states
# - size:   number of pool entries in use
# - last:   index of the state reached by the whole text read so far
# - result: characters of the answer, appended by dfs()
nodes = [Node() for _ in range(MAX_N * 2)]
size = last = 0
result = list()

# Reset the shared state so that a new string can be fed to extend()
def init():
    """Make state 0 the root of an empty structure.

    States used by an earlier build are replaced by fresh ones (their
    transitions and cached counts would otherwise leak into the next build)
    and `result` is emptied in place.
    """
    global size, last
    for index in range(size):
        nodes[index] = Node()
    del result[:]

    nodes[0].length = 0
    nodes[0].link = -1
    size = 1
    last = 0

# Append one character to the text represented by the structure
def extend(character):
    """Add `character` to the structure; updates the globals size and last."""
    global size, last

    current = size
    size += 1
    nodes[current].length = nodes[last].length + 1

    # Walk the suffix links, adding the missing transition to `current`.
    previous = last
    while previous != -1:
        transitions = nodes[previous].nextNodes
        if character in transitions:
            break
        transitions[character] = current
        previous = nodes[previous].link

    if previous == -1:
        # No state on the link path had the transition: link to the root.
        nodes[current].link = 0
        last = current
        return

    target = nodes[previous].nextNodes[character]
    if nodes[previous].length + 1 == nodes[target].length:
        nodes[current].link = target
        last = current
        return

    # `target` also represents longer strings: split off a clone of length
    # previous.length + 1 and redirect the transitions that pointed at it.
    clone = size
    size += 1
    nodes[clone].length = nodes[previous].length + 1
    nodes[clone].nextNodes = nodes[target].nextNodes.copy()
    nodes[clone].link = nodes[target].link
    while previous != -1 and nodes[previous].nextNodes[character] == target:
        nodes[previous].nextNodes[character] = clone
        previous = nodes[previous].link
    nodes[target].link = clone
    nodes[current].link = clone
    last = current

# Count the paths leaving every state reachable from the root
def calculate():
    """Run calculate_helper() starting at the root state (index 0)."""
    root_index = 0
    calculate_helper(root_index)

# Helper function for calculating counts without recursion
def calculate_helper(nodeIndex):
    """Set `count` for nodeIndex and every state reachable from it.

    count = 1 (the path that stops at the state) + the counts of the states
    its transitions lead to. A count of 0 means "not computed yet".

    The traversal keeps its own stack: the longest path has as many states
    as the input has characters, so the recursive form exceeds Python's
    default recursion limit on inputs of about a thousand characters.
    """
    nodes[nodeIndex].count = 1
    # Each entry: (state being accumulated, iterator over its remaining
    # transition targets).
    pending = [(nodeIndex, iter(nodes[nodeIndex].nextNodes.values()))]
    while pending:
        state, children = pending[-1]
        child = next(children, None)
        if child is None:
            # Finished: pass the total up to the state that descended here.
            pending.pop()
            if pending:
                nodes[pending[-1][0]].count += nodes[state].count
        elif nodes[child].count == 0:
            nodes[child].count = 1
            pending.append((child, iter(nodes[child].nextNodes.values())))
        else:
            # Already computed through another path: reuse it.
            nodes[state].count += nodes[child].count

# Walk to the kth smallest distinct substring
def dfs(nodeIndex, k):
    """Append to `result` the substring with zero-based rank k.

    Ranks are taken among all paths leaving nodeIndex, trying transitions in
    ascending character order; the counts from calculate() must be in place.
    A negative k appends nothing.

    Written as a loop: the recursive form went one call deeper per answer
    character and hit the recursion limit on long answers.
    """
    while k >= 0:
        for char, nextNodeIndex in sorted(nodes[nodeIndex].nextNodes.items()):
            below = nodes[nextNodeIndex].count
            if below <= k:
                # The whole branch ranks before the target: skip it.
                k -= below
            else:
                result.append(char)
                nodeIndex = nextNodeIndex
                # One rank is used up by the path ending at the new state.
                k -= 1
                break
        else:
            # No transition can contain rank k: stop.
            return

# Demo driver
def main():
    """Print the 10th smallest distinct substring of a fixed string."""
    text, rank = "babaacbaab", 10

    init()
    for symbol in text:
        extend(symbol)
    calculate()
    # dfs expects a zero-based rank.
    dfs(0, rank - 1)

    print("".join(result))

if __name__ == "__main__":
    main()
JavaScript
// One state of the structure built by extend().
class Node {
    constructor() {
        // count: filled in by calculateHelper(); 0 means "not computed yet".
        // length: set by extend() when the state is created.
        // link: suffix link, -1 meaning "none".
        // nextNodes: outgoing transitions, character -> index into `nodes`.
        Object.assign(this, { count: 0, length: 0, link: -1, nextNodes: {} });
    }
}

// Longest input (in characters) the preallocated state pool is meant for
const MAX_N = 100005;

// Shared state used by init(), extend(), calculateHelper() and dfs():
//   nodes  - preallocated pool of states
//   size   - number of pool entries in use
//   last   - index of the state reached by the whole text read so far
//   result - characters of the answer, appended by dfs()
let nodes = new Array(2 * MAX_N).fill(null).map(() => new Node());
let size = 0,
    last = 0;
let result = [];

// Reset the shared state so that a new string can be fed to extend().
// States used by an earlier build are replaced by fresh ones (their
// transitions and cached counts would otherwise leak into the next build)
// and `result` is emptied in place.
function init() {
    for (let index = 0; index < size; index++) {
        nodes[index] = new Node();
    }
    result.length = 0;

    nodes[0].length = 0;
    nodes[0].link = -1;
    size = 1;
    last = 0;
}

// Append one character to the text represented by the structure.
// Updates the globals `size` and `last`.
function extend(character) {
    const current = size++;
    nodes[current].length = nodes[last].length + 1;

    // Walk the suffix links, adding the missing transition to `current`.
    let previous = last;
    for (; previous !== -1; previous = nodes[previous].link) {
        const transitions = nodes[previous].nextNodes;
        if (character in transitions) {
            break;
        }
        transitions[character] = current;
    }

    if (previous === -1) {
        // No state on the link path had the transition: link to the root.
        nodes[current].link = 0;
        last = current;
        return;
    }

    const target = nodes[previous].nextNodes[character];
    if (nodes[previous].length + 1 === nodes[target].length) {
        nodes[current].link = target;
        last = current;
        return;
    }

    // `target` also represents longer strings: split off a clone of length
    // previous.length + 1 and redirect the transitions that pointed at it.
    const clone = size++;
    nodes[clone].length = nodes[previous].length + 1;
    nodes[clone].nextNodes = Object.assign({}, nodes[target].nextNodes);
    nodes[clone].link = nodes[target].link;
    while (previous !== -1 && nodes[previous].nextNodes[character] === target) {
        nodes[previous].nextNodes[character] = clone;
        previous = nodes[previous].link;
    }
    nodes[target].link = clone;
    nodes[current].link = clone;
    last = current;
}

// Count the paths leaving every state reachable from the root (index 0).
function calculate() {
    const rootIndex = 0;
    calculateHelper(rootIndex);
}

// Sets `count` for nodeIndex and every state reachable from it:
// count = 1 (the path that stops at the state) + the counts of the states
// its transitions lead to. A count of 0 means "not computed yet".
//
// The traversal keeps its own stack instead of recursing: the longest path
// has as many states as the input has characters, which exceeds the call
// stack on long inputs.
function calculateHelper(nodeIndex) {
    // Each frame: the state being accumulated, its transition targets and
    // the position of the next target to look at.
    const frameFor = (state) => ({
        state,
        children: Object.values(nodes[state].nextNodes),
        next: 0,
    });

    nodes[nodeIndex].count = 1;
    const pending = [frameFor(nodeIndex)];

    while (pending.length > 0) {
        const frame = pending[pending.length - 1];

        if (frame.next === frame.children.length) {
            // Finished: pass the total up to the state that descended here.
            pending.pop();
            if (pending.length > 0) {
                nodes[pending[pending.length - 1].state].count += nodes[frame.state].count;
            }
            continue;
        }

        const child = frame.children[frame.next++];
        if (nodes[child].count === 0) {
            nodes[child].count = 1;
            pending.push(frameFor(child));
        } else {
            // Already computed through another path: reuse it.
            nodes[frame.state].count += nodes[child].count;
        }
    }
}

// Appends to `result` the characters of the substring with zero-based rank k
// among all paths leaving nodeIndex, trying transitions in ascending
// character order. Requires the counts written by calculate(). A negative k
// appends nothing.
//
// Written as a loop: the recursive form went one call deeper per answer
// character and could exhaust the call stack on long answers.
function dfs(nodeIndex, k) {
    // Order transitions by their character only.
    const byCharacter = ([a], [b]) => (a < b ? -1 : a > b ? 1 : 0);

    while (k >= 0) {
        let descended = false;
        const transitions = Object.entries(nodes[nodeIndex].nextNodes).sort(byCharacter);
        for (const [char, nextNodeIndex] of transitions) {
            const below = nodes[nextNodeIndex].count;
            if (below <= k) {
                // The whole branch ranks before the target: skip it.
                k -= below;
                continue;
            }
            result.push(char);
            nodeIndex = nextNodeIndex;
            // One rank is used up by the path ending at the new state.
            k -= 1;
            descended = true;
            break;
        }
        if (!descended) {
            return;
        }
    }
}

// Demo driver: prints the 10th smallest distinct substring of a fixed string.
function main() {
    const text = "babaacbaab";
    const rank = 10;

    init();
    Array.from(text).forEach((symbol) => extend(symbol));
    calculate();
    // dfs expects a zero-based rank.
    dfs(0, rank - 1);

    console.log(result.join(''));
}

// Execute the main function
main();

Output
aba

Time Complexity: O(N), where N is the length of the input string.
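Auxiliary Space: O(N) for the states and transitions of the automaton (the implementations additionally preallocate a fixed pool of 2 · MAX_N nodes).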



Like Article
Suggest improvement
Previous
Next
Share your thoughts in the comments

Similar Reads