Given a string of length N. Your task is to find the kth smallest distinct substring when all of its distinct substrings are ordered lexicographically.
Examples:
Input: String = "AGCTAGCTAGCT" , k = 7
Output: AGCTAGC
Explanation: The 7 smallest distinct substrings in order are A, AG, AGC, AGCT, AGCTA, AGCTAG and AGCTAGC.
Input: String = "BACAABC" , k = 5
Output: AB
Explanation: The 5 smallest distinct substrings in order are A, AA, AAB, AABC and AB.
Approach: To solve the problem, follow the below idea:
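The problem can be solved by constructing a suffix automaton (the structure the code below refers to as a "suffix tree"): a compact directed acyclic graph in which every path starting at the root spells exactly one distinct substring of the string. Then, we perform a Depth-First Search (DFS) traversal of this structure, visiting the outgoing characters of each node in lexicographic order, to find the kth smallest distinct substring.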
Step-by-step algorithm:
- Initialize a suffix tree to represent all suffixes of the input string.
- Extend the suffix tree by adding each character of the input string.
- Calculate the number of distinct substrings in each subtree of the suffix tree.
- Perform a Depth-First Search (DFS) traversal of the suffix tree to find the kth smallest distinct substring:
- At each node of the suffix tree, iterate through its next nodes (child nodes).
- For each next node, if the count of distinct substrings in its subtree is less than or equal to k, decrement k by that count and move to the next node.
- If the count of distinct substrings in the subtree of a next node is greater than k, add the corresponding character to the result list, recursively traverse to that next node, and decrement k by 1.
- Repeat this process until k becomes negative or the entire suffix tree is traversed.
- Output the characters stored in the result list as the kth smallest distinct substring.
Below is the implementation of the algorithm:
#include <iostream>
#include <map>
#include <vector>
using namespace std;
const int MAX_N = 100005;
// Define a Node struct representing each node in the suffix
// tree
struct Node {
    // Number of distinct paths that start at this node, counting
    // the empty path; stays 0 until calculate() has visited it
    long long count;
    // Length value assigned to this node by extend()
    int length;
    // Index of the suffix-link target, -1 when there is none
    int link;
    // Outgoing transitions keyed by character; std::map iterates
    // them in ascending character order
    map<char, int> nextNodes;
    // Every node starts uncounted, with length 0 and no suffix link
    Node()
        : count(0)
        , length(0)
        , link(-1)
    {
    }
};
// Global variables
// Backing storage for all nodes; 2 * MAX_N slots are reserved
// because extend() can take up to two slots per character (the
// new node and, in the split case, a clone)
Node nodes[2 * MAX_N];
// size: number of slots of nodes[] already handed out
// last: index of the node created for the most recently added
// character
int size, last;
// Characters of the answer, appended by dfs() in order
vector<char> result;
// Initialize the suffix tree
// Set up the root node (index 0) and mark it as the only node in
// use. The counter is written as ::size on purpose: the file has
// `using namespace std;`, and under C++17 an unqualified `size`
// is ambiguous between this global and the std::size function
// template. The qualified name always refers to the global.
void init()
{
    nodes[0].length = 0;
    nodes[0].link = -1;
    ::size = 1;
    last = 0;
}
// Extend the suffix tree with a new character
// Append one character to the structure.
//
// character: the next character of the input string.
//
// Allocates one node for the extended string and, when an existing
// transition has to be split, one additional clone node. Updates
// the globals `size` and `last`. The counter is spelled ::size
// because an unqualified `size` is ambiguous with std::size when
// `using namespace std;` is active (C++17).
void extend(char character)
{
    int current = ::size++;
    nodes[current].length = nodes[last].length + 1;
    int previous = last;
    // Follow suffix links from the previous end node and add the
    // missing transition on `character` to every node on the way
    while (previous != -1
           && nodes[previous].nextNodes.find(character)
                  == nodes[previous].nextNodes.end()) {
        nodes[previous].nextNodes[character] = current;
        previous = nodes[previous].link;
    }
    if (previous == -1) {
        // Ran off the root: no node had this transition yet
        nodes[current].link = 0;
    }
    else {
        int next = nodes[previous].nextNodes[character];
        if (nodes[previous].length + 1 == nodes[next].length) {
            // The existing target can serve as suffix link as is
            nodes[current].link = next;
        }
        else {
            // Split: copy `next` into a clone with the shorter
            // length and redirect the affected transitions to it
            int clone = ::size++;
            nodes[clone].length = nodes[previous].length + 1;
            nodes[clone].nextNodes = nodes[next].nextNodes;
            nodes[clone].link = nodes[next].link;
            while (previous != -1
                   && nodes[previous].nextNodes[character] == next) {
                nodes[previous].nextNodes[character] = clone;
                previous = nodes[previous].link;
            }
            nodes[next].link = nodes[current].link = clone;
        }
    }
    last = current;
}
// Calculate the number of distinct substrings in each
// subtree
void calculate(int nodeIndex)
{
nodes[nodeIndex].count = 1;
for (auto& entry : nodes[nodeIndex].nextNodes) {
int nextNodeIndex = entry.second;
if (nodes[nextNodeIndex].count == 0) {
calculate(nextNodeIndex);
}
nodes[nodeIndex].count
+= nodes[nextNodeIndex].count;
}
}
// DFS traversal to find kth smallest distinct substring
void dfs(int nodeIndex, long long k)
{
    int current = nodeIndex;
    long long remaining = k;
    // Each pass of the outer loop handles one node; it replaces
    // the tail call dfs(child, k - 1)
    while (remaining >= 0) {
        bool descended = false;
        for (const auto& transition : nodes[current].nextNodes) {
            const long long below = nodes[transition.second].count;
            if (below > remaining) {
                // The answer continues through this child
                result.push_back(transition.first);
                current = transition.second;
                --remaining;
                descended = true;
                break;
            }
            // Skip every path that goes through this child
            remaining -= below;
        }
        if (!descended) {
            return;
        }
    }
}
// Demo driver: builds the structure for a fixed string and prints
// its kthSmallest-th distinct substring.
int main()
{
    const string inputString = "babaacbaab";
    const long long kthSmallest = 10;
    init();
    for (string::size_type i = 0; i < inputString.length(); ++i) {
        extend(inputString[i]);
    }
    calculate(0);
    // dfs() takes a zero-based rank
    dfs(0, kthSmallest - 1);
    for (vector<char>::const_iterator it = result.begin();
         it != result.end(); ++it) {
        cout << *it;
    }
    cout << endl;
    return 0;
}
// This code is contributed by shivamgupta310570
import java.util.*;
// Define a Node class representing each node in the suffix
// tree
/**
 * One state of the automaton built by {@link SuffixTree}.
 */
class Node {
    /**
     * Number of distinct paths that start at this state, counting the empty
     * path. {@code 0} means the state has not been counted yet.
     */
    long count;

    /** Length value assigned to this state by {@code SuffixTree.extend}. */
    int length;

    /** Index of the suffix-link target, or {@code -1} when there is none. */
    int link;

    /**
     * Outgoing transitions keyed by character. A sorted map is required:
     * the k-th substring search walks the transitions in ascending
     * character order, which a hash map does not guarantee.
     */
    Map<Character, Integer> nextNodes;

    /** Creates an uncounted state with no transitions and no suffix link. */
    Node() {
        count = 0;
        length = 0;
        link = -1;
        nextNodes = new TreeMap<>();
    }
}

/**
 * Finds the k-th smallest distinct substring of a string.
 *
 * <p>Despite the class name, the structure built by {@link #extend(char)}
 * uses suffix links and cloned states, i.e. it is a suffix automaton. All
 * state is static, so only one string can be processed at a time; call
 * {@link #init()} before each new string.
 */
public class SuffixTree {
    /** Maximum supported input length; {@code 2 * MAX_N} states are allocated. */
    static final int MAX_N = 100005;

    /** Pool of states, indexed by state number; index 0 is the root. */
    static Node[] nodes;

    /** {@code size}: states in use; {@code last}: state of the whole string so far. */
    static int size, last;

    /** Characters of the answer, filled in by {@link #dfs(int, long)}. */
    static List<Character> result;

    /** Demo driver: prints the 10th smallest distinct substring of a fixed string. */
    public static void main(String[] args) {
        String inputString = "babaacbaab";
        long kthSmallest = 10;
        init();
        for (int i = 0; i < inputString.length(); i++) {
            extend(inputString.charAt(i));
        }
        calculate();
        // dfs expects a zero-based rank
        dfs(0, kthSmallest - 1);
        StringBuilder answer = new StringBuilder(result.size());
        for (char c : result) {
            answer.append(c);
        }
        System.out.println(answer);
    }

    /**
     * Resets all static state: allocates a fresh pool of states, makes state
     * 0 the root and empties {@link #result}.
     */
    static void init() {
        nodes = new Node[2 * MAX_N];
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = new Node();
        }
        nodes[0].length = 0;
        nodes[0].link = -1;
        size = 1;
        last = 0;
        result = new ArrayList<>();
    }

    /**
     * Appends one character to the structure.
     *
     * <p>Uses one new state and, when an existing transition has to be
     * split, one additional clone state.
     *
     * @param character the next character of the input string
     */
    static void extend(char character) {
        int current = size++;
        nodes[current].length = nodes[last].length + 1;
        int previous = last;
        // Follow suffix links and add the missing transition on the way
        while (previous != -1 && !nodes[previous].nextNodes.containsKey(character)) {
            nodes[previous].nextNodes.put(character, current);
            previous = nodes[previous].link;
        }
        if (previous == -1) {
            // Ran off the root: no state had this transition yet
            nodes[current].link = 0;
        } else {
            int next = nodes[previous].nextNodes.get(character);
            if (nodes[previous].length + 1 == nodes[next].length) {
                // The existing target can serve as suffix link as is
                nodes[current].link = next;
            } else {
                // Split: copy `next` into a clone with the shorter length.
                // The copy must stay sorted, hence TreeMap.
                int clone = size++;
                nodes[clone].length = nodes[previous].length + 1;
                nodes[clone].nextNodes = new TreeMap<>(nodes[next].nextNodes);
                nodes[clone].link = nodes[next].link;
                while (previous != -1
                        && nodes[previous].nextNodes.get(character) == next) {
                    nodes[previous].nextNodes.put(character, clone);
                    previous = nodes[previous].link;
                }
                nodes[next].link = clone;
                nodes[current].link = clone;
            }
        }
        last = current;
    }

    /** Computes {@code count} for every state reachable from the root. */
    static void calculate() {
        calculate(0);
    }

    /**
     * Computes {@code count} for {@code nodeIndex} and every uncounted state
     * reachable from it: one for the state itself plus the counts of all
     * transition targets.
     *
     * <p>Implemented with an explicit stack instead of recursion, because the
     * depth equals the longest path and can reach the input length.
     *
     * @param nodeIndex index of the state to start from
     */
    static void calculate(int nodeIndex) {
        Deque<Integer> path = new ArrayDeque<>();
        Deque<Iterator<Integer>> pending = new ArrayDeque<>();
        nodes[nodeIndex].count = 1;
        path.push(nodeIndex);
        pending.push(nodes[nodeIndex].nextNodes.values().iterator());
        while (!path.isEmpty()) {
            int node = path.peek();
            Iterator<Integer> children = pending.peek();
            if (children.hasNext()) {
                int child = children.next();
                if (nodes[child].count == 0) {
                    // Not counted yet: descend; its total is added on the way back
                    nodes[child].count = 1;
                    path.push(child);
                    pending.push(nodes[child].nextNodes.values().iterator());
                } else {
                    nodes[node].count += nodes[child].count;
                }
            } else {
                // All transitions handled: hand the finished total to the parent
                path.pop();
                pending.pop();
                if (!path.isEmpty()) {
                    nodes[path.peek()].count += nodes[node].count;
                }
            }
        }
    }

    /**
     * Appends to {@link #result} the characters of the substring with
     * zero-based rank {@code k} among the distinct non-empty substrings that
     * start at {@code nodeIndex}. Requires {@link #calculate()} to have run.
     * If {@code k} is negative, or not smaller than the number of such
     * substrings, nothing is appended.
     *
     * @param nodeIndex state to start from (0 for the whole string)
     * @param k zero-based rank of the wanted substring
     */
    static void dfs(int nodeIndex, long k) {
        int current = nodeIndex;
        long remaining = k;
        while (remaining >= 0) {
            boolean descended = false;
            for (Map.Entry<Character, Integer> entry : nodes[current].nextNodes.entrySet()) {
                int child = entry.getValue();
                long below = nodes[child].count;
                if (below <= remaining) {
                    // Skip every substring that continues through this child
                    remaining -= below;
                } else {
                    // The answer continues through this child
                    result.add(entry.getKey());
                    current = child;
                    remaining--;
                    descended = true;
                    break;
                }
            }
            if (!descended) {
                return;
            }
        }
    }
}
from collections import defaultdict
class Node:
    """One state of the structure built by extend()."""

    def __init__(self):
        # Number of distinct paths starting at this state, counting the
        # empty path; 0 means the state has not been counted yet.
        self.count = 0
        # Length value assigned to this state by extend().
        self.length = 0
        # Index of the suffix-link target, -1 when there is none.
        self.link = -1
        # Outgoing transitions: character -> index of the target state.
        self.nextNodes = {}
# Constants
# Capacity bound: the pool below holds 2 * MAX_N states
MAX_N = 100005
# Global variables
# Pre-allocated pool of states; extend() hands them out by index
nodes = [Node() for _ in range(2 * MAX_N)]
# Number of pool entries in use (init() sets it to 1 for the root)
size = 0
# Index of the state created for the most recently added character
last = 0
# Characters of the answer, appended by dfs()
result = []
# Initialize the suffix tree
def init():
    """Set up the root state (index 0) and mark it as the only state in use."""
    global size, last
    nodes[0].length = 0
    nodes[0].link = -1
    size = 1
    last = 0
# Extend the suffix tree with a new character
def extend(character):
    """Append one character to the structure.

    Takes one new state from the pool and, when an existing transition has
    to be split, one additional clone state. Updates the globals ``size``
    and ``last``.
    """
    global size, last
    current = size
    size += 1
    nodes[current].length = nodes[last].length + 1
    previous = last
    # Follow suffix links and add the missing transition on the way
    while previous != -1 and character not in nodes[previous].nextNodes:
        nodes[previous].nextNodes[character] = current
        previous = nodes[previous].link
    if previous == -1:
        # Ran off the root: no state had this transition yet
        nodes[current].link = 0
    else:
        next_node = nodes[previous].nextNodes[character]
        if nodes[previous].length + 1 == nodes[next_node].length:
            # The existing target can serve as suffix link as is
            nodes[current].link = next_node
        else:
            # Split: copy next_node into a clone with the shorter length
            # and redirect the affected transitions to it
            clone = size
            size += 1
            nodes[clone].length = nodes[previous].length + 1
            nodes[clone].nextNodes = nodes[next_node].nextNodes.copy()
            nodes[clone].link = nodes[next_node].link
            while previous != -1 and nodes[previous].nextNodes[character] == next_node:
                nodes[previous].nextNodes[character] = clone
                previous = nodes[previous].link
            nodes[next_node].link = nodes[current].link = clone
    last = current
# Calculate the number of distinct substrings in each subtree
def calculate():
    """Compute ``count`` for every state reachable from the root."""
    calculate_helper(0)


def calculate_helper(nodeIndex):
    """Compute ``count`` for ``nodeIndex`` and all uncounted states below it.

    A state's count is one (for the state itself) plus the counts of all its
    transition targets. An explicit stack is used instead of recursion: the
    depth equals the longest path, which can be as long as the input string
    and would exceed Python's default recursion limit.
    """
    nodes[nodeIndex].count = 1
    # Each frame: (state index, iterator over its not-yet-handled targets)
    stack = [(nodeIndex, iter(nodes[nodeIndex].nextNodes.values()))]
    while stack:
        current, children = stack[-1]
        for child in children:
            if nodes[child].count == 0:
                # Not counted yet: descend; its total is added on the way back
                nodes[child].count = 1
                stack.append((child, iter(nodes[child].nextNodes.values())))
                break
            nodes[current].count += nodes[child].count
        else:
            # All targets handled: hand the finished total to the parent
            stack.pop()
            if stack:
                nodes[stack[-1][0]].count += nodes[current].count
# DFS traversal to find kth smallest distinct substring
def dfs(nodeIndex, k):
    """Append to ``result`` the substring with zero-based rank ``k``.

    The rank is taken among the distinct non-empty substrings that start at
    ``nodeIndex``; calculate() must have been run first. Nothing is appended
    when ``k`` is negative or not smaller than the number of such substrings.
    Written as a loop rather than recursion because the depth equals the
    length of the answer.
    """
    current = nodeIndex
    while k >= 0:
        for char, nextNodeIndex in sorted(nodes[current].nextNodes.items()):
            below = nodes[nextNodeIndex].count
            if below <= k:
                # Skip every substring that continues through this target
                k -= below
            else:
                # The answer continues through this target
                result.append(char)
                current = nextNodeIndex
                k -= 1
                break
        else:
            # No transition contains rank k
            return
# Main function
def main():
    """Demo driver: print the 10th smallest distinct substring of a fixed string."""
    inputString = "babaacbaab"
    kthSmallest = 10
    init()
    for c in inputString:
        extend(c)
    calculate()
    # dfs() expects a zero-based rank
    dfs(0, kthSmallest - 1)
    print(''.join(result))


if __name__ == "__main__":
    main()
// Define a Node class representing each node in the suffix tree
class Node {
  // Every state starts uncounted (count 0), with length 0, no suffix
  // link (-1) and an empty character -> state-index transition table.
  constructor() {
    Object.assign(this, {
      count: 0,
      length: 0,
      link: -1,
      nextNodes: {},
    });
  }
}
// Constants
// Capacity bound: the pool below holds 2 * MAX_N states
const MAX_N = 100005;
// Global variables
// Pre-allocated pool of states; extend() hands them out by index
let nodes = Array.from({ length: 2 * MAX_N }, () => new Node());
// Number of pool entries in use (init() sets it to 1 for the root)
let size = 0;
// Index of the state created for the most recently added character
let last = 0;
// Characters of the answer, appended by dfs()
let result = [];
// Initialize the suffix tree
function init() {
  // State 0 is the root; it is the only state in use after this call
  const root = nodes[0];
  root.length = 0;
  root.link = -1;
  last = 0;
  size = 1;
}
// Extend the suffix tree with a new character
function extend(character) {
  const created = size++;
  nodes[created].length = nodes[last].length + 1;

  // Follow suffix links and add the missing transition on the way
  let walker = last;
  for (; walker !== -1 && !(character in nodes[walker].nextNodes); walker = nodes[walker].link) {
    nodes[walker].nextNodes[character] = created;
  }

  if (walker === -1) {
    // Ran off the root: no state had this transition yet
    nodes[created].link = 0;
    last = created;
    return;
  }

  const target = nodes[walker].nextNodes[character];
  if (nodes[walker].length + 1 === nodes[target].length) {
    // The existing target can serve as suffix link as is
    nodes[created].link = target;
    last = created;
    return;
  }

  // Split: copy the target into a clone with the shorter length and
  // redirect the affected transitions to it
  const copy = size++;
  nodes[copy].length = nodes[walker].length + 1;
  nodes[copy].nextNodes = Object.assign({}, nodes[target].nextNodes);
  nodes[copy].link = nodes[target].link;
  while (walker !== -1 && nodes[walker].nextNodes[character] === target) {
    nodes[walker].nextNodes[character] = copy;
    walker = nodes[walker].link;
  }
  nodes[created].link = copy;
  nodes[target].link = copy;
  last = created;
}
// Calculate the number of distinct substrings in each subtree
// Compute `count` for every state reachable from the root.
function calculate() {
  calculateHelper(0);
}

// Compute `count` for nodeIndex and all uncounted states below it:
// one for the state itself plus the counts of all transition targets.
// Uses an explicit stack instead of recursion, because the depth equals
// the longest path (up to the input length) and would otherwise exceed
// the engine's call-stack limit on long inputs.
function calculateHelper(nodeIndex) {
  const openFrame = (index) => ({
    index,
    children: Object.values(nodes[index].nextNodes),
    next: 0,
  });

  nodes[nodeIndex].count = 1;
  const stack = [openFrame(nodeIndex)];
  while (stack.length > 0) {
    const frame = stack[stack.length - 1];
    if (frame.next < frame.children.length) {
      const child = frame.children[frame.next++];
      if (nodes[child].count === 0) {
        // Not counted yet: descend; its total is added on the way back
        nodes[child].count = 1;
        stack.push(openFrame(child));
      } else {
        nodes[frame.index].count += nodes[child].count;
      }
    } else {
      // All targets handled: hand the finished total to the parent
      stack.pop();
      if (stack.length > 0) {
        nodes[stack[stack.length - 1].index].count += nodes[frame.index].count;
      }
    }
  }
}
// DFS traversal to find kth smallest distinct substring
// Append to `result` the characters of the substring with zero-based
// rank k among the distinct non-empty substrings starting at nodeIndex.
// calculate() must have run first. Nothing is appended when k is negative
// or not smaller than the number of such substrings. Written as a loop
// rather than recursion because the depth equals the answer's length.
function dfs(nodeIndex, k) {
  let current = nodeIndex;
  let remaining = k;
  while (remaining >= 0) {
    let descended = false;
    for (const [char, nextNodeIndex] of Object.entries(nodes[current].nextNodes).sort()) {
      const below = nodes[nextNodeIndex].count;
      if (below <= remaining) {
        // Skip every substring that continues through this target
        remaining -= below;
      } else {
        // The answer continues through this target
        result.push(char);
        current = nextNodeIndex;
        remaining -= 1;
        descended = true;
        break;
      }
    }
    if (!descended) {
      return;
    }
  }
}
// Main function
// Demo driver: prints the 10th smallest distinct substring of a fixed string.
function main() {
  const inputString = "babaacbaab";
  const kthSmallest = 10;
  init();
  Array.from(inputString).forEach((c) => {
    extend(c);
  });
  calculate();
  // dfs() expects a zero-based rank
  const rank = kthSmallest - 1;
  dfs(0, rank);
  console.log(result.join(''));
}
// Run the demo
main();
Output
aba
Time Complexity: O(N), where N is the length of the input string.
Auxiliary Space: O(N^2).