Given a string of lowercase alphabetic characters, find the K-th character (1-indexed) of the string formed by concatenating all distinct substrings of the given string in sorted (lexicographic) order.
Examples:
Input : str = “banana”
K = 10
Output : n
All distinct substrings in sorted order are:
"a", "an", "ana", "anan", "anana",
"b", "ba", "ban", "bana", "banan",
"banana", "n", "na", "nan", "nana"
Concatenated string = “aananaanana
nanabbabanbanabananbananannanannana”
We can see that the 10th character of the
above concatenated string is ‘n’,
which is our final answer.
A simple solution is to generate all substrings of a given string and store them in an array. Once substrings are generated, sort them and concatenate after sorting. Finally print K-th character in the concatenated string.
An efficient solution is based on counting the distinct substrings of a string using a suffix array; the same method is used to solve this problem. After building the suffix array and the LCP array, we loop over all LCP values and, for each value, calculate the number of characters to skip. We keep subtracting that many characters from K; as soon as the number of characters to skip is at least K, we stop and loop over the substrings corresponding to the current lcp[i], from length lcp[i] + 1 up to the full length of the suffix, and then print the K-th character.
Implementation:
// C++ program to print Kth character // in sorted concatenated substrings #include <bits/stdc++.h> using namespace std;
// Structure to store information of a suffix struct suffix
{ int index; // To store original index
int rank[2]; // To store ranks and next
// rank pair
}; // A comparison function used by sort() to compare // two suffixes. Compares two pairs, returns 1 if // first pair is smaller int cmp( struct suffix a, struct suffix b)
{ return (a.rank[0] == b.rank[0])?
(a.rank[1] < b.rank[1] ?1: 0):
(a.rank[0] < b.rank[0] ?1: 0);
} // This is the main function that takes a string // 'txt' of size n as an argument, builds and return // the suffix array for the given string vector< int > buildSuffixArray(string txt, int n)
{ // A structure to store suffixes and their indexes
struct suffix suffixes[n];
// Store suffixes and their indexes in an array
// of structures. The structure is needed to sort
// the suffixes alphabetically and maintain their
// old indexes while sorting
for ( int i = 0; i < n; i++)
{
suffixes[i].index = i;
suffixes[i].rank[0] = txt[i] - 'a' ;
suffixes[i].rank[1] = ((i+1) < n)?
(txt[i + 1] - 'a' ): -1;
}
// Sort the suffixes using the comparison function
// defined above.
sort(suffixes, suffixes+n, cmp);
// At his point, all suffixes are sorted according
// to first 2 characters. Let us sort suffixes
// according to first 4 characters, then first
// 8 and so on
int ind[n]; // This array is needed to get the
// index in suffixes[] from original
// index. This mapping is needed to get
// next suffix.
for ( int k = 4; k < 2*n; k = k*2)
{
// Assigning rank and index values to first suffix
int rank = 0;
int prev_rank = suffixes[0].rank[0];
suffixes[0].rank[0] = rank;
ind[suffixes[0].index] = 0;
// Assigning rank to suffixes
for ( int i = 1; i < n; i++)
{
// If first rank and next ranks are same as
// that of previous suffix in array, assign
// the same new rank to this suffix
if (suffixes[i].rank[0] == prev_rank &&
suffixes[i].rank[1] == suffixes[i-1].rank[1])
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = rank;
}
else // Otherwise increment rank and assign
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = ++rank;
}
ind[suffixes[i].index] = i;
}
// Assign next rank to every suffix
for ( int i = 0; i < n; i++)
{
int nextindex = suffixes[i].index + k/2;
suffixes[i].rank[1] = (nextindex < n)?
suffixes[ind[nextindex]].rank[0]: -1;
}
// Sort the suffixes according to first k characters
sort(suffixes, suffixes+n, cmp);
}
// Store indexes of all sorted suffixes in the suffix
// array
vector< int >suffixArr;
for ( int i = 0; i < n; i++)
suffixArr.push_back(suffixes[i].index);
// Return the suffix array
return suffixArr;
} /* To construct and return LCP */ vector< int > kasai(string txt, vector< int > suffixArr)
{ int n = suffixArr.size();
// To store LCP array
vector< int > lcp(n, 0);
// An auxiliary array to store inverse of suffix array
// elements. For example if suffixArr[0] is 5, the
// invSuff[5] would store 0. This is used to get next
// suffix string from suffix array.
vector< int > invSuff(n, 0);
// Fill values in invSuff[]
for ( int i=0; i < n; i++)
invSuff[suffixArr[i]] = i;
// Initialize length of previous LCP
int k = 0;
// Process all suffixes one by one starting from
// first suffix in txt[]
for ( int i=0; i<n; i++)
{
/* If the current suffix is at n-1, then we don’t
have next substring to consider. So lcp is not
defined for this substring, we put zero. */
if (invSuff[i] == n-1)
{
k = 0;
continue ;
}
/* j contains index of the next substring to
be considered to compare with the present
substring, i.e., next string in suffix array */
int j = suffixArr[invSuff[i]+1];
// Directly start matching from k'th index as
// at-least k-1 characters will match
while (i+k<n && j+k<n && txt[i+k]==txt[j+k])
k++;
lcp[invSuff[i]] = k; // lcp for the present suffix.
// Deleting the starting character from the string.
if (k>0)
k--;
}
// return the constructed lcp array
return lcp;
} // Utility method to get sum of first N numbers int sumOfFirstN( int N)
{ return (N * (N + 1)) / 2;
} // Returns Kth character in sorted concatenated // substrings of str char printKthCharInConcatSubstring(string str, int K)
{ int n = str.length();
// calculating suffix array and lcp array
vector< int > suffixArr = buildSuffixArray(str, n);
vector< int > lcp = kasai(str, suffixArr);
for ( int i = 0; i < lcp.size(); i++)
{
// skipping characters common to substring
// (n - suffixArr[i]) is length of current
// maximum substring lcp[i] will length of
// common substring
int charToSkip = sumOfFirstN(n - suffixArr[i]) -
sumOfFirstN(lcp[i]);
/* if characters are more than K, that means
Kth character belongs to substring
corresponding to current lcp[i]*/
if (K <= charToSkip)
{
// loop from current lcp value to current
// string length
for ( int j = lcp[i] + 1; j <= (n-suffixArr[i]); j++)
{
int curSubstringLen = j;
/* Again reduce K by current substring's
length one by one and when it becomes less,
print Kth character of current substring */
if (K <= curSubstringLen)
return str[(suffixArr[i] + K - 1)];
else
K -= curSubstringLen;
}
break ;
}
else
K -= charToSkip;
}
} // Driver code to test above methods int main()
{ string str = "banana" ;
int K = 10;
cout << printKthCharInConcatSubstring(str, K);
return 0;
} |
import java.util.Arrays;
class Suffix {
int index;
int [] rank = new int [ 2 ];
} public class Main {
// Function to build the suffix array
static int [] buildSuffixArray(String txt, int n) {
// Create an array of Suffix objects
Suffix[] suffixes = new Suffix[n];
for ( int i = 0 ; i < n; i++) {
suffixes[i] = new Suffix();
}
// Assign index and ranks to suffixes
for ( int i = 0 ; i < n; i++) {
suffixes[i].index = i;
suffixes[i].rank[ 0 ] = txt.charAt(i) - 'a' ;
suffixes[i].rank[ 1 ] = ((i + 1 ) < n) ? txt.charAt(i + 1 ) - 'a' : - 1 ;
}
// Sort the suffixes using custom comparison
Arrays.sort(suffixes, (a, b) -> {
if (a.rank[ 0 ] != b.rank[ 0 ]) {
return a.rank[ 0 ] - b.rank[ 0 ];
} else {
return a.rank[ 1 ] - b.rank[ 1 ];
}
});
int [] ind = new int [n];
int k = 4 ;
while (k < 2 * n) {
k *= 2 ;
int rank = 0 ;
int prev_rank = suffixes[ 0 ].rank[ 0 ];
suffixes[ 0 ].rank[ 0 ] = rank;
ind[suffixes[ 0 ].index] = 0 ;
for ( int i = 1 ; i < n; i++) {
if (suffixes[i].rank[ 0 ] == prev_rank && suffixes[i].rank[ 1 ] == suffixes[i - 1 ].rank[ 1 ]) {
prev_rank = suffixes[i].rank[ 0 ];
suffixes[i].rank[ 0 ] = rank;
} else {
prev_rank = suffixes[i].rank[ 0 ];
rank++;
suffixes[i].rank[ 0 ] = rank;
}
ind[suffixes[i].index] = i;
}
for ( int i = 0 ; i < n; i++) {
int nextindex = suffixes[i].index + k / 2 ;
suffixes[i].rank[ 1 ] = (nextindex < n) ? suffixes[ind[nextindex]].rank[ 0 ] : - 1 ;
}
Arrays.sort(suffixes, (a, b) -> {
if (a.rank[ 0 ] != b.rank[ 0 ]) {
return a.rank[ 0 ] - b.rank[ 0 ];
} else {
return a.rank[ 1 ] - b.rank[ 1 ];
}
});
}
// Store the suffix array indexes
int [] suffixArr = new int [n];
for ( int i = 0 ; i < n; i++) {
suffixArr[i] = suffixes[i].index;
}
return suffixArr;
}
// Function to compute the Longest Common Prefix (LCP) array
static int [] kasai(String txt, int [] suffixArr) {
int n = suffixArr.length;
int [] lcp = new int [n];
int [] invSuff = new int [n];
for ( int i = 0 ; i < n; i++) {
invSuff[suffixArr[i]] = i;
}
int k = 0 ;
for ( int i = 0 ; i < n; i++) {
if (invSuff[i] == n - 1 ) {
k = 0 ;
continue ;
}
int j = suffixArr[invSuff[i] + 1 ];
while (i + k < n && j + k < n && txt.charAt(i + k) == txt.charAt(j + k)) {
k++;
}
lcp[invSuff[i]] = k;
if (k > 0 ) {
k--;
}
}
return lcp;
}
// Utility function to calculate sum of first N numbers
static int sumOfFirstN( int N) {
return (N * (N + 1 )) / 2 ;
}
// Function to find Kth character in sorted concatenated substrings
static char printKthCharInConcatSubstring(String string, int K) {
int n = string.length();
int [] suffixArr = buildSuffixArray(string, n);
int [] lcp = kasai(string, suffixArr);
for ( int i = 0 ; i < lcp.length; i++) {
int charToSkip = (sumOfFirstN(n - suffixArr[i]) - sumOfFirstN(lcp[i]));
if (K <= charToSkip) {
for ( int j = lcp[i] + 1 ; j <= (n - suffixArr[i]); j++) {
int curSubstringLen = j;
if (K <= curSubstringLen) {
return string.charAt(suffixArr[i] + K - 1 );
} else {
K -= curSubstringLen;
}
}
break ;
} else {
K -= charToSkip;
}
}
return ' ' ;
}
public static void main(String[] args) {
String string = "banana" ;
int K = 10 ;
System.out.println(printKthCharInConcatSubstring(string, K));
}
} |
# Python3 program to print the K-th character of the string obtained by
# concatenating all distinct substrings of a text in sorted order


# Structure to store information of a suffix
class suffix:
    def __init__(self):
        # Start position of the suffix in the original text
        self.index = 0
        # rank[0]: rank of the first half of the compared prefix,
        # rank[1]: rank of the half that follows it (-1 if none)
        self.rank = [0] * 2


def buildSuffixArray(txt: str, n: int) -> list:
    """Build the suffix array of ``txt`` by prefix doubling.

    ``txt`` is expected to hold lower-case letters and ``n`` to be its
    length. Returns the start indexes of all suffixes in sorted order.
    """
    suffixes = [suffix() for _ in range(n)]

    # Seed every suffix with the ranks of its first two characters.
    for i in range(n):
        suffixes[i].index = i
        suffixes[i].rank[0] = ord(txt[i]) - ord('a')
        suffixes[i].rank[1] = (ord(txt[i + 1]) - ord('a')) if (i + 1) < n else -1
    suffixes.sort(key=lambda s: tuple(s.rank))

    # ind[p] is the current position in suffixes of the suffix that starts
    # at text position p; needed to look up the "next half" rank.
    ind = [0] * n

    # Suffixes are sorted by their first 2 characters here; each round
    # doubles that to k = 4, 8, ... characters. k is doubled at the END of
    # the round: doubling it first would skip the offset-2 round and leave
    # some suffixes (e.g. "na"/"nana") in the wrong order.
    k = 4
    while k < 2 * n:
        # Re-rank: equal (rank[0], rank[1]) pairs share a rank.
        rank = 0
        prev_rank = suffixes[0].rank[0]
        suffixes[0].rank[0] = rank
        ind[suffixes[0].index] = 0
        for i in range(1, n):
            # prev_rank is the previous suffix's OLD rank[0]; the stored
            # value was already overwritten with its new rank.
            same_as_previous = (suffixes[i].rank[0] == prev_rank and
                                suffixes[i].rank[1] == suffixes[i - 1].rank[1])
            prev_rank = suffixes[i].rank[0]
            if not same_as_previous:
                rank += 1
            suffixes[i].rank[0] = rank
            ind[suffixes[i].index] = i

        # The second key is the rank of the suffix starting k/2 further on.
        for i in range(n):
            nextindex = suffixes[i].index + k // 2
            suffixes[i].rank[1] = (suffixes[ind[nextindex]].rank[0]
                                   if nextindex < n else -1)

        # Sort the suffixes according to their first k characters.
        suffixes.sort(key=lambda s: tuple(s.rank))
        k *= 2

    return [s.index for s in suffixes]


def kasai(txt: str, suffixArr: list) -> list:
    """Construct the LCP array with Kasai's algorithm.

    ``lcp[i]`` is the length of the longest common prefix of the suffixes
    at sorted positions ``i`` and ``i + 1``; the last entry is 0.
    ``suffixArr`` is expected to be the suffix array of ``txt``.
    """
    n = len(suffixArr)
    lcp = [0] * n

    # invSuff[p] is the sorted position of the suffix starting at p.
    invSuff = [0] * n
    for i in range(n):
        invSuff[suffixArr[i]] = i

    k = 0  # Length of the previous LCP
    for i in range(n):
        # The last suffix in sorted order has no successor.
        if invSuff[i] == n - 1:
            k = 0
            continue

        # j is the start of the next suffix in sorted order.
        j = suffixArr[invSuff[i] + 1]

        # Resume at offset k: that many characters are known to match.
        while i + k < n and j + k < n and txt[i + k] == txt[j + k]:
            k += 1
        lcp[invSuff[i]] = k

        # Dropping the first character shortens the match by one.
        if k > 0:
            k -= 1
    return lcp


def sumOfFirstN(N: int) -> int:
    """Return 1 + 2 + ... + N."""
    return (N * (N + 1)) // 2


def printKthCharInConcatSubstring(string: str,
                                  K: int) -> str:
    """Return the K-th (1-indexed) character of the concatenation of all
    distinct substrings of ``string`` in sorted order.

    Returns a single space when K is smaller than 1 or larger than the
    length of that concatenation.
    """
    if K < 1:
        return " "

    n = len(string)
    suffixArr = buildSuffixArray(string, n)
    lcp = kasai(string, suffixArr)

    remaining = K
    for i in range(n):
        suffix_len = n - suffixArr[i]

        # Prefixes no longer than the LCP shared with the PREVIOUS suffix
        # were already emitted by it; only longer prefixes are new. Using
        # the LCP with the next suffix breaks the sorted order.
        shared = lcp[i - 1] if i > 0 else 0

        charToSkip = sumOfFirstN(suffix_len) - sumOfFirstN(shared)
        if remaining > charToSkip:
            remaining -= charToSkip
            continue

        # The answer is inside one of this suffix's new prefixes.
        for length in range(shared + 1, suffix_len + 1):
            if remaining <= length:
                return string[suffixArr[i] + remaining - 1]
            remaining -= length

    # K exceeds the total length of the concatenated string.
    return " "


# Driver code
if __name__ == "__main__":
    string = "banana"
    K = 10
    print(printKthCharInConcatSubstring(string, K))

# This code is contributed by sanjeev2552
using System;
/// <summary>One suffix of the text together with the rank pair used while sorting.</summary>
public class Suffix
{
    public int Index;                // start position of this suffix in the text
    public int[] Rank = new int[2];  // Rank[0]: rank of the first half of the compared prefix;
                                     // Rank[1]: rank of the half that follows it (-1 if none)
}

/// <summary>
/// Finds the K-th character of the string obtained by concatenating all
/// distinct substrings of a text in sorted order.
/// </summary>
public class MainClass
{
    // Orders suffixes by Rank[0], breaking ties with Rank[1].
    private static int CompareByRank(Suffix a, Suffix b)
    {
        if (a.Rank[0] != b.Rank[0])
        {
            return a.Rank[0].CompareTo(b.Rank[0]);
        }
        return a.Rank[1].CompareTo(b.Rank[1]);
    }

    /// <summary>Builds the suffix array of <paramref name="txt"/> by prefix doubling.</summary>
    /// <param name="txt">Text made of lower-case letters.</param>
    /// <param name="n">Length of <paramref name="txt"/>.</param>
    /// <returns>Start indexes of all suffixes, in sorted order.</returns>
    public static int[] BuildSuffixArray(string txt, int n)
    {
        Suffix[] suffixes = new Suffix[n];

        // Seed every suffix with the ranks of its first two characters.
        for (int i = 0; i < n; i++)
        {
            suffixes[i] = new Suffix();
            suffixes[i].Index = i;
            suffixes[i].Rank[0] = txt[i] - 'a';
            suffixes[i].Rank[1] = (i + 1) < n ? txt[i + 1] - 'a' : -1;
        }
        Array.Sort(suffixes, CompareByRank);

        // ind[p] is the current position in suffixes[] of the suffix starting at p.
        int[] ind = new int[n];

        // Suffixes are sorted by their first 2 characters here; each round
        // doubles that to k = 4, 8, ... characters. k must be doubled AFTER
        // the round: doubling it first skips the offset-2 round and can
        // leave suffixes in the wrong order.
        for (int k = 4; k < 2 * n; k *= 2)
        {
            // Re-rank: equal (Rank[0], Rank[1]) pairs share a rank.
            int rank = 0;
            int prevRank = suffixes[0].Rank[0];
            suffixes[0].Rank[0] = rank;
            ind[suffixes[0].Index] = 0;
            for (int i = 1; i < n; i++)
            {
                // prevRank is the previous suffix's OLD Rank[0]; the stored
                // value was already overwritten with its new rank.
                bool sameAsPrevious = suffixes[i].Rank[0] == prevRank
                    && suffixes[i].Rank[1] == suffixes[i - 1].Rank[1];
                prevRank = suffixes[i].Rank[0];
                if (!sameAsPrevious)
                {
                    rank++;
                }
                suffixes[i].Rank[0] = rank;
                ind[suffixes[i].Index] = i;
            }

            // The second key is the rank of the suffix starting k/2 further on.
            for (int i = 0; i < n; i++)
            {
                int nextindex = suffixes[i].Index + k / 2;
                suffixes[i].Rank[1] = (nextindex < n) ? suffixes[ind[nextindex]].Rank[0] : -1;
            }
            Array.Sort(suffixes, CompareByRank);
        }

        int[] suffixArr = new int[n];
        for (int i = 0; i < n; i++)
        {
            suffixArr[i] = suffixes[i].Index;
        }
        return suffixArr;
    }

    /// <summary>Computes the LCP array with Kasai's algorithm.</summary>
    /// <param name="txt">The text.</param>
    /// <param name="suffixArr">Suffix array of <paramref name="txt"/>.</param>
    /// <returns>
    /// Array whose entry i is the length of the longest common prefix of the
    /// suffixes at sorted positions i and i + 1 (last entry 0).
    /// </returns>
    public static int[] Kasai(string txt, int[] suffixArr)
    {
        int n = suffixArr.Length;
        int[] lcp = new int[n];

        // invSuff[p] is the sorted position of the suffix starting at p.
        int[] invSuff = new int[n];
        for (int i = 0; i < n; i++)
        {
            invSuff[suffixArr[i]] = i;
        }

        int k = 0; // length of the previous LCP
        for (int i = 0; i < n; i++)
        {
            // The last suffix in sorted order has no successor.
            if (invSuff[i] == n - 1)
            {
                k = 0;
                continue;
            }
            int j = suffixArr[invSuff[i] + 1];
            // Resume at offset k: that many characters are known to match.
            while (i + k < n && j + k < n && txt[i + k] == txt[j + k])
            {
                k++;
            }
            lcp[invSuff[i]] = k;
            // Dropping the first character shortens the match by one.
            if (k > 0)
            {
                k--;
            }
        }
        return lcp;
    }

    /// <summary>
    /// Returns 1 + 2 + ... + N. The product is formed in 64 bits so the
    /// intermediate value cannot overflow; the result itself still has to
    /// fit in an int.
    /// </summary>
    public static int SumOfFirstN(int N)
    {
        return (int)((long)N * (N + 1) / 2);
    }

    /// <summary>
    /// Returns the K-th (1-indexed) character of the concatenation of all
    /// distinct substrings of <paramref name="str"/> in sorted order, or ' '
    /// when K is smaller than 1 or larger than the length of that concatenation.
    /// </summary>
    public static char PrintKthCharInConcatSubstring(string str, int K)
    {
        if (K < 1)
        {
            return ' ';
        }
        int n = str.Length;
        int[] suffixArr = BuildSuffixArray(str, n);
        int[] lcp = Kasai(str, suffixArr);

        int remaining = K;
        for (int i = 0; i < n; i++)
        {
            int suffixLen = n - suffixArr[i];
            // Prefixes no longer than the LCP shared with the PREVIOUS suffix
            // were already emitted by it; only longer prefixes are new. Using
            // the LCP with the next suffix breaks the sorted order.
            int shared = (i == 0) ? 0 : lcp[i - 1];
            int charToSkip = SumOfFirstN(suffixLen) - SumOfFirstN(shared);
            if (remaining > charToSkip)
            {
                remaining -= charToSkip;
                continue;
            }
            // The answer is inside one of this suffix's new prefixes.
            for (int len = shared + 1; len <= suffixLen; len++)
            {
                if (remaining <= len)
                {
                    return str[suffixArr[i] + remaining - 1];
                }
                remaining -= len;
            }
        }
        // K exceeds the total length of the concatenated string.
        return ' ';
    }

    public static void Main(string[] args)
    {
        string str = "banana";
        int K = 10;
        Console.WriteLine(PrintKthCharInConcatSubstring(str, K));
    }
}
// Structure to store information of a suffix
class Suffix {
    constructor() {
        // Start position of the suffix in the original text
        this.index = 0;
        // rank[0]: rank of the first half of the compared prefix,
        // rank[1]: rank of the half that follows it (-1 if none)
        this.rank = [0, 0];
    }
}

// Orders suffixes by rank[0], breaking ties with rank[1]
const compareByRank = (a, b) => a.rank[0] - b.rank[0] || a.rank[1] - b.rank[1];

// Function to build and return the suffix array for the given string
// (start indexes of all suffixes, in sorted order) by prefix doubling.
// The text is expected to hold lower-case letters.
function buildSuffixArray(txt) {
    const n = txt.length;
    const base = 'a'.charCodeAt(0);
    const suffixes = new Array(n);

    // Seed every suffix with the ranks of its first two characters
    for (let i = 0; i < n; i++) {
        suffixes[i] = new Suffix();
        suffixes[i].index = i;
        suffixes[i].rank[0] = txt.charCodeAt(i) - base;
        suffixes[i].rank[1] = (i + 1) < n ? txt.charCodeAt(i + 1) - base : -1;
    }
    suffixes.sort(compareByRank);

    // ind[p] is the current position in suffixes of the suffix starting at p
    const ind = new Array(n).fill(0);

    // Suffixes are sorted by their first 2 characters here; each round
    // doubles that to k = 4, 8, ... characters
    for (let k = 4; k < 2 * n; k *= 2) {
        // Re-rank with a running counter: equal (rank[0], rank[1]) pairs
        // share a rank. The previous suffix's OLD rank[0] must be captured
        // BEFORE it is overwritten, otherwise the comparison is meaningless.
        let rank = 0;
        let prevRank = suffixes[0].rank[0];
        suffixes[0].rank[0] = rank;
        ind[suffixes[0].index] = 0;
        for (let i = 1; i < n; i++) {
            const sameAsPrevious = suffixes[i].rank[0] === prevRank &&
                suffixes[i].rank[1] === suffixes[i - 1].rank[1];
            prevRank = suffixes[i].rank[0];
            if (!sameAsPrevious) {
                rank++;
            }
            suffixes[i].rank[0] = rank;
            ind[suffixes[i].index] = i;
        }

        // The second key is the rank of the suffix starting k/2 further on
        // (k is a power of two, so k / 2 is an integer)
        for (let i = 0; i < n; i++) {
            const nextindex = suffixes[i].index + k / 2;
            suffixes[i].rank[1] = (nextindex < n) ? suffixes[ind[nextindex]].rank[0] : -1;
        }

        // Sort the suffixes according to their first k characters
        suffixes.sort(compareByRank);
    }

    // Create the suffix array
    return suffixes.map(suffix => suffix.index);
}

// Function to construct and return the Longest Common Prefix (LCP) array
// with Kasai's algorithm. lcp[i] is the length of the longest common prefix
// of the suffixes at sorted positions i and i + 1; the last entry is 0.
function kasai(txt, suffixArr) {
    const n = suffixArr.length;
    const lcp = new Array(n).fill(0);

    // invSuff[p] is the sorted position of the suffix starting at p
    const invSuff = new Array(n).fill(0);
    for (let i = 0; i < n; i++) {
        invSuff[suffixArr[i]] = i;
    }

    // Length of the previous LCP
    let k = 0;
    for (let i = 0; i < n; i++) {
        // The last suffix in sorted order has no successor
        if (invSuff[i] === n - 1) {
            k = 0;
            continue;
        }
        const j = suffixArr[invSuff[i] + 1];
        // Resume at offset k: that many characters are known to match
        while (i + k < n && j + k < n && txt[i + k] === txt[j + k]) {
            k++;
        }
        lcp[invSuff[i]] = k;
        // Dropping the first character shortens the match by one
        if (k > 0) {
            k--;
        }
    }
    return lcp;
}

// Utility method to get sum of first N numbers
function sumOfFirstN(N) {
    return (N * (N + 1)) / 2;
}

// Returns the K-th (1-indexed) character of the concatenation of all
// distinct substrings of the string in sorted order, or ' ' when K is
// smaller than 1 or larger than the length of that concatenation.
function printKthCharInConcatSubstring(string, K) {
    if (K < 1) {
        return ' ';
    }
    const n = string.length;
    const suffixArr = buildSuffixArray(string);
    const lcp = kasai(string, suffixArr);

    let remaining = K;
    for (let i = 0; i < n; i++) {
        const suffixLen = n - suffixArr[i];
        // Prefixes no longer than the LCP shared with the PREVIOUS suffix
        // were already emitted by it; only longer prefixes are new. Using
        // the LCP with the next suffix breaks the sorted order.
        const shared = i === 0 ? 0 : lcp[i - 1];
        const charToSkip = sumOfFirstN(suffixLen) - sumOfFirstN(shared);
        if (remaining > charToSkip) {
            remaining -= charToSkip;
            continue;
        }
        // The answer is inside one of this suffix's new prefixes; the
        // longest prefix has length suffixLen (not suffixLen + 1)
        for (let len = shared + 1; len <= suffixLen; len++) {
            if (remaining <= len) {
                return string[suffixArr[i] + remaining - 1];
            }
            remaining -= len;
        }
    }
    // K exceeds the total length of the concatenated string
    return ' ';
}

// Driver code
const string = "banana";
const K = 10;
console.log(printKthCharInConcatSubstring(string, K));
Output: n