Given a string, count all distinct substrings of the given string.
Examples:
Input : abcd
Output : 10
Explanation : The substrings are abcd, abc, ab, a, bcd, bc, b, cd, c, d. All ten are distinct.
Input : aaa
Output : 3
Explanation : The substrings are aaa, aa, a, aa, a, a. They are not all distinct; only a, aa and aaa are.
Prerequisite : Print subarrays of a given array
The idea is to use hash table (HashSet in Java) to store all generated substrings. Finally we return size of the HashSet.
Implementation:
C++
#include<bits/stdc++.h>
using namespace std;
// Counts the distinct non-empty substrings of `str`.
// Every (start, length) slice is inserted into an ordered set, which discards
// duplicates; the size of that set is the answer.
int distinctSubstring(std::string str)
{
    std::set<std::string> seen;
    const std::size_t total = str.size();
    for (std::size_t start = 0; start < total; ++start) {
        // Longest slice that still fits when starting at `start`.
        const std::size_t longest = total - start;
        for (std::size_t len = 1; len <= longest; ++len) {
            seen.insert(str.substr(start, len));
        }
    }
    return static_cast<int>(seen.size());
}
// Demo driver: prints the distinct-substring count of a fixed sample string
// (no trailing newline).
int main()
{
    const std::string sample = "aaaa";
    const int total = distinctSubstring(sample);
    std::cout << total;
}
|
Java
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
public class DistinctSubstring {
    /**
     * Counts the distinct non-empty substrings of {@code str}.
     * Each [start, end) slice is added to a hash set, which discards
     * duplicates; the set's size is returned.
     */
    public static int distinctSubstring(String str)
    {
        final int n = str.length();
        final Set<String> seen = new HashSet<String>();
        for (int start = 0; start < n; start++) {
            int end = start + 1;
            while (end <= n) {
                seen.add(str.substring(start, end));
                end++;
            }
        }
        return seen.size();
    }

    /** Demo entry point: prints the count for a fixed sample string. */
    public static void main(String[] args)
    {
        final String sample = "aaaa";
        System.out.println(distinctSubstring(sample));
    }
}
|
Python3
def distinctSubstring(str):
    """Return the number of distinct non-empty substrings of ``str``.

    Every slice ``str[i:j]`` with ``i < j`` is collected into a set, which
    discards duplicates; the size of that set is the result.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept so that
    # callers passing it by keyword continue to work.
    n = len(str)
    return len({str[i:j] for i in range(n) for j in range(i + 1, n + 1)})
if __name__ == '__main__':
    # Demo: print the distinct-substring count of a fixed sample string.
    sample = "aaaa"
    print(distinctSubstring(sample))
|
C#
using System;
using System.Collections.Generic;
class DistinctSubstring
{
    /// <summary>
    /// Counts the distinct non-empty substrings of <paramref name="str"/>.
    /// Each (start, length) slice is added to a hash set, which discards
    /// duplicates; the set's element count is returned.
    /// </summary>
    public static int distinctSubstring(String str)
    {
        var seen = new HashSet<String>();
        int n = str.Length;
        for (int start = 0; start < n; start++)
        {
            for (int len = 1; start + len <= n; len++)
            {
                seen.Add(str.Substring(start, len));
            }
        }
        return seen.Count;
    }

    /// <summary>Demo entry point: prints the count for a fixed sample.</summary>
    public static void Main(String[] args)
    {
        String sample = "aaaa";
        Console.WriteLine(distinctSubstring(sample));
    }
}
|
Javascript
<script>
/**
 * Counts the distinct non-empty substrings of `str`.
 * Each slice is added to a Set, which discards duplicates; the Set's size
 * is returned.
 */
function distinctSubstring(str)
{
    const seen = new Set();
    const n = str.length;
    for (let start = 0; start < n; start++) {
        for (let len = 1; start + len <= n; len++) {
            seen.add(str.slice(start, start + len));
        }
    }
    return seen.size;
}
// Demo: write the distinct-substring count of a fixed sample string into the page.
let str = "aaaa" ;
document.write(distinctSubstring(str));
</script>
|
Complexity Analysis:
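- Time Complexity: $O(n^3 \log n)$ with an ordered set such as C++ `std::set` (there are $O(n^2)$ substrings, each costing $O(n)$ to build and $O(n \log n)$ character comparisons to insert); $O(n^3)$ on average with a hash set.
- Auxiliary Space: $O(n^3)$ in the worst case, since up to $n(n+1)/2$ distinct substrings of length up to $n$ are stored.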
How to print the distinct substrings?
C++
#include <bits/stdc++.h>
using namespace std;
/**
 * @brief Collects every distinct non-empty substring of @p str.
 *
 * @param str Input string; may be empty.
 * @return Ordered set containing each distinct substring exactly once
 *         (empty when @p str is empty).
 */
std::set<std::string> distinctSubstring(const std::string& str)
{
    std::set<std::string> result;
    const std::size_t n = str.size();
    for (std::size_t start = 0; start < n; ++start) {
        // std::string::substr takes (position, COUNT), not (begin, end), so
        // the inner loop runs over lengths. Passing an end index as the count
        // skips the short substrings of every start position after the first.
        for (std::size_t len = 1; start + len <= n; ++len) {
            result.insert(str.substr(start, len));
        }
    }
    return result;
}
// Demo driver: prints a header line followed by each distinct substring of a
// fixed sample string, one per line.
int main()
{
    const std::string sample = "aaaa";
    const std::set<std::string> found = distinctSubstring(sample);
    std::cout << "Distinct Substrings are: \n";
    for (auto it = found.begin(); it != found.end(); ++it) {
        std::cout << *it << std::endl;
    }
}
|
Java
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
public class DistinctSubstring {
    /**
     * Returns the set of distinct non-empty substrings of {@code str}.
     * Each (start, length) slice is added to a hash set, which discards
     * duplicates.
     */
    public static Set<String> distinctSubstring(String str)
    {
        final Set<String> seen = new HashSet<String>();
        final int n = str.length();
        for (int start = 0; start < n; start++) {
            for (int len = 1; start + len <= n; len++) {
                seen.add(str.substring(start, start + len));
            }
        }
        return seen;
    }

    /** Demo entry point: prints a header and then each distinct substring. */
    public static void main(String[] args)
    {
        final String sample = "aaaa";
        final Set<String> found = distinctSubstring(sample);
        System.out.println("Distinct Substrings are: ");
        final Iterator<String> cursor = found.iterator();
        while (cursor.hasNext()) {
            System.out.println(cursor.next());
        }
    }
}
|
Python3
def distinctSubstring(str):
    """Return the set of distinct non-empty substrings of ``str``.

    Every slice ``str[i:j]`` with ``i < j`` is collected into a set, which
    discards duplicates. An empty input yields an empty set.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept so that
    # callers passing it by keyword continue to work.
    n = len(str)
    return {str[i:j] for i in range(n) for j in range(i + 1, n + 1)}
if __name__ == '__main__':
    # Demo: print a header and then each distinct substring of a fixed sample.
    sample = "aaaa"
    subs = distinctSubstring(sample)
    print("Distinct Substrings are: ")
    for s in subs:
        print(s)
|
C#
using System;
using System.Collections.Generic;
class GFG
{
    /// <summary>
    /// Returns the set of distinct non-empty substrings of
    /// <paramref name="str"/>. Each (start, length) slice is added to a hash
    /// set, which discards duplicates.
    /// </summary>
    public static HashSet<String> distinctSubstring(String str)
    {
        var seen = new HashSet<String>();
        int n = str.Length;
        for (int start = 0; start < n; start++)
        {
            for (int len = 1; start + len <= n; len++)
            {
                seen.Add(str.Substring(start, len));
            }
        }
        return seen;
    }

    /// <summary>Demo entry point: prints a header, then each substring.</summary>
    public static void Main(String[] args)
    {
        String sample = "aaaa";
        HashSet<String> found = distinctSubstring(sample);
        Console.WriteLine("Distinct Substrings are: ");
        foreach (String item in found)
        {
            Console.WriteLine(item);
        }
    }
}
|
Javascript
<script>
/**
 * Returns a Set holding every distinct non-empty substring of `str`.
 *
 * @param {string} str - Input string; may be empty.
 * @returns {Set<string>} Each distinct substring exactly once.
 */
function distinctSubstring(str)
{
    const result = new Set();
    const n = str.length;
    for (let start = 0; start < n; start++)
    {
        for (let end = start + 1; end <= n; end++)
        {
            // String.prototype.substring takes (startIndex, END index), so the
            // second argument is `end` itself. Adding `start` to it overshoots
            // and drops the short substrings of every later start position.
            result.add(str.substring(start, end));
        }
    }
    return result;
}
// Demo: write a header and then each distinct substring of a fixed sample
// string into the page, one per line.
let str = "aaaa";
let subs = distinctSubstring(str);
document.write("Distinct Substrings are: ", "</br>");
subs.forEach(function (item) {
    document.write(item, "</br>");
});
</script>
|
Output:
Distinct Substrings are:
a
aa
aaa
aaaa
Complexity Analysis:
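- Time Complexity: $O(n^3 \log n)$ with an ordered set such as C++ `std::set`; $O(n^3)$ on average with a hash set.
- Auxiliary Space: $O(n^3)$ in the worst case, since up to $n(n+1)/2$ distinct substrings of length up to $n$ are stored.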
Optimization: We can further optimize the above code. The substr() function works in linear time. Instead of calling it for every substring, we can append the current character to the previous substring to obtain the current substring.
Implementation:
C++
#include <bits/stdc++.h>
using namespace std;
/**
 * @brief Prints every distinct non-empty substring of @p s to std::cout,
 *        each followed by a single space.
 *
 * Substrings are built incrementally: for each start position the running
 * string is extended by one character and inserted into a hash set.
 * The print order is the hash set's iteration order and is therefore
 * unspecified.
 *
 * @param s Input string; nothing is printed when it is empty.
 */
void printSubstrings(const std::string& s)
{
    std::unordered_set<std::string> seen;
    const std::size_t n = s.size();
    for (std::size_t start = 0; start < n; ++start) {
        std::string current;
        for (std::size_t pos = start; pos < n; ++pos) {
            // Append in place. `current = current + s[pos]` would rebuild the
            // whole string on every step, which is exactly the cost this
            // variant is meant to avoid.
            current += s[pos];
            seen.insert(current);
        }
    }
    // Iterate by const reference so each element is not copied.
    for (const auto& sub : seen)
        std::cout << sub << " ";
}
// Demo driver: prints the distinct substrings of a fixed sample string.
int main()
{
    const std::string sample = "aaabc";
    printSubstrings(sample);
    return 0;
}
|
Java
import java.util.*;
class GFG
{
    /**
     * Prints every distinct non-empty substring of {@code s}, each followed
     * by a single space. Substrings are grown one character at a time from
     * each start position and de-duplicated through a hash set, so the print
     * order is the set's iteration order.
     */
    static void printSubStrings(String s)
    {
        HashSet<String> seen = new HashSet<String>();
        final int n = s.length();
        for (int start = 0; start < n; ++start)
        {
            StringBuilder prefix = new StringBuilder();
            for (int pos = start; pos < n; ++pos)
            {
                prefix.append(s.charAt(pos));
                seen.add(prefix.toString());
            }
        }
        for (String item : seen)
        {
            System.out.print(item + " ");
        }
    }

    /** Demo entry point: prints the substrings of a fixed sample string. */
    public static void main(String[] args)
    {
        final String sample = "aaabc";
        printSubStrings(sample);
    }
}
|
Python3
def printSubStrings(s):
    """Print every distinct non-empty substring of ``s``.

    Substrings are grown one character at a time from each start position and
    de-duplicated through a set. Each one is printed followed by a single
    space, with no trailing newline; the order is the set's iteration order.
    """
    seen = set()
    for i in range(len(s)):
        current = ""
        for j in range(i, len(s)):
            current += s[j]
            seen.add(current)
    # Use a loop name that does not shadow the builtin `str`.
    for sub in seen:
        print(sub, end=" ")
if __name__ == '__main__':
    # Demo: print the distinct substrings of a fixed sample string.
    sample = "aaabc"
    printSubStrings(sample)
|
C#
using System;
using System.Collections.Generic;
class GFG
{
    /// <summary>
    /// Prints every distinct non-empty substring of <paramref name="s"/>,
    /// each followed by a single space. Substrings are grown one character at
    /// a time from each start position and de-duplicated through a hash set.
    /// </summary>
    static void printSubStrings(String s)
    {
        var seen = new HashSet<String>();
        int n = s.Length;
        for (int start = 0; start < n; ++start)
        {
            String prefix = "";
            for (int pos = start; pos < n; ++pos)
            {
                prefix += s[pos];
                seen.Add(prefix);
            }
        }
        foreach (String item in seen)
        {
            Console.Write(item + " ");
        }
    }

    /// <summary>Demo entry point: prints the substrings of a fixed sample.</summary>
    public static void Main(String[] args)
    {
        String sample = "aaabc";
        printSubStrings(sample);
    }
}
|
Javascript
<script>
/**
 * Writes every distinct non-empty substring of `s` into the document, each
 * followed by a single space. Substrings are grown one character at a time
 * from each start position and de-duplicated through a Set, so they are
 * written in first-insertion order.
 */
function printSubstrings(s)
{
    const seen = new Set();
    const n = s.length;
    for (let start = 0; start < n; ++start) {
        let prefix = "";
        for (let pos = start; pos < n; ++pos) {
            prefix += s.charAt(pos);
            seen.add(prefix);
        }
    }
    seen.forEach(function (item) {
        document.write(item + " ");
    });
}
// Demo: write the distinct substrings of a fixed sample string into the page.
let str = "aaabc" ;
printSubstrings(str);
</script>
|
Output: bc b abc ab aabc aa aaa c a aaab aab aaabc (the order may vary, since a hash set is unordered)
Complexity Analysis:
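- Time Complexity: $O(n^2)$ set insertions. Each insertion still hashes a string of length up to $n$, so the character-level work is $O(n^3)$ in the worst case.
- Auxiliary Space: $O(n^3)$ in the worst case, since up to $n(n+1)/2$ distinct substrings of length up to $n$ are stored.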
Space Optimization using Trie Data Structure (when we just need count of distinct substrings)
The approaches above rely on hashing, which may exceed the memory limit (MLE) for very large strings. Their space complexity is approximately $O(n^3)$: there can be $n(n+1)/2$ substrings, i.e. $O(n^2)$ of them, and the length of a substring ranges from 1 to $n$, i.e. $O(n/2)$ on average. Together this gives a total space complexity of $O(n^3)$.
We can improve this using a Trie. The idea is to insert only those characters that are not already present in the Trie. Whenever such an insertion happens, we know that the substring ending at that character is occurring for the first time, so we count it. If a character of the substring is already present, we simply move on to the corresponding child node without storing it again, which saves space.
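The time complexity of this approach is $O(n^2)$, similar to the previous approach, but the space reduces to $O(26 \cdot n^2)$ in the worst case: the Trie holds one node per distinct substring (at most $n(n+1)/2$ nodes), and each node stores 26 child pointers instead of a full copy of the substring.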
Implementation:
C++
#include <bits/stdc++.h>
using namespace std;
/**
 * @brief Node of a 26-ary trie over the lowercase letters 'a'..'z'.
 *
 * A node owns its children: destroying a node destroys the whole subtree
 * below it, so a trie is released by destroying (or letting go out of scope)
 * its root. Destruction recurses once per trie level.
 */
class TrieNode {
public:
    bool isWord = false;        ///< true when a substring ends at this node
    TrieNode* child[26] = {};   ///< child[c - 'a'], or nullptr when absent

    TrieNode() = default;

    ~TrieNode()
    {
        for (TrieNode* next : child) {
            delete next; // deleting nullptr is a no-op
        }
    }

    // The node owns raw pointers; a shallow copy would lead to double deletes.
    TrieNode(const TrieNode&) = delete;
    TrieNode& operator=(const TrieNode&) = delete;
};

/**
 * @brief Counts the distinct non-empty substrings of @p str using a trie.
 *
 * Every suffix of @p str is walked down the trie from the root. Each node
 * that has to be created corresponds to a substring seen for the first time,
 * so the number of created nodes is the answer.
 *
 * @param str Input consisting only of the characters 'a'..'z'; may be empty.
 * @return Number of distinct non-empty substrings (0 for an empty string).
 * @throws std::invalid_argument if @p str contains a character outside
 *         'a'..'z' (such a character would index past the 26-entry child array).
 */
int countDistinctSubstring(const std::string& str)
{
    // Validate up front so no partial trie is built for bad input.
    for (const char c : str) {
        if (c < 'a' || c > 'z') {
            throw std::invalid_argument(
                "countDistinctSubstring: input must contain only 'a'..'z'");
        }
    }

    // The root lives on the stack; its destructor frees every node created
    // below, including when `new` throws part-way through.
    TrieNode root;
    int count = 0;
    const std::size_t n = str.size();
    for (std::size_t start = 0; start < n; ++start) {
        TrieNode* node = &root;
        for (std::size_t pos = start; pos < n; ++pos) {
            TrieNode*& slot = node->child[str[pos] - 'a'];
            if (slot == nullptr) {
                slot = new TrieNode();
                // The new node, not its parent, is where this substring ends.
                slot->isWord = true;
                ++count;
            }
            node = slot;
        }
    }
    return count;
}
// Demo driver: prints the distinct-substring count of a fixed sample string.
int main()
{
    const std::string sample = "aaabc";
    std::cout << "Count of Distinct Substrings: "
              << countDistinctSubstring(sample) << std::endl;
    return 0;
}
|
Java
import java.io.*;
class GFG {
    /** Node of a 26-ary trie over the lowercase letters 'a'..'z'. */
    static class TrieNode {
        TrieNode[] children;
        boolean isEnd;

        TrieNode()
        {
            this.children = new TrieNode[26];
            this.isEnd = false;
        }
    }

    /** Shared trie used only by {@link #insert(String)}. */
    static TrieNode root = new TrieNode();

    /**
     * Inserts {@code str} into the shared trie rooted at {@link #root} and
     * marks its final node as the end of a word.
     */
    static void insert(String str)
    {
        TrieNode cur = root;
        for (char ch : str.toCharArray()) {
            int idx = ch - 'a';
            if (cur.children[idx] == null)
                cur.children[idx] = new TrieNode();
            cur = cur.children[idx];
        }
        cur.isEnd = true;
    }

    /**
     * Counts the distinct non-empty substrings of {@code str}.
     *
     * Every suffix is walked down a trie; each node that has to be created
     * corresponds to a substring seen for the first time, so the number of
     * created nodes is the answer.
     *
     * @param str input made of the characters 'a'..'z'; may be empty
     * @return number of distinct non-empty substrings
     */
    public static int distinctSubstringCount(String str)
    {
        // Use a fresh trie per call. Reusing the shared static root would
        // carry nodes over from earlier calls and under-count afterwards.
        final TrieNode localRoot = new TrieNode();
        final int n = str.length();
        int cnt = 0;
        for (int i = 0; i < n; i++) {
            TrieNode node = localRoot;
            for (int j = i; j < n; j++) {
                final int idx = str.charAt(j) - 'a';
                if (node.children[idx] == null) {
                    node.children[idx] = new TrieNode();
                    // The new node, not its parent, is where the substring ends.
                    node.children[idx].isEnd = true;
                    cnt++;
                }
                node = node.children[idx];
            }
        }
        return cnt;
    }

    /** Demo entry point: prints the count for a fixed sample string. */
    public static void main(String[] args)
    {
        int cnt = distinctSubstringCount("aaa");
        System.out.println("Count of distinct substrings: "
                           + cnt);
    }
}
|
Python3
class TrieNode:
    """Node of a 26-ary trie; ``child`` has one slot per letter 'a'..'z'."""

    def __init__(self):
        # Flag available for marking the end of a stored string.
        self.isWord = False
        # child[k] is the node for letter chr(ord('a') + k), or None if absent.
        self.child = [None] * 26
def countDistinctSubstring(string):
    """Return the number of distinct non-empty substrings of ``string``.

    Every suffix is walked down a trie; each node that has to be created
    corresponds to a substring seen for the first time, so the number of
    created nodes is the answer.

    Raises:
        ValueError: if ``string`` contains a character outside 'a'..'z'.
    """
    head = TrieNode()
    count = 0
    base = ord('a')
    for i in range(len(string)):
        node = head
        for j in range(i, len(string)):
            idx = ord(string[j]) - base
            # A negative index would silently wrap around in a Python list,
            # so reject anything outside the 26 supported letters.
            if not 0 <= idx < 26:
                raise ValueError("input must contain only 'a'..'z'")
            if node.child[idx] is None:
                node.child[idx] = TrieNode()
                # The new node, not its parent, is where the substring ends.
                node.child[idx].isWord = True
                count += 1
            node = node.child[idx]
    return count
# Demo: count the distinct substrings of a fixed sample string and print the result.
count = countDistinctSubstring( "aaabc" )
print ( "Count of Distinct Substrings:" , count)
|
C#
using System;
using System.Collections.Generic;
class GFG {
    /// <summary>Node of a 26-ary trie over the letters 'a'..'z'.</summary>
    class TrieNode {
        public bool isWord = false;
        // One slot per letter; array elements start out as null.
        public TrieNode[] child = new TrieNode[26];
    }

    /// <summary>
    /// Counts the distinct non-empty substrings of <paramref name="str"/>.
    /// Every suffix is walked down a trie, and each node that has to be
    /// created is counted once.
    /// </summary>
    static int countDistinctSubstring(string str)
    {
        TrieNode root = new TrieNode();
        int total = 0;
        int n = str.Length;
        for (int start = 0; start < n; start++) {
            TrieNode node = root;
            for (int pos = start; pos < n; pos++) {
                int slot = str[pos] - 'a';
                if (node.child[slot] == null) {
                    node.child[slot] = new TrieNode();
                    // The flag is raised on the node that gained a child.
                    node.isWord = true;
                    total++;
                }
                node = node.child[slot];
            }
        }
        return total;
    }

    /// <summary>Demo entry point: prints the count for a fixed sample.</summary>
    public static void Main()
    {
        int total = countDistinctSubstring("aaabc");
        Console.Write("Count of Distinct Substrings: " + total);
    }
}
|
Javascript
/** Node of a 26-ary trie; `child` has one slot per letter 'a'..'z'. */
class TrieNode {
    constructor() {
        // Every slot starts out as null (no child yet).
        this.child = Array.from({ length: 26 }, () => null);
        this.isWord = false;
    }
}
/**
 * Counts the distinct non-empty substrings of `str`.
 * Every suffix is walked down a trie, and each node that has to be created
 * is counted once.
 */
function countDistinctSubstring(str) {
    const base = 'a'.charCodeAt(0);
    const root = new TrieNode();
    let total = 0;
    for (let start = 0; start < str.length; start++) {
        let node = root;
        for (let pos = start; pos < str.length; pos++) {
            const slot = str.charCodeAt(pos) - base;
            if (node.child[slot] === null) {
                node.child[slot] = new TrieNode();
                // The flag is raised on the node that gained a child.
                node.isWord = true;
                total += 1;
            }
            node = node.child[slot];
        }
    }
    return total;
}
// Demo: log the distinct-substring count of a fixed sample string.
console.log( "Count of Distinct Substrings: " + countDistinctSubstring( "aaabc" ));
|
Output: Count of Distinct Substrings: 12
Complexity Analysis:
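- Time Complexity: $O(n^2)$
- Auxiliary Space: $O(26 \cdot n^2)$ in the worst case (one Trie node per distinct substring, each holding 26 child pointers)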