Open In App

Count number of Distinct Substring in a String

Last Updated : 22 May, 2023
Improve
Improve
Like Article
Like
Save
Share
Report

Given a string, count all distinct substrings of the given string.

Examples: 

Input : abcd
Output : abcd abc ab a bcd bc b cd c d
All Elements are Distinct

Input : aaa
Output : aaa aa a aa a a
All elements are not Distinct

Prerequisite : Print subarrays of a given array

The idea is to use a hash table (HashSet in Java) to store all generated substrings. Finally, we return the size of the HashSet.

Implementation:

C++




// C++ program to count all distinct substrings in a string
#include<bits/stdc++.h>
using namespace std;
 
// Counts the distinct non-empty substrings of str.
// Every substring is inserted into an ordered std::set, which keeps a
// single copy of each value; the size of the set is the answer.
int distinctSubstring(string str)
{
    set<string> seen;
    const size_t n = str.length();

    // start is the index of the substring's first character
    for (size_t start = 0; start < n; ++start)
    {
        // len is the number of characters taken from start
        for (size_t len = 1; start + len <= n; ++len)
        {
            seen.insert(str.substr(start, len));
        }
    }

    // Number of distinct substrings collected
    return seen.size();
}
 
// Driver Code
// Prints the number of distinct substrings of a fixed sample string.
int main()
{
    const string sample = "aaaa";
    const int total = distinctSubstring(sample);
    cout << total;
}
 
// This code is contributed by Rajput-Ji


Java




// Java program to count all distinct substrings in a string
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
 
public class DistinctSubstring {

    /**
     * Counts the distinct non-empty substrings of {@code str}.
     *
     * <p>Each substring is added to a {@link HashSet}, which keeps one copy
     * of every value; the size of the set is returned.
     *
     * @param str the string to examine
     * @return the number of distinct non-empty substrings
     */
    public static int distinctSubstring(String str)
    {
        final int n = str.length();
        Set<String> seen = new HashSet<>();

        // begin is inclusive and end is exclusive, as in String.substring
        for (int begin = 0; begin < n; begin++) {
            for (int end = n; end > begin; end--) {
                seen.add(str.substring(begin, end));
            }
        }

        return seen.size();
    }

    /** Prints the count for a fixed sample string. */
    public static void main(String[] args)
    {
        String sample = "aaaa";
        int total = distinctSubstring(sample);
        System.out.println(total);
    }
}


Python3




# Python3 program to count all distinct substrings in a string
 
def distinctSubstring(str):
    """Return the number of distinct non-empty substrings of ``str``.

    Every slice ``str[start:stop]`` with ``start < stop <= len(str)`` is
    collected into a set, which keeps one copy of each value.
    """
    n = len(str)
    unique = {
        str[start:stop]
        for start in range(n)
        for stop in range(start + 1, n + 1)
    }
    return len(unique)
 
# Driver Code
if __name__ == '__main__':
    # Use a dedicated name so the builtin `str` is not rebound at module scope
    sample = "aaaa"
    print(distinctSubstring(sample))
 
# This code has been contributed by 29AjayKumar


C#




// C# program to count all distinct
// substrings in a string
using System;
using System.Collections.Generic;
 
class DistinctSubstring
{
    // Returns the number of distinct non-empty substrings of str.
    // Substrings are collected in a HashSet, which stores each value once.
    public static int distinctSubstring(String str)
    {
        int n = str.Length;
        HashSet<String> seen = new HashSet<String>();

        for (int start = 0; start < n; start++)
        {
            // len is the number of characters taken from start
            for (int len = 1; len <= n - start; len++)
            {
                seen.Add(str.Substring(start, len));
            }
        }

        return seen.Count;
    }

    // Prints the count for a fixed sample string
    public static void Main(String[] args)
    {
        String sample = "aaaa";
        int total = distinctSubstring(sample);
        Console.WriteLine(total);
    }
}
 
// This code is contributed by 29AjayKumar


Javascript




<script>
// Javascript program to count all distinct substrings in a string
 
function distinctSubstring(str)
{
    // A Set stores each substring only once
    const seen = new Set();
    const n = str.length;

    // start is inclusive, end is exclusive
    for (let start = 0; start < n; start++) {
        let end = start + 1;
        while (end <= n) {
            seen.add(str.substring(start, end));
            end++;
        }
    }

    // Number of distinct substrings collected
    return seen.size;
}
 
// Driver Code
// Sample input; the resulting count is written into the current document
let str = "aaaa";
document.write(distinctSubstring(str));
 
// This code is contributed by patel2127
</script>


Output

4

Complexity Analysis:

  • Time Complexity: O(n^3 log n)
  • Auxiliary Space: O(n^3) in the worst case, since the set can hold up to n(n+1)/2 substrings, each of length up to n.

How to print the distinct substrings?

C++




// C++ program to count all distinct
// substrings in a string
#include <bits/stdc++.h>
using namespace std;
 
// Returns the set of all distinct non-empty substrings of str.
// The substrings are collected in an ordered std::set, so iterating the
// result yields them in lexicographic order without duplicates.
set<string> distinctSubstring(string str)
{
    set<string> result;
    const size_t n = str.length();

    // i is the index of the substring's first character
    for (size_t i = 0; i < n; i++)
    {
        // j is the exclusive end index of the substring
        for (size_t j = i + 1; j <= n; j++)
        {
            // std::string::substr takes (position, count), not (begin, end),
            // so the count must be j - i. Passing j directly skipped short
            // substrings that start after index 0 (e.g. "b" in "abcd").
            result.insert(str.substr(i, j - i));
        }
    }

    return result;
}
 
// Driver code
// Prints each distinct substring of a fixed sample string on its own line.
int main()
{
    const string sample = "aaaa";
    const set<string> found = distinctSubstring(sample);

    cout << "Distinct Substrings are: \n";
    for (const string& piece : found)
        cout << piece << endl;
}
 
// This code is contributed by Ronak Mangal


Java




// Java program to count all distinct substrings in a string
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
 
public class DistinctSubstring {

    /**
     * Collects the distinct non-empty substrings of {@code str}.
     *
     * @param str the string to examine
     * @return a {@link HashSet} containing each distinct substring once
     */
    public static Set<String> distinctSubstring(String str)
    {
        Set<String> seen = new HashSet<>();
        final int n = str.length();

        for (int begin = 0; begin < n; begin++) {
            // len is the number of characters taken from begin
            for (int len = 1; begin + len <= n; len++) {
                seen.add(str.substring(begin, begin + len));
            }
        }

        return seen;
    }

    /** Prints the distinct substrings of a fixed sample string, one per line. */
    public static void main(String[] args)
    {
        Set<String> subs = distinctSubstring("aaaa");

        System.out.println("Distinct Substrings are: ");
        Iterator<String> it = subs.iterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }
}


Python3




# Python3 program to count all distinct
# substrings in a string
 
def distinctSubstring(str):
    """Return the set of distinct non-empty substrings of ``str``.

    Every slice ``str[lo:hi]`` with ``lo < hi <= len(str)`` is produced;
    the set keeps a single copy of each.
    """
    n = len(str)
    return {str[lo:hi] for lo in range(n) for hi in range(lo + 1, n + 1)}
 
# Driver Code
if __name__ == '__main__':

    # Use a dedicated name so the builtin `str` is not rebound at module scope
    text = "aaaa"
    subs = distinctSubstring(text)

    print("Distinct Substrings are: ")
    # Set iteration order is not sorted; each substring goes on its own line
    for s in subs:
        print(s)
 
# This code is contributed by 29AjayKumar


C#




// C# program to count all distinct
// substrings in a string
using System;
using System.Collections.Generic;
 
class GFG
{
    // Builds and returns the set of all distinct non-empty substrings of str.
    public static HashSet<String> distinctSubstring(String str)
    {
        int n = str.Length;
        HashSet<String> seen = new HashSet<String>();

        for (int start = 0; start < n; start++)
        {
            // len is the number of characters taken from start
            for (int len = 1; start + len <= n; len++)
            {
                seen.Add(str.Substring(start, len));
            }
        }

        return seen;
    }

    // Prints the distinct substrings of a fixed sample string, one per line
    public static void Main(String[] args)
    {
        HashSet<String> subs = distinctSubstring("aaaa");

        Console.WriteLine("Distinct Substrings are: ");
        foreach (String piece in subs)
        {
            Console.WriteLine(piece);
        }
    }
}
 
// This code is contributed by 29AjayKumar


Javascript




<script>
 
// JavaScript program to count all distinct
// substrings in a string
function distinctSubstring(str)
{
    // A Set stores each substring only once
    const result = new Set();
    const n = str.length;

    // i is the inclusive start index, j the exclusive end index
    for (let i = 0; i < n; i++)
    {
        for (let j = i + 1; j <= n; j++)
        {
            // String.prototype.substring takes (start, end). The end index
            // is j itself; using i + j overshot the end and skipped short
            // substrings that start after index 0 (e.g. "b" in "abcd").
            result.add(str.substring(i, j));
        }
    }

    // Return the Set of distinct substrings
    return result;
}
 
// Driver code
 
// Sample input and the Set of its distinct substrings
let str = "aaaa";
let subs = distinctSubstring(str);
 
// Write a heading, then each substring followed by a line-break tag
document.write("Distinct Substrings are: ","</br>");
for(let i of subs)
        document.write(i,"</br>");
 
// This code is contributed by shinjanpatra
</script>


Output

Distinct Substrings are: 
a
aa
aaa
aaaa

Complexity Analysis:

  • Time Complexity: O(n^3 log n)
  • Auxiliary Space: O(n^3) in the worst case, since the set can hold up to n(n+1)/2 substrings, each of length up to n.

Optimization: We can further optimize the above code. The substr() function works in linear time. Instead of calling it, we can append the current character to the previous substring to get the current substring.

Implementation:

C++




// C++ implementation of the approach
#include <bits/stdc++.h>
using namespace std;
 
// Prints every distinct non-empty substring of s, each followed by a
// single space. The output order is whatever iterating the unordered_set
// yields.
void printSubstrings(string s)
{
    // Hash set of the substrings seen so far
    unordered_set<string> seen;
    const size_t n = s.size();

    for (size_t start = 0; start < n; ++start) {

        // Grow the substring that begins at start one character at a time,
        // recording every intermediate value
        string current;
        for (size_t end = start; end < n; ++end) {
            current += s[end];
            seen.insert(current);
        }
    }

    // Emit the collected substrings
    for (const string& piece : seen)
        cout << piece << " ";
}
 
// Driver code
int main()
{
    string str = "aaabc";
    printSubstrings(str);
    return 0;
}


Java




// Java implementation of the approach
import java.util.*;
 
class GFG
{

// Prints every distinct non-empty substring of s, each followed by a
// single space, in the iteration order of the underlying HashSet.
static void printSubStrings(String s)
{

    // Substrings seen so far; the set keeps one copy of each
    Set<String> unique = new HashSet<>();
    final int n = s.length();

    for (int start = 0; start < n; start++)
    {

        // Grow the substring that begins at start one character at a
        // time, recording every intermediate value
        StringBuilder prefix = new StringBuilder();
        for (int end = start; end < n; end++)
        {
            prefix.append(s.charAt(end));
            unique.add(prefix.toString());
        }
    }

    // Emit the collected substrings
    for (String piece : unique)
    {
        System.out.print(piece + " ");
    }
}

// Driver code
public static void main(String[] args)
{
    printSubStrings("aaabc");
}
}
 
// This code is contributed by Rajput-Ji


Python3




# Python3 implementation of the approach
 
# Prints every distinct non-empty substring of s, each followed by a
# single space, in the iteration order of the underlying set
def printSubStrings(s):

    # Substrings seen so far; the set keeps one copy of each
    found = set()

    for start in range(len(s)):

        # Grow the substring that begins at start one character at a
        # time, recording every intermediate value
        prefix = ""
        for ch in s[start:]:
            prefix += ch
            found.add(prefix)

    # Emit the collected substrings on one line
    for piece in found:
        print(piece, end=" ")
 
# Driver code
if __name__ == '__main__':
    # Use a dedicated name so the builtin `str` is not rebound at module scope
    sample = "aaabc"
    printSubStrings(sample)
     
# This code is contributed by 29AjayKumar


C#




// C# implementation of the approach
using System;
using System.Collections.Generic;
 
class GFG
{

// Prints every distinct non-empty substring of s, each followed by a
// single space, in the iteration order of the underlying HashSet.
static void printSubStrings(String s)
{

    // Substrings seen so far; the set keeps one copy of each
    HashSet<String> unique = new HashSet<String>();
    int n = s.Length;

    for (int start = 0; start < n; start++)
    {

        // Grow the substring that begins at start one character
        // at a time, recording every intermediate value
        String prefix = "";
        int end = start;
        while (end < n)
        {
            prefix += s[end];
            unique.Add(prefix);
            end++;
        }
    }

    // Emit the collected substrings
    foreach (String piece in unique)
    {
        Console.Write(piece + " ");
    }
}

// Driver code
public static void Main(String[] args)
{
    printSubStrings("aaabc");
}
}
 
// This code is contributed by Rajput-Ji


Javascript




<script>
// Javascript implementation of the approach
 
// Writes every distinct non-empty substring of s into the document,
// each followed by a single space
function printSubstrings(s)
{
    // Substrings seen so far; the Set keeps one copy of each
    const seen = new Set();
    const n = s.length;

    for (let start = 0; start < n; start++) {

        // Grow the substring that begins at start one character at a
        // time, recording every intermediate value
        let prefix = "";
        for (let end = start; end < n; end++) {
            prefix += s[end];
            seen.add(prefix);
        }
    }

    // Emit the collected substrings in insertion order
    seen.forEach(function (piece) {
        document.write(piece+" ");
    });
}
 
// Driver Code
// Sample input; its distinct substrings are written into the document
let str = "aaabc";
printSubstrings(str);
 
// This code is contributed by unknown2108
</script>


Output

bc b abc ab aabc aa aaa c a aaab aab aaabc 

Complexity Analysis:

  • Time Complexity: O(n^2)
  • Auxiliary Space: O(n^3) in the worst case, since the set can hold up to n(n+1)/2 substrings, each of length up to n.

Space Optimization using Trie Data Structure (when we just need count of distinct substrings)

The above approaches use hashing, which may exceed the memory limit (MLE) for very large strings. Their space complexity is approximately O(n^3): there can be n(n+1)/2 substrings, which is O(n^2), and each substring has a length between 1 and n, i.e. about n/2 = O(n) on average. This makes the total space complexity O(n^3).

We can improve this using a Trie. The idea is to insert only the characters that are not already present in the Trie. Whenever such an insertion happens, we know that the substring ending at the new node is occurring for the first time, so we count it. If a prefix of the substring is already present, we simply move on to the next node without storing those characters again, which saves space.

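The time complexity of this approach is O(n^2), similar to the previous approach. The Trie holds one node per distinct substring and each node stores 26 child pointers, so the space is O(26 · n^2) in the worst case (all substrings distinct) and much smaller for strings with many repeated substrings.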

Implementation:

C++




#include <bits/stdc++.h>
using namespace std;
 
// Node of a trie over the lowercase letters 'a'..'z'.
// A node owns its children: the destructor deletes every child, so
// destroying the root releases the whole trie.
class TrieNode {
public:
    // True when the path from the root to this node spells a substring
    bool isWord;
    // child[k] is the node reached by the letter 'a' + k, or nullptr
    TrieNode* child[26];

    TrieNode()
    {
        isWord = false;
        for (int i = 0; i < 26; i++) {
            child[i] = nullptr;
        }
    }

    ~TrieNode()
    {
        // Deleting a null pointer is a no-op, so no check is needed
        for (int i = 0; i < 26; i++) {
            delete child[i];
        }
    }

    // Copying would make two nodes own the same children
    TrieNode(const TrieNode&) = delete;
    TrieNode& operator=(const TrieNode&) = delete;
};

// Returns the number of distinct non-empty substrings of str.
// Every suffix of str is inserted into a trie; each newly created node
// corresponds to a substring that has not been seen before.
// Throws std::invalid_argument if str contains a character outside
// 'a'..'z', because the trie only has 26 child slots per node.
int countDistinctSubstring(string str)
{
    // Automatic storage: the whole trie is freed when the function returns
    TrieNode root;

    // Number of distinct non-empty substrings found so far
    int count = 0;
    const size_t n = str.length();

    for (size_t i = 0; i < n; i++) {
        TrieNode* cur = &root;

        for (size_t j = i; j < n; j++) {
            const int idx = str[j] - 'a';
            if (idx < 0 || idx >= 26) {
                throw invalid_argument(
                    "countDistinctSubstring: only 'a'..'z' are supported");
            }

            // A missing child means str[i..j] is a new substring
            if (cur->child[idx] == nullptr) {
                cur->child[idx] = new TrieNode();
                cur->child[idx]->isWord = true;
                count++;
            }
            // Move on to the next character
            cur = cur->child[idx];
        }
    }

    return count;
}
 
int main()
{
    int count = countDistinctSubstring("aaabc");
 
    cout << "Count of Distinct Substrings: " << count
         << endl;
 
    return 0;
}


Java




/*package whatever //do not write package name here */
 
import java.io.*;
 
class GFG {
    /** Node of a trie over the lowercase letters 'a'..'z'. */
    static class TrieNode {
        // children[k] is the node reached by the letter 'a' + k, or null
        TrieNode children[];
        // true when the path from the root to this node spells a stored string
        boolean isEnd;

        TrieNode()
        {
            this.children = new TrieNode[26];
            this.isEnd = false;
        }
    }

    // Shared trie used only by insert(); distinctSubstringCount builds its
    // own trie so that its result does not depend on earlier calls.
    static TrieNode root = new TrieNode();

    /**
     * Inserts {@code str} into the shared trie rooted at {@link #root}.
     *
     * @param str the string to insert; every character must be in 'a'..'z'
     */
    static void insert(String str)
    {
        TrieNode cur = root;
        for (char ch : str.toCharArray()) {
            int idx = ch - 'a';

            if (cur.children[idx] == null)
                cur.children[idx] = new TrieNode();

            cur = cur.children[idx];
        }
        cur.isEnd = true;
    }

    /**
     * Counts the distinct non-empty substrings of {@code str}.
     *
     * <p>Every suffix of {@code str} is inserted into a fresh trie; each
     * newly created node corresponds to a substring not seen before.
     *
     * @param str the string to examine; every character must be in
     *            'a'..'z', otherwise the 26-slot child array is indexed
     *            out of bounds
     * @return the number of distinct non-empty substrings
     */
    public static int distinctSubstringCount(String str)
    {
        // A trie local to this call: using the shared static root made a
        // second call on the same text return 0.
        TrieNode localRoot = new TrieNode();
        final int n = str.length();

        // will hold the count of unique substrings
        int cnt = 0;
        for (int i = 0; i < n; i++) {
            // start from the root of the trie for each new starting point
            TrieNode temp = localRoot;
            for (int j = i; j < n; j++) {
                int idx = str.charAt(j) - 'a';
                // a missing child means str[i..j] is a new substring
                if (temp.children[idx] == null) {
                    TrieNode created = new TrieNode();
                    created.isEnd = true;
                    temp.children[idx] = created;
                    cnt++;
                }
                // move on to the next char
                temp = temp.children[idx];
            }
        }
        return cnt;
    }

    public static void main(String[] args)
    {
        int cnt = distinctSubstringCount("aaa");
        System.out.println("Count of distinct substrings: "
                           + cnt);
    }
}


Python3




# Python code for the above approach:
 
class TrieNode:
    """Trie node with a word flag and 26 child slots, all initially empty."""

    def __init__(self):
        # Flag is cleared on creation
        self.isWord = False
        # One slot per index 0..25; None means there is no child there
        self.child = [None for _ in range(26)]
 
def countDistinctSubstring(string):
    """Return the number of distinct non-empty substrings of ``string``.

    Every suffix of ``string`` is walked through a trie; each node that has
    to be created corresponds to a substring not seen before.

    Characters are mapped to slots with ``ord(ch) - ord('a')``, so the input
    is presumably expected to contain only 'a'..'z' (verify against callers).
    """
    root = TrieNode()
    base = ord('a')

    # Number of trie nodes created so far
    total = 0

    for start in range(len(string)):
        node = root

        for ch in string[start:]:
            slot = ord(ch) - base
            # An empty slot means this substring is new: add a node for it
            if not node.child[slot]:
                node.child[slot] = TrieNode()
                node.isWord = True
                total += 1
            # Descend to the child for the next character
            node = node.child[slot]

    return total
 
 
# Count the distinct substrings of a sample string and print the result
count = countDistinctSubstring("aaabc")
print("Count of Distinct Substrings:", count)
 
# This code is contributed by lokesh.


C#




using System;
using System.Collections.Generic;
 
class GFG {

  // Trie node with a word flag and 26 child slots
  class TrieNode {
    public bool isWord;
    // A new array of references already holds null in every slot
    public TrieNode[] child = new TrieNode[26];
    public TrieNode()
    {
      isWord = false;
    }
  }

  // Returns the number of distinct non-empty substrings of str.
  // Every suffix is walked through a trie; each node that has to be
  // created corresponds to a substring not seen before.
  static int countDistinctSubstring(string str)
  {
    TrieNode root = new TrieNode();
    int n = str.Length;

    // Number of trie nodes created so far
    int total = 0;

    for (int start = 0; start < n; start++) {
      TrieNode node = root;

      for (int pos = start; pos < n; pos++)
      {
        int slot = str[pos] - 'a';

        // An empty slot means this substring is new: add a node for it
        if (node.child[slot] == null) {
          node.child[slot] = new TrieNode();
          node.isWord = true;
          total++;
        }
        // Descend to the child for the next character
        node = node.child[slot];
      }
    }

    return total;
  }

  public static void Main()
  {
    int total = countDistinctSubstring("aaabc");

    Console.Write("Count of Distinct Substrings: " + total);

  }
}
 
// This code is contributed by poojaagarwal2.


Javascript




// Trie node with a word flag and 26 child slots, all initially null
class TrieNode {
    constructor() {
        this.isWord = false;

        // One slot per index 0..25; null means there is no child there
        const slots = [];
        for (let k = 0; k < 26; k++) {
            slots.push(null);
        }
        this.child = slots;
    }
}
 
// Returns the number of distinct non-empty substrings of str.
// Every suffix is walked through a trie; each node that has to be created
// corresponds to a substring not seen before.
function countDistinctSubstring(str) {
    const root = new TrieNode();
    const base = 'a'.charCodeAt(0);
    const n = str.length;

    // Number of trie nodes created so far
    let total = 0;

    for (let start = 0; start < n; start++) {
        let node = root;
        for (let pos = start; pos < n; pos++) {
            const slot = str.charCodeAt(pos) - base;
            // A null slot means this substring is new: add a node for it
            if (node.child[slot] === null) {
                node.child[slot] = new TrieNode();
                node.isWord = true;
                total++;
            }
            // Descend to the child for the next character
            node = node.child[slot];
        }
    }
    return total;
}
 
// Log the number of distinct substrings of a sample string
console.log("Count of Distinct Substrings: " + countDistinctSubstring("aaabc"));


Output

Count of Distinct Substrings: 12

Complexity Analysis:

  • Time Complexity: O(n^2)
  • Auxiliary Space: O(n^2) trie nodes in the worst case (one node per distinct substring), each holding 26 child pointers.


Like Article
Suggest improvement
Previous
Next
Share your thoughts in the comments

Similar Reads