Count of distinct substrings of a string using Suffix Trie

• Difficulty Level : Hard
• Last Updated : 09 Apr, 2019

Given a string of length n of lowercase alphabet characters, we need to count total number of distinct substrings of this string.
Examples:

Input  : str = “ababa”
Output : 10
The total number of distinct substrings is 10, namely:
"", "a", "b", "ab", "ba", "aba", "bab", "abab", "baba"
and "ababa"

Recommended: Please solve it on “PRACTICE ” first, before moving on to the solution.

The idea is to create a Trie of all suffixes of the given string. Once the Trie is constructed, our answer is the total number of nodes in the constructed Trie (the root node corresponds to the empty substring ""). For example, the diagram below represents the Trie of all suffixes for “ababa”. The total number of nodes is 10, which is our answer. How does this work?

• Each root-to-node path of a Trie represents a prefix of the words present in the Trie. Here the words are suffixes, so each node represents a prefix of some suffix.
• Every substring of a string “str” is a prefix of a suffix of “str”.

Below is implementation based on above idea.

C++

 // A C++ program to find the count of distinct substring// of a string using trie data structure#include #define MAX_CHAR 26using namespace std;  // A Suffix Trie (A Trie of all suffixes) Nodeclass SuffixTrieNode{public:    SuffixTrieNode *children[MAX_CHAR];    SuffixTrieNode() // Constructor    {        // Initialize all child pointers as NULL        for (int i = 0; i < MAX_CHAR; i++)          children[i] = NULL;    }      // A recursive function to insert a suffix of the s    // in subtree rooted with this node    void insertSuffix(string suffix);};  // A Trie of all suffixesclass SuffixTrie{    SuffixTrieNode *root;    int _countNodesInTrie(SuffixTrieNode *);public:    // Constructor (Builds a trie of suffies of the given text)    SuffixTrie(string s)    {        root = new SuffixTrieNode();          // Consider all suffixes of given string and insert        // them into the Suffix Trie using recursive function        // insertSuffix() in SuffixTrieNode class        for (int i = 0; i < s.length(); i++)            root->insertSuffix(s.substr(i));    }      //  method to count total nodes in suffix trie    int countNodesInTrie() { return _countNodesInTrie(root); }};  // A recursive function to insert a suffix of the s in// subtree rooted with this nodevoid SuffixTrieNode::insertSuffix(string s){    // If string has more characters    if (s.length() > 0)    {        // Find the first character and convert it        // into 0-25 range.        
char cIndex = s.at(0) - 'a';          // If there is no edge for this character,        // add a new edge        if (children[cIndex] == NULL)            children[cIndex] = new SuffixTrieNode();          // Recur for next suffix        children[cIndex]->insertSuffix(s.substr(1));    }}  // A recursive function to count nodes in trieint SuffixTrie::_countNodesInTrie(SuffixTrieNode* node){    // If all characters of pattern have been processed,    if (node == NULL)        return 0;      int count = 0;    for (int i = 0; i < MAX_CHAR; i++)    {        // if children is not NULL then find count        // of all nodes in this subtrie        if (node->children[i] != NULL)            count += _countNodesInTrie(node->children[i]);    }      // return count of nodes of subtrie and plus    // 1 because of node's own count    return (1 + count);}  // Returns count of distinct substrings of strint countDistinctSubstring(string str){    // Construct a Trie of all suffixes    SuffixTrie sTrie(str);      // Return count of nodes in Trie of Suffixes    return sTrie.countNodesInTrie();}  // Driver program to test above functionint main(){    string str = "ababa";    cout << "Count of distinct substrings is "         << countDistinctSubstring(str);    return 0;}

Java

 // A Java program to count the distinct substrings of a string
// using a suffix trie (a trie that stores every suffix of the string)
public class Suffix {

    // A Suffix Trie (a trie of all suffixes) node
    static class SuffixTrieNode
    {
        // Alphabet size: only the lowercase letters 'a'..'z' are supported
        static final int MAX_CHAR = 26;

        // children[c] is the subtree reached through letter 'a' + c, or
        // null when there is no such edge. A new array is already
        // null-filled, so no explicit initialization is needed.
        SuffixTrieNode[] children = new SuffixTrieNode[MAX_CHAR];

        // Creates a node without any outgoing edge
        SuffixTrieNode()
        {
        }

        // Inserts the whole string s into the subtree rooted at this node
        void insertSuffix(String s)
        {
            insertSuffix(s, 0);
        }

        // Inserts the suffix of s that begins at index start into the
        // subtree rooted at this node. Working with an offset and a loop
        // avoids one substring copy and one stack frame per character.
        // A character outside 'a'..'z' yields an index outside the
        // children array, so the array access throws
        // ArrayIndexOutOfBoundsException.
        void insertSuffix(String s, int start)
        {
            SuffixTrieNode node = this;
            for (int i = start; i < s.length(); i++)
            {
                // Convert the character into the 0-25 range
                int cIndex = s.charAt(i) - 'a';

                // If there is no edge for this character, add one
                if (node.children[cIndex] == null)
                    node.children[cIndex] = new SuffixTrieNode();

                node = node.children[cIndex];
            }
        }
    }

    // A trie of all suffixes of a text
    static class Suffix_trie
    {
        static final int MAX_CHAR = SuffixTrieNode.MAX_CHAR;
        SuffixTrieNode root;

        // Constructor (builds a trie of the suffixes of the given text)
        Suffix_trie(String s)
        {
            root = new SuffixTrieNode();

            // Insert the suffix starting at every position of s
            for (int i = 0; i < s.length(); i++)
                root.insertSuffix(s, i);
        }

        // Recursively counts the nodes of the subtrie rooted at node
        // (0 for a null node)
        int _countNodesInTrie(SuffixTrieNode node)
        {
            if (node == null)
                return 0;

            // Start at 1 to account for node itself
            int count = 1;
            for (SuffixTrieNode child : node.children)
            {
                if (child != null)
                    count += _countNodesInTrie(child);
            }
            return count;
        }

        // Returns the total number of nodes in the trie, root included
        int countNodesInTrie()
        {
            return _countNodesInTrie(root);
        }
    }

    // Returns the count of distinct substrings of str. The empty
    // substring is counted as well (it is represented by the root node).
    static int countDistinctSubstring(String str)
    {
        // Construct a trie of all suffixes
        Suffix_trie sTrie = new Suffix_trie(str);

        // Every node stands for exactly one distinct substring
        return sTrie.countNodesInTrie();
    }

    // Driver program to test above function
    public static void main(String args[])
    {
        String str = "ababa";
        System.out.println("Count of distinct substrings is "
                + countDistinctSubstring(str));
    }
}
// This code is contributed by Sumit Ghosh

C#

 // C# program to count the distinct substrings of a string
// using a suffix trie (a trie that stores every suffix of the string)
using System;

public class Suffix
{
    // A Suffix Trie (a trie of all suffixes) node
    public class SuffixTrieNode
    {
        // Alphabet size: only the lowercase letters 'a'..'z' are supported
        static readonly int MAX_CHAR = 26;

        // children[c] is the subtree reached through letter 'a' + c, or
        // null when there is no such edge. A new array is already
        // null-filled, so no explicit initialization is needed.
        public SuffixTrieNode[] children = new SuffixTrieNode[MAX_CHAR];

        // Creates a node without any outgoing edge
        public SuffixTrieNode()
        {
        }

        // Inserts the suffix of s that begins at index start into the
        // subtree rooted at this node; with the default start the whole
        // string is inserted. Working with an offset and a loop avoids
        // one Substring copy and one stack frame per character.
        // A character outside 'a'..'z' yields an index outside the
        // children array, so the array access throws
        // IndexOutOfRangeException.
        public void insertSuffix(String s, int start = 0)
        {
            SuffixTrieNode node = this;
            for (int i = start; i < s.Length; i++)
            {
                // Convert the character into the 0-25 range. The character
                // itself (s[i]) must be used here: subtracting 'a' from
                // the string s does not compile.
                int cIndex = s[i] - 'a';

                // If there is no edge for this character, add one
                if (node.children[cIndex] == null)
                    node.children[cIndex] = new SuffixTrieNode();

                node = node.children[cIndex];
            }
        }
    }

    // A trie of all suffixes of a text
    public class Suffix_trie
    {
        static readonly int MAX_CHAR = 26;
        public SuffixTrieNode root;

        // Constructor (builds a trie of the suffixes of the given text)
        public Suffix_trie(String s)
        {
            root = new SuffixTrieNode();

            // Insert the suffix starting at every position of s
            for (int i = 0; i < s.Length; i++)
                root.insertSuffix(s, i);
        }

        // Recursively counts the nodes of the subtrie rooted at node
        // (0 for a null node)
        public int _countNodesInTrie(SuffixTrieNode node)
        {
            if (node == null)
                return 0;

            // Start at 1 to account for node itself
            int count = 1;
            for (int i = 0; i < MAX_CHAR; i++)
            {
                if (node.children[i] != null)
                    count += _countNodesInTrie(node.children[i]);
            }
            return count;
        }

        // Returns the total number of nodes in the trie, root included
        public int countNodesInTrie()
        {
            return _countNodesInTrie(root);
        }
    }

    // Returns the count of distinct substrings of str. The empty
    // substring is counted as well (it is represented by the root node).
    static int countDistinctSubstring(String str)
    {
        // Construct a trie of all suffixes
        Suffix_trie sTrie = new Suffix_trie(str);

        // Every node stands for exactly one distinct substring
        return sTrie.countNodesInTrie();
    }

    // Driver program to test above function
    public static void Main(String []args)
    {
        String str = "ababa";
        Console.WriteLine("Count of distinct substrings is "
                + countDistinctSubstring(str));
    }
}

// This code contributed by Rajput-Ji

Output:
Count of distinct substrings is 10

We will soon be discussing Suffix Array and Suffix Tree based approaches for this problem.

This article is contributed by Utkarsh Trivedi. If you like GeeksforGeeks and would like to contribute, you can also write an article using contribute.geeksforgeeks.org or mail your article to contribute@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.