Counting the number of words in a Trie

A Trie is used to store dictionary words so that they can be searched efficiently and prefix search can be done. The task is to write a function to count the number of words.

Example :

Input :     root
          /   \    \
         t   a     b
         |   |     |
         h   n     y
         |   |  \  |
         e   s  y  e
         /  |   |
         i  r   w
         |  |   |
         r  e   e
                |
                r
Output : 8
Explanation : Words formed in the Trie : 
"the", "a", "there", "answer", "any", "by", 
"bye", "their".



In Trie structure, we have a field to store end of word marker, we call it isLeaf in below implementation. To count words, we need to simply traverse the Trie and count all nodes where isLeaf is set.

C++

// C++ implementation to count words in a trie
#include <bits/stdc++.h>
using namespace std;
   
#define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])
   
// Alphabet size (# of symbols)
#define ALPHABET_SIZE (26)
   
// Converts key current character into index
// use only 'a' through 'z' and lower case
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
  
// Trie node
struct TrieNode
{
    struct TrieNode *children[ALPHABET_SIZE];
   
    // isLeaf is true if the node represents
    // end of a word
    bool isLeaf;
};
   
// Returns new trie node (initialized to NULLs)
struct TrieNode *getNode(void)
{
    struct TrieNode *pNode = new TrieNode;
        pNode->isLeaf = false;
   
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;    
   
    return pNode;
}
   
// If not present, inserts key into trie
// If the key is prefix of trie node, just
// marks leaf node
void insert(struct TrieNode *root, const char *key)
{
    int length = strlen(key);
  
    struct TrieNode *pCrawl = root;
   
    for (int level = 0; level < length; level++)
    {
        int index = CHAR_TO_INDEX(key[level]);
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
   
        pCrawl = pCrawl->children[index];
    }
   
    // mark last node as leaf
    pCrawl->isLeaf = true;
}
  
// Function to count number of words
int wordCount(struct TrieNode *root)
{
    int result = 0;
  
    // Leaf denotes end of a word
    if (root -> isLeaf)
        result++;
      
    for (int i = 0; i < ALPHABET_SIZE; i++)    
      if (root -> children[i])
         result += wordCount(root -> children[i]);
     
    return result;   
}
  
// Driver
int main()
{
    // Input keys (use only 'a' through 'z' 
    // and lower case)
    char keys[][8] = {"the", "a", "there", "answer"
                     "any", "by", "bye", "their"};
  
   
    struct TrieNode *root = getNode();
   
    // Construct Trie
    for (int i = 0; i < ARRAY_SIZE(keys); i++)
        insert(root, keys[i]);
  
    cout << wordCount(root);
      
    return 0;
}

Java

// Java implementation to count words in a trie
public class Words_in_trie {
      
    // Alphabet size (# of symbols)
    static final int ALPHABET_SIZE = 26;
       
    // Trie node
    static class TrieNode
    {
        TrieNode[] children =  new TrieNode[ALPHABET_SIZE];
        // isLeaf is true if the node represents
        // end of a word
        boolean isLeaf;
          
        TrieNode(){
            isLeaf = false;
            for (int i = 0; i < ALPHABET_SIZE; i++)
                 children[i] = null
        }
    };
    static TrieNode root;
        
    // If not present, inserts key into trie
    // If the key is prefix of trie node, just
    // marks leaf node
    static void insert(String key)
    {
        int length = key.length();
       
        TrieNode pCrawl = root;
        
        for (int level = 0; level < length; level++)
        {
            int index = key.charAt(level) - 'a';
            if (pCrawl.children[index] == null)
                pCrawl.children[index] = new TrieNode();
        
            pCrawl = pCrawl.children[index];
        }
        
        // mark last node as leaf
        pCrawl.isLeaf = true;
    }
       
    // Function to count number of words
    static int wordCount(TrieNode root)
    {
        int result = 0;
       
        // Leaf denotes end of a word
        if (root.isLeaf)
            result++;
           
        for (int i = 0; i < ALPHABET_SIZE; i++)    
          if (root.children[i] != null )
             result += wordCount(root.children[i]);
          
        return result;   
    }
       
    // Driver Program
    public static void main(String args[])
    {
        // Input keys (use only 'a' through 'z' 
        // and lower case)
        String keys[] = {"the", "a", "there", "answer"
                        "any", "by", "bye", "their"};
  
        root = new TrieNode();      
          
        // Construct Trie
        for (int i = 0; i < keys.length; i++)
            insert(keys[i]);
       
        System.out.println(wordCount(root)); 
    }
}
// This code is contributed by Sumit Ghosh


Output:

8

This article is contributed by Rohit Thapliyal. If you like GeeksforGeeks and would like to contribute, you can also write an article using contribute.geeksforgeeks.org or mail your article to contribute@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.

Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.



My Personal Notes arrow_drop_up


Article Tags :
Practice Tags :


Be the First to upvote.


Please write to us at contribute@geeksforgeeks.org to report any issue with the above content.