Open In App

Find shortest unique prefix for every word in a given list | Set 1 (Using Trie)

Given an array of words, find the shortest unique prefix that represents each word in the array. Assume that no word is a prefix of another. 
Examples: 
 

Input: arr[] = {"zebra", "dog", "duck", "dove"}
Output: dog, dov, du, z
Explanation: dog => dog
             dove => dov 
             duck => du
             zebra => z

Input: arr[] = {"geeksgeeks", "geeksquiz", "geeksforgeeks"}
Output: geeksf, geeksg, geeksq

 

A Simple Solution is to consider every prefix of every word (from the shortest to the longest), and if a prefix is not a prefix of any other string, then print it. 
An Efficient Solution is to use Trie. The idea is to maintain a count in every node. Below are steps.
1) Construct a Trie of all words. Also maintain frequency of every node (Here frequency is number of times node is visited during insertion). Time complexity of this step is O(N) where N is total number of characters in all words. 
2) Now, for every word, we find the character nearest to the root with frequency as 1. The prefix of the word is path from root to this character. To do this, we can traverse Trie starting from root. For every node being traversed, we check its frequency. If frequency is one, we print all characters from root to this node and don’t traverse down this node.
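The time complexity of this step is also O(N), where N is the total number of characters in all words. The trie below illustrates the node frequencies for the words {"zebra", "dog", "duck", "dot"}. 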
 

                root
                / \
         (d, 3)/   \(z, 1)
              /     \
          Node1     Node2
           / \          \
     (o,2)/   \(u,1)     \(e,1)
         /     \          \
   Node1.1    Node1.2     Node2.1
      /  \         \            \
(g,1)/    \ (t,1)   \(c,1)       \(b,1)
    /      \         \            \ 
   Leaf   Leaf       Node1.2.1     Node2.1.1
   (dog)  (dot)        \               \
                         \(k, 1)          \(r, 1)
                          \                \   
                          Leaf           Node2.1.1.1
                          (duck)              \
                                                \(a,1)
                                                 \
                                                 Leaf
                                                 (zebra)

Below is the implementation of above idea. 
 




// C++ program to print all prefixes that
// uniquely represent words.
#include<bits/stdc++.h>
using namespace std;
 
// Number of child slots per trie node; a character's code is used
// directly as the index into the child array.
#define MAX 256
 
// Maximum length of an input word
#define MAX_WORD_LEN 500
 
// Trie Node.
struct trieNode
{
    struct trieNode *child[MAX];
    int freq;  // To store frequency
};
 
// Function to create a new trie node.
struct trieNode *newTrieNode(void)
{
    struct trieNode *newNode = new trieNode;
    newNode->freq   = 1;
    for (int i = 0; i<MAX; i++)
        newNode->child[i] = NULL;
    return newNode;
}
 
// Method to insert a new string into Trie
void insert(struct trieNode *root, string str)
{
    // Length of the URL
    int len = str.length();
    struct trieNode *pCrawl = root;
 
    // Traversing over the length of given str.
    for (int level = 0; level<len; level++)
    {
        // Get index of child node from current character
        // in str.
        int index = str[level];
 
        // Create a new child if not exist already
        if (!pCrawl->child[index])
            pCrawl->child[index] = newTrieNode();
        else
           (pCrawl->child[index]->freq)++;
 
        // Move to the child
        pCrawl = pCrawl->child[index];
    }
}
 
// This function prints unique prefix for every word stored
// in Trie. Prefixes one by one are stored in prefix[].
// 'ind' is current index of prefix[]
void findPrefixesUtil(struct trieNode *root, char prefix[],
                      int ind)
{
    // Corner case
    if (root == NULL)
       return;
 
    // Base case
    if (root->freq == 1)
    {
       prefix[ind] = '\0';
       cout << prefix << " ";
       return;
    }
 
    for (int i=0; i<MAX; i++)
    {
       if (root->child[i] != NULL)
       {
           prefix[ind] = i;
           findPrefixesUtil(root->child[i], prefix, ind+1);
       }
    }
}
 
// Function to print all prefixes that uniquely
// represent all words in arr[0..n-1]
void findPrefixes(string arr[], int n)
{
    // Construct a Trie of all words
    struct trieNode *root = newTrieNode();
    root->freq = 0;
    for (int i = 0; i<n; i++)
        insert(root, arr[i]);
 
    // Create an array to store all prefixes
    char prefix[MAX_WORD_LEN];
 
    // Print all prefixes using Trie Traversal
    findPrefixesUtil(root, prefix, 0);
}
 
// Driver function.
int main()
{
    string arr[] = {"zebra", "dog", "duck", "dove"};
    int n = sizeof(arr)/sizeof(arr[0]);
    findPrefixes(arr, n);
 
    return 0;
}




// Java program to print all prefixes that
// uniquely represent words.
public class Unique_Prefix_Trie {
      
    static final int MAX  = 256;
      
    // Maximum length of an input word
    static final int MAX_WORD_LEN = 500;
      
    // Trie Node.
    static class TrieNode
    {
        TrieNode[] child = new TrieNode[MAX];
        int freq;  // To store frequency
        TrieNode() {
            freq =1;
            for (int i = 0; i < MAX; i++)
                child[i] = null;
        }
    }
    static TrieNode root;
     
    // Method to insert a new string into Trie
    static void insert(String str)
    {
        // Length of the URL
        int len = str.length();
        TrieNode pCrawl = root;
      
        // Traversing over the length of given str.
        for (int level = 0; level<len; level++)
        {
            // Get index of child node from current character
            // in str.
            int index = str.charAt(level);
      
            // Create a new child if not exist already
            if (pCrawl.child[index] == null)
                pCrawl.child[index] = new TrieNode();
            else
               (pCrawl.child[index].freq)++;
      
            // Move to the child
            pCrawl = pCrawl.child[index];
        }
    }
      
    // This function prints unique prefix for every word stored
    // in Trie. Prefixes one by one are stored in prefix[].
    // 'ind' is current index of prefix[]
    static void findPrefixesUtil(TrieNode root, char[] prefix,
                          int ind)
    {
        // Corner case
        if (root == null)
           return;
      
        // Base case
        if (root.freq == 1)
        {
           prefix[ind] = '\0';
           int i = 0;
           while(prefix[i] != '\0')
            System.out.print(prefix[i++]);
           System.out.print("  ");
           return;
        }
      
        for (int i=0; i<MAX; i++)
        {
           if (root.child[i] != null)
           {
               prefix[ind] = (char) i;
               findPrefixesUtil(root.child[i], prefix, ind+1);
           }
        }
    }
      
    // Function to print all prefixes that uniquely
    // represent all words in arr[0..n-1]
    static void findPrefixes(String arr[], int n)
    {
        // Construct a Trie of all words
        root = new TrieNode();
        root.freq = 0;
        for (int i = 0; i<n; i++)
            insert(arr[i]);
      
        // Create an array to store all prefixes
        char[] prefix = new char[MAX_WORD_LEN];
         
        // Print all prefixes using Trie Traversal
        findPrefixesUtil(root, prefix, 0);
    }
      
    // Driver function.
    public static void main(String args[])
    {
        String arr[] = {"zebra", "dog", "duck", "dove"};
        int n = arr.length;
        findPrefixes(arr, n);
    }
}
// This code is contributed by Sumit Ghosh




# Python program to print all prefixes that
# uniquely represent words.
# Number of child slots per trie node; a character's ord() value is
# used directly as the slot index.
MAX=256
 
# Maximum length of an input word
MAX_WORD_LEN=500
 
# Trie node: child links plus a visit counter.
class TrieNode:
    """One trie node with MAX child slots and a frequency starting at 1."""

    def __init__(self):
        # child[c] is the node reached through character code c, or None
        self.child = [None for _ in range(MAX)]
        # A node is created on its first visit, hence 1
        self.freq = 1
 
# Factory for trie nodes.
def newTrieNode():
    """Return a freshly constructed TrieNode."""
    return TrieNode()
 
# Inserts a word into the trie.
def insert(root, str):
    """Insert ``str`` below ``root``, counting a visit on every node of its path.

    A node created here keeps the frequency its constructor gives it; a node
    that already exists has its ``freq`` incremented. ``root.freq`` itself is
    not modified.
    """
    node = root
    for ch in str:
        # The character code is the child slot
        slot = ord(ch)
        nxt = node.child[slot]
        if nxt:
            # Path already exists: one more word runs through it
            nxt.freq += 1
        else:
            # First word to reach this slot
            nxt = newTrieNode()
            node.child[slot] = nxt
        # Descend
        node = nxt
 
# Depth-first walk printing the path to the first frequency-1 node
# on every branch.
def findPrefixesUtil(root, prefix, ind):
    """Print unique prefixes found below ``root``, each followed by a space.

    root   -- node currently being visited (a falsy value is ignored)
    prefix -- scratch list holding the characters on the path so far
    ind    -- number of characters currently stored in ``prefix``
    """
    if not root:
        return

    if root.freq != 1:
        # Still shared by several words: extend the path through every
        # existing child, in increasing character-code order.
        for code in range(MAX):
            nxt = root.child[code]
            if nxt:
                prefix[ind] = chr(code)
                findPrefixesUtil(nxt, prefix, ind + 1)
        return

    # Visited only once: the first ``ind`` characters are a unique prefix.
    prefix[ind] = ""
    print("".join(prefix[:ind]), end=" ")
 
# Function to print all prefixes that uniquely
# represent all words in arr[0..n-1]
def findPrefixes(arr, n):
    """Print the unique prefix of each of the first ``n`` words of ``arr``.

    Each prefix is followed by a single space. The scratch buffer is sized
    from the longest word, so words of MAX_WORD_LEN or more characters no
    longer raise IndexError.
    """
    # Construct a Trie of all words. The root's frequency is forced to 0
    # so the traversal never stops on it.
    root = newTrieNode()
    root.freq = 0

    longest = 0
    for i in range(n):
        insert(root, arr[i])
        longest = max(longest, len(arr[i]))

    # One extra slot because findPrefixesUtil writes at index == word length
    prefix = [None] * max(MAX_WORD_LEN, longest + 1)

    # Print all prefixes using Trie Traversal
    findPrefixesUtil(root, prefix, 0)
 
# Driver: runs the sample word list when executed as a script.
if __name__ == "__main__":
    words = ["zebra", "dog", "duck", "dove"]
    findPrefixes(words, len(words))
 
#  This code is contributed by Aman Kumar.




// C# program to print all prefixes that
// uniquely represent words.
using System;
 
public class Unique_Prefix_Trie
{
    // Number of child slots per node; a character's code is used
    // directly as the slot index.
    static readonly int MAX = 256;

    // Minimum capacity of the prefix buffer; the buffer grows beyond
    // this when a longer word is supplied.
    static readonly int MAX_WORD_LEN = 500;

    // Trie node: child links plus a visit counter.
    public class TrieNode
    {
        // child[c] is the node reached through character code c.
        // A new array is already filled with null.
        public TrieNode[] child = new TrieNode[MAX];

        // How many times this node was visited during insertion
        public int freq;

        public TrieNode()
        {
            // A node is created on its first visit
            freq = 1;
        }
    }

    // Root of the trie built by the most recent findPrefixes() call
    static TrieNode root;

    // Inserts str into the trie rooted at 'root'. Nodes created here
    // start with frequency 1; nodes that already exist are incremented.
    // A character whose code is >= MAX indexes past the child array
    // and raises IndexOutOfRangeException.
    static void insert(String str)
    {
        TrieNode pCrawl = root;

        // Walk the word one character at a time.
        for (int level = 0; level < str.Length; level++)
        {
            // The character code is the child index
            int index = str[level];

            // Create the child on first visit, otherwise count the visit
            if (pCrawl.child[index] == null)
                pCrawl.child[index] = new TrieNode();
            else
                pCrawl.child[index].freq++;

            // Move to the child
            pCrawl = pCrawl.child[index];
        }
    }

    // Depth-first walk that prints, followed by a single space, the
    // path from the trie root to the first node with frequency 1 on
    // every branch.
    //   root   - node currently being visited (null is ignored)
    //   prefix - scratch buffer holding the characters on the path
    //   ind    - number of characters currently stored in prefix[]
    static void findPrefixesUtil(TrieNode root, char[] prefix,
                        int ind)
    {
        // Corner case
        if (root == null)
            return;

        // Base case: visited once, so the path so far is unique.
        if (root.freq == 1)
        {
            // Print exactly the first 'ind' characters; no sentinel
            // write is needed, so prefix[ind] is never touched here.
            Console.Write(new string(prefix, 0, ind));
            Console.Write(" ");
            return;
        }

        // Still shared: extend the path through every existing child.
        for (int i = 0; i < MAX; i++)
        {
            if (root.child[i] != null)
            {
                prefix[ind] = (char) i;
                findPrefixesUtil(root.child[i], prefix, ind + 1);
            }
        }
    }

    // Prints the prefixes that uniquely represent the words in
    // arr[0..n-1], each followed by a single space.
    static void findPrefixes(String []arr, int n)
    {
        // Construct a Trie of all words. The root's frequency is
        // forced to 0 so the traversal never stops on it.
        root = new TrieNode();
        root.freq = 0;

        int longest = 0;
        for (int i = 0; i < n; i++)
        {
            insert(arr[i]);
            longest = Math.Max(longest, arr[i].Length);
        }

        // Scratch buffer large enough for the longest word, so words
        // longer than MAX_WORD_LEN no longer overflow it.
        char[] prefix = new char[Math.Max(MAX_WORD_LEN, longest + 1)];

        // Print all prefixes using Trie Traversal
        findPrefixesUtil(root, prefix, 0);
    }

    // Driver code
    public static void Main()
    {
        String []arr = {"zebra", "dog", "duck", "dove"};
        int n = arr.Length;
        findPrefixes(arr, n);
    }
}
 
/* This code contributed by PrinciRaj1992 */




<script>
    // Javascript code
     
    const MAX = 256;
    // Maximum length of an input word
    const MAX_WORD_LEN = 500;
     
    // Trie Node.
    class TrieNode {
      constructor() {
    this.child = new Array(MAX);
    this.freq = 0; // To store frequency
      }
    }
     
    // Function to create a new trie node.
    function newTrieNode() {
      return new TrieNode();
    }
     
    // Method to insert a new string into Trie
    function insert(root, str) {
      // Length of the string
      const len = str.length;
      let pCrawl = root;
     
      // Traversing over the length of given str.
      for (let level = 0; level < len; level++) {
    // Get index of child node from current character
    // in string.
    const index = str.charCodeAt(level);
     
    // Create a new child if not exist already.
    if (!pCrawl.child[index]) {
      pCrawl.child[index] = newTrieNode();
    }
    pCrawl.child[index].freq++;
     
    // Move to the child.
    pCrawl = pCrawl.child[index];
      }
    }
     
    // This function prints unique prefix for every word stored
    // in Trie. Prefixes one by one are stored in prefix[].
    // 'ind' is current index of prefix[]
    function findPrefixesUtil(root, prefix, ind) {
      // Corner case
      if (root == null) return;
     
      // Base case
      if (root.freq == 1) {
          prefix[ind] = '\0';
           let i = 0;
           while(prefix[i] != '\0')
            document.write(prefix[i++]);
           document.write(" ");
      return;
      }
     
      for (let i = 0; i < MAX; i++) {
    if (root.child[i] != null) {
      prefix[ind] = String.fromCharCode(i);
      findPrefixesUtil(root.child[i], prefix, ind + 1);
    }
      }
    }
     
    // Function to print all prefixes that uniquely
    // represent all words in arr[0..n-1]
    function findPrefixes(arr, n) {
      // Construct a Trie of all words
      const root = newTrieNode();
      for (let i = 0; i < n; i++) {
    insert(root, arr[i]);
      }
     
      // Create an array to store all prefixes
      const prefix = new Array(MAX_WORD_LEN);
     
      // Print all prefixes using Trie Traversal
      findPrefixesUtil(root, prefix, 0);
    }
     
    // Driver function.
    const arr = ['zebra', 'dog', 'duck', 'dove'];
    const n = arr.length;
    findPrefixes(arr, n);
     
    // This code is contributed by Utkarsh Kumar.
     
</script>

Output: 

dog dov du z

Time Complexity: O(n*m) where n is the length of the array and m is the length of the longest word.
Auxiliary Space: O(n*m)

Thanks to Gaurav Ahirwar for suggesting above solution.

 


Article Tags :