Given an array of words, find the shortest unique prefix that represents each word in the array. Assume that no word is a prefix of another.
Examples:
Input: arr[] = {"zebra", "dog", "duck", "dove"}
Output: dog, dov, du, z
Explanation: dog => dog
dove => dov
duck => du
zebra => z
Input: arr[] = {"geeksgeeks", "geeksquiz", "geeksforgeeks"};
Output: geeksf, geeksg, geeksq
A Simple Solution is to consider every prefix of every word (from the shortest to the longest), and if a prefix is not a prefix of any other string, then print it.
An Efficient Solution is to use Trie. The idea is to maintain a count in every node. Below are steps.
1) Construct a Trie of all words. Also maintain frequency of every node (Here frequency is number of times node is visited during insertion). Time complexity of this step is O(N) where N is total number of characters in all words.
2) Now, for every word, we find the character nearest to the root with frequency as 1. The prefix of the word is path from root to this character. To do this, we can traverse Trie starting from root. For every node being traversed, we check its frequency. If frequency is one, we print all characters from root to this node and don’t traverse down this node.
The time complexity of this step is also O(N), where N is the total number of characters in all words.
Trie for the words {"zebra", "dog", "duck", "dot"}; every edge is labelled (character, frequency):

                root
                / \
         (d, 3)/   \(z, 1)
              /     \
          Node1     Node2
           / \          \
     (o,2)/   \(u,1)     \(e,1)
         /     \          \
   Node1.1    Node1.2     Node2.1
      /  \       \            \
(g,1)/    \(t,1)  \(c,1)       \(b,1)
    /      \       \            \
   Leaf   Leaf    Node1.2.1     Node2.1.1
   (dog)  (dot)      \               \
                      \(k, 1)         \(r, 1)
                       \               \
                       Leaf           Node2.1.1.1
                       (duck)              \
                                            \(a,1)
                                             \
                                             Leaf
                                            (zebra)
Below is the implementation of above idea.
C++
#include<bits/stdc++.h>
using namespace std;
// Number of child slots in every trie node; insert() indexes the child array
// directly with a character's value.
const int MAX = 256;

// Default capacity, in chars, of the scratch buffer in which prefixes are
// assembled before being printed.
const int MAX_WORD_LEN = 500;
// A single trie vertex.
//   child[c] - subtree reached through the character with value c, or NULL.
//   freq     - number of inserted words whose path goes through this vertex
//              (set to 1 on creation, incremented on every later visit).
struct trieNode
{
    trieNode *child[MAX];
    int freq;
};
// Allocates a trie node with no children and a visit count of 1 (the word
// that causes the node to be created counts as its first visitor).
// The caller owns the returned node.
struct trieNode *newTrieNode( void )
{
    trieNode *node = new trieNode;
    trieNode *const noChild = NULL;
    std::fill(node->child, node->child + MAX, noChild);
    node->freq = 1;
    return node;
}
// Inserts `str` into the trie rooted at `root`.
//
// Walks the word character by character. A missing child is created (its
// visit count starts at 1); an existing child has its visit count incremented.
// `root` itself is never modified apart from gaining children.
//
// Each character is converted through `unsigned char` before it is used as an
// index: plain `char` may be signed, and a byte >= 0x80 would otherwise yield
// a negative index and access memory outside child[].
void insert( struct trieNode *root, string str)
{
    struct trieNode *pCrawl = root;
    for (string::size_type level = 0; level < str.length(); level++)
    {
        const int index = static_cast<unsigned char>(str[level]);
        if (!pCrawl->child[index])
            pCrawl->child[index] = newTrieNode();
        else
            (pCrawl->child[index]->freq)++;
        pCrawl = pCrawl->child[index];
    }
}
// Depth-first walk that prints, separated by single spaces, the path from the
// trie root to every first node whose visit count is 1.
//
//   root   - node currently being visited (NULL is ignored)
//   prefix - scratch buffer holding the characters on the path so far; must
//            have room for index `ind`
//   ind    - number of characters currently stored in `prefix`
//
// Children are visited in increasing character value, so the output is in
// that order. The walk does not descend below a node with visit count 1.
void findPrefixesUtil( struct trieNode *root, char prefix[],
                       int ind)
{
    if (!root)
        return;

    if (root->freq == 1)
    {
        // Terminate the path so it can be streamed as a C string.
        prefix[ind] = '\0';
        cout << prefix << " ";
        return;
    }

    for (int code = 0; code != MAX; ++code)
    {
        trieNode *next = root->child[code];
        if (next == NULL)
            continue;
        prefix[ind] = static_cast<char>(code);
        findPrefixesUtil(next, prefix, ind + 1);
    }
}
void findPrefixes(string arr[], int n)
{
struct trieNode *root = newTrieNode();
root->freq = 0;
for ( int i = 0; i<n; i++)
insert(root, arr[i]);
char prefix[MAX_WORD_LEN];
findPrefixesUtil(root, prefix, 0);
}
// Demo driver: prints the shortest unique prefixes of a fixed word list.
int main()
{
    string words[] = { "zebra", "dog", "duck", "dove" };
    const int count = sizeof(words) / sizeof(words[0]);
    findPrefixes(words, count);
    return 0;
}
|
Java
/**
 * Prints the shortest unique prefix of every word in an array, using a trie
 * whose nodes count how many inserted words pass through them.
 *
 * Characters are used directly as indices into a MAX-sized child array, so
 * every character of every word must have a code below MAX.
 */
public class Unique_Prefix_Trie {

    /** Number of child slots per trie node. */
    static final int MAX = 256;

    /** Minimum capacity of the scratch buffer used while building prefixes. */
    static final int MAX_WORD_LEN = 500;

    /** One trie vertex. */
    static class TrieNode
    {
        /** child[c] is the node reached through character code c, or null. */
        TrieNode[] child = new TrieNode[MAX];

        /** Number of inserted words whose path goes through this node. */
        int freq;

        TrieNode() {
            // The word that creates a node is its first visitor. The child
            // array needs no explicit clearing: Java zero-initialises it.
            freq = 1;
        }
    }

    /** Root of the trie built by the most recent findPrefixes() call. */
    static TrieNode root;

    /**
     * Inserts str into the trie rooted at the static root, creating missing
     * nodes and incrementing the visit count of nodes that already exist.
     */
    static void insert(String str)
    {
        int len = str.length();
        TrieNode pCrawl = root;
        for (int level = 0; level < len; level++)
        {
            int index = str.charAt(level);
            if (pCrawl.child[index] == null)
                pCrawl.child[index] = new TrieNode();
            else
                (pCrawl.child[index].freq)++;
            pCrawl = pCrawl.child[index];
        }
    }

    /**
     * Depth-first walk that prints, each followed by one space, the path from
     * the trie root to every first node whose visit count is 1.
     *
     * @param root   node currently being visited (null is ignored)
     * @param prefix scratch buffer holding the characters on the current path
     * @param ind    number of characters currently stored in prefix
     */
    static void findPrefixesUtil(TrieNode root, char[] prefix,
                                 int ind)
    {
        if (root == null)
            return;

        if (root.freq == 1)
        {
            // Print exactly the ind characters on the path; no sentinel is
            // needed, so the buffer never has to hold more than the path.
            System.out.print(new String(prefix, 0, ind));
            System.out.print(" ");
            return;
        }

        for (int i = 0; i < MAX; i++)
        {
            if (root.child[i] != null)
            {
                prefix[ind] = (char) i;
                findPrefixesUtil(root.child[i], prefix, ind + 1);
            }
        }
    }

    /**
     * Prints the shortest unique prefix of each of the first n words of arr.
     *
     * The scratch buffer is sized from the longest input word, so words of
     * MAX_WORD_LEN characters or more no longer overflow it.
     */
    static void findPrefixes(String arr[], int n)
    {
        root = new TrieNode();
        // The root lies on every path; 0 keeps it from ever looking unique.
        root.freq = 0;

        int longest = 0;
        for (int i = 0; i < n; i++)
        {
            insert(arr[i]);
            longest = Math.max(longest, arr[i].length());
        }

        char[] prefix = new char[Math.max(MAX_WORD_LEN, longest + 1)];
        findPrefixesUtil(root, prefix, 0);
    }

    /** Demo driver using a fixed word list. */
    public static void main(String args[])
    {
        String arr[] = { "zebra", "dog", "duck", "dove" };
        int n = arr.length;
        findPrefixes(arr, n);
    }
}
|
Python3
# Number of child slots per trie node; insert() indexes them with ord(ch).
MAX = 256
# Default length of the scratch list in which prefixes are assembled.
MAX_WORD_LEN = 500
class TrieNode:
    """One trie vertex with MAX child slots and a visit counter."""

    def __init__(self):
        # child[c] is the node reached through the character with code c,
        # or None when no inserted word continues with that character.
        self.child = [None] * MAX
        # The word that creates a node counts as its first visitor.
        self.freq = 1
def newTrieNode():
    """Return a fresh TrieNode (no children, visit count 1)."""
    return TrieNode()
def insert(root, str):
    """Insert the word ``str`` into the trie rooted at ``root``.

    Missing nodes along the word's path are created with newTrieNode();
    nodes that already exist have their ``freq`` incremented. ``root`` itself
    only gains children. Every character's ord() must be a valid index into
    the nodes' ``child`` lists.
    """
    pCrawl = root
    for ch in str:
        index = ord(ch)
        if not pCrawl.child[index]:
            pCrawl.child[index] = newTrieNode()
        else:
            pCrawl.child[index].freq += 1
        pCrawl = pCrawl.child[index]
def findPrefixesUtil(root, prefix, ind):
    """Print the path to every first node whose visit count is 1.

    Performs a depth-first walk from ``root``. ``prefix`` is a scratch list
    holding the characters on the current path and ``ind`` is how many of
    them are in use. Each printed prefix is followed by a single space;
    children are visited in increasing character code.
    """
    if not root:
        return
    if root.freq == 1:
        # Join with the empty string: the prefix must be printed as one word.
        print("".join(prefix[:ind]), end=" ")
        return
    for code, child in enumerate(root.child):
        if child:
            # Grow the scratch list when the path is longer than the space
            # the caller pre-allocated, instead of raising IndexError.
            if ind < len(prefix):
                prefix[ind] = chr(code)
            else:
                prefix.append(chr(code))
            findPrefixesUtil(child, prefix, ind + 1)
def findPrefixes(arr, n):
    """Print the shortest unique prefix of each of the first ``n`` words of ``arr``.

    Builds a trie of the words and walks it with findPrefixesUtil(). The
    root's ``freq`` is forced to 0 so the root itself never looks unique.
    """
    root = newTrieNode()
    root.freq = 0
    longest = 0
    for i in range(n):
        insert(root, arr[i])
        longest = max(longest, len(arr[i]))
    # Size the scratch list from the longest word (plus one spare slot) so
    # words longer than MAX_WORD_LEN do not run past its end.
    prefix = [None] * max(MAX_WORD_LEN, longest + 1)
    findPrefixesUtil(root, prefix, 0)
# Demo driver: runs only when the file is executed as a script.
if __name__ == "__main__":
    arr = ["zebra", "dog", "duck", "dove"]
    n = len(arr)
    findPrefixes(arr, n)
|
C#
using System;
/// <summary>
/// Prints the shortest unique prefix of every word in an array, using a trie
/// whose nodes count how many inserted words pass through them.
/// Characters are used directly as indices into a MAX-sized child array, so
/// every character of every word must have a code below MAX.
/// </summary>
public class Unique_Prefix_Trie
{
    // Number of child slots per trie node.
    static readonly int MAX = 256;

    // Minimum capacity of the scratch buffer used while building prefixes.
    static readonly int MAX_WORD_LEN = 500;

    /// <summary>One trie vertex.</summary>
    public class TrieNode
    {
        // child[c] is the node reached through character code c, or null.
        public TrieNode[] child = new TrieNode[MAX];

        // Number of inserted words whose path goes through this node.
        public int freq;

        public TrieNode()
        {
            // The word that creates a node is its first visitor. The child
            // array needs no explicit clearing: new arrays start out null.
            freq = 1;
        }
    }

    // Root of the trie built by the most recent findPrefixes() call.
    static TrieNode root;

    /// <summary>
    /// Inserts str into the trie rooted at the static root, creating missing
    /// nodes and incrementing the visit count of nodes that already exist.
    /// </summary>
    static void insert(String str)
    {
        int len = str.Length;
        TrieNode pCrawl = root;
        for (int level = 0; level < len; level++)
        {
            int index = str[level];
            if (pCrawl.child[index] == null)
                pCrawl.child[index] = new TrieNode();
            else
                (pCrawl.child[index].freq)++;
            pCrawl = pCrawl.child[index];
        }
    }

    /// <summary>
    /// Depth-first walk that writes, each followed by one space, the path
    /// from the trie root to every first node whose visit count is 1.
    /// </summary>
    /// <param name="root">Node being visited; null is ignored.</param>
    /// <param name="prefix">Scratch buffer holding the current path.</param>
    /// <param name="ind">Number of characters currently in prefix.</param>
    static void findPrefixesUtil(TrieNode root, char[] prefix,
                                 int ind)
    {
        if (root == null)
            return;

        if (root.freq == 1)
        {
            // Write exactly the ind characters on the path; no sentinel is
            // needed, so the buffer never has to hold more than the path.
            Console.Write(new String(prefix, 0, ind));
            Console.Write(" ");
            return;
        }

        for (int i = 0; i < MAX; i++)
        {
            if (root.child[i] != null)
            {
                prefix[ind] = (char) i;
                findPrefixesUtil(root.child[i], prefix, ind + 1);
            }
        }
    }

    /// <summary>
    /// Writes the shortest unique prefix of each of the first n words of arr.
    /// The scratch buffer is sized from the longest input word, so words of
    /// MAX_WORD_LEN characters or more no longer overflow it.
    /// </summary>
    static void findPrefixes(String []arr, int n)
    {
        root = new TrieNode();
        // The root lies on every path; 0 keeps it from ever looking unique.
        root.freq = 0;

        int longest = 0;
        for (int i = 0; i < n; i++)
        {
            insert(arr[i]);
            longest = Math.Max(longest, arr[i].Length);
        }

        char[] prefix = new char[Math.Max(MAX_WORD_LEN, longest + 1)];
        findPrefixesUtil(root, prefix, 0);
    }

    /// <summary>Demo driver using a fixed word list.</summary>
    public static void Main()
    {
        String []arr = { "zebra", "dog", "duck", "dove" };
        int n = arr.Length;
        findPrefixes(arr, n);
    }
}
|
Javascript
<script>
// Number of character codes (0 .. MAX-1) scanned per node when walking the trie.
const MAX = 256;
// Length passed to the Array constructor for the prefix scratch array.
const MAX_WORD_LEN = 500;
/**
 * One trie vertex.
 *
 * `child[c]` is the node reached through character code `c` (an empty slot
 * when absent). `freq` starts at 0 and is incremented by insert() on every
 * visit, including the visit that creates the node.
 */
class TrieNode {
  constructor() {
    this.child = Array(MAX);
    this.freq = 0;
  }
}
/**
 * Creates an empty trie node.
 * @returns {TrieNode} a node with no children and a visit count of 0
 */
function newTrieNode() {
  const node = new TrieNode();
  return node;
}
/**
 * Inserts `str` into the trie rooted at `root`.
 *
 * Every node on the word's path (created on demand) has its `freq`
 * incremented once; `root` itself is not counted.
 *
 * @param {TrieNode} root root of the trie
 * @param {string} str word to insert
 */
function insert(root, str) {
  let node = root;
  for (let pos = 0; pos < str.length; pos += 1) {
    const code = str.charCodeAt(pos);
    const next = node.child[code] || (node.child[code] = newTrieNode());
    next.freq += 1;
    node = next;
  }
}
/**
 * Depth-first walk that writes to the document, each followed by one space,
 * the path from the trie root to every first node whose `freq` is 1.
 *
 * @param {TrieNode} root node currently being visited (null/undefined is ignored)
 * @param {string[]} prefix scratch array holding the characters on the current path
 * @param {number} ind number of characters currently stored in `prefix`
 */
function findPrefixesUtil(root, prefix, ind) {
  if (root == null) {
    return;
  }

  if (root.freq == 1) {
    // Mark the end of the current path, then emit it one character at a time.
    prefix[ind] = '\0';
    for (let pos = 0; prefix[pos] != '\0'; pos++) {
      document.write(prefix[pos]);
    }
    document.write(" ");
    return;
  }

  for (let code = 0; code < MAX; code++) {
    const next = root.child[code];
    if (next == null) {
      continue;
    }
    prefix[ind] = String.fromCharCode(code);
    findPrefixesUtil(next, prefix, ind + 1);
  }
}
/**
 * Writes the shortest unique prefix of each of the first `n` words of `arr`.
 *
 * Builds a trie of the words, then walks it with findPrefixesUtil(). The
 * root keeps its initial `freq` of 0, so it is never treated as unique.
 *
 * @param {string[]} arr words to process
 * @param {number} n number of leading entries of `arr` to use
 */
function findPrefixes(arr, n) {
  const trie = newTrieNode();
  let i = 0;
  while (i < n) {
    insert(trie, arr[i]);
    i += 1;
  }
  findPrefixesUtil(trie, new Array(MAX_WORD_LEN), 0);
}
// Demo driver: write the shortest unique prefixes of a fixed word list.
const arr = [ 'zebra' , 'dog' , 'duck' , 'dove' ];
const n = arr.length;
findPrefixes(arr, n);
</script>
|
Output:
dog dov du z
Time Complexity: O(n*m) where n is the length of the array and m is the length of the longest word.
Auxiliary Space: O(n*m)
Thanks to Gaurav Ahirwar for suggesting above solution.
Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.