Given a sorted dictionary of an alien language, find order of characters
Given a sorted dictionary (array of words) of an alien language, find order of characters in the language.
Examples:
Input: words[] = {"baa", "abcd", "abca", "cab", "cad"}
Output: Order of characters is 'b', 'd', 'a', 'c'
Note that the words are sorted, and in the given language "baa" comes before "abcd"; therefore 'b' comes before 'a' in the output. The other orderings are deduced in the same way.
Input: words[] = {"caa", "aaa", "aab"}
Output: Order of characters is 'c', 'a', 'b'
Approach 1:
The idea is to create a graph of characters and then find topological sorting of the created graph. Following are the detailed steps.
1) Create a graph g with number of vertices equal to the size of alphabet in the given alien language. For example, if the alphabet size is 5, then there can be 5 characters in words. Initially there are no edges in graph.
2) Do following for every pair of adjacent words in given sorted array.
   a) Let the current pair of words be word1 and word2. Compare their characters one by one and find the first position at which they differ.
   b) Create an edge in g from the mismatching character of word1 to the mismatching character of word2.
3) Print topological sorting of the above created graph.
Implementations of the above approach in C++, Java, Python3, C# and Javascript are given below.
C++
// Alien dictionary (approach 1): derive the order of characters from a sorted
// list of words by building a precedence graph and printing one topological
// ordering of it.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <memory>
#include <stack>
#include <string>
#include <vector>
using namespace std;

// Records that character u comes before character v.
// adj is an array of adjacency lists indexed by (character - 'a'); the caller
// must make sure both characters lie inside the alphabet adj was sized for.
void addEdge(vector<int> adj[], char u, char v)
{
    adj[u - 'a'].push_back(v - 'a');
}

// Depth-first visit of vertex u. A vertex is pushed onto st only after every
// vertex reachable from it has been pushed, so popping st later yields the
// vertices in topological order.
void topologicalSortUtil(vector<int> adj[], int u, bool visited[], stack<int> &st)
{
    visited[u] = true;
    for (const int v : adj[u]) {
        if (!visited[v]) {
            topologicalSortUtil(adj, v, visited, st);
        }
    }
    st.push(u);
}

// Prints a topological ordering of the V vertices of adj as the letters
// 'a' + index, each followed by a single space.
void topologicalSort(vector<int> adj[], int V)
{
    if (V <= 0) {
        return; // nothing to order
    }

    // Heap-backed flags, value-initialised to false. This replaces the
    // original variable-length array, which is not part of ISO C++.
    unique_ptr<bool[]> visited = make_unique<bool[]>(static_cast<size_t>(V));
    stack<int> st;

    for (int i = 0; i < V; ++i) {
        if (!visited[i]) {
            topologicalSortUtil(adj, i, visited.get(), st);
        }
    }

    while (!st.empty()) {
        cout << static_cast<char>(st.top() + 'a') << " ";
        st.pop();
    }
}

// Prints the character order implied by the sorted array words[0..n-1].
// k is the alphabet size: the words may only contain the first k lowercase
// letters starting at 'a'.
void printOrder(string words[], int n, int k)
{
    if (k <= 0) {
        return; // empty alphabet: nothing to print
    }

    // One adjacency list per character; a std::vector replaces the original
    // variable-length array of vectors.
    vector<vector<int>> adj(static_cast<size_t>(k));

    for (int i = 0; i + 1 < n; ++i) {
        // References avoid copying both strings for every adjacent pair.
        const string &word1 = words[i];
        const string &word2 = words[i + 1];
        const size_t common = min(word1.length(), word2.length());

        // Only the first mismatch of a pair carries ordering information.
        for (size_t j = 0; j < common; ++j) {
            if (word1[j] != word2[j]) {
                addEdge(adj.data(), word1[j], word2[j]);
                break;
            }
        }
    }

    topologicalSort(adj.data(), k);
}

int main()
{
    string words[] = { "baa", "abcd", "abca", "cab", "cad" };
    int n = sizeof(words) / sizeof(words[0]);
    int k = 4;
    printOrder(words, n, k);
    return 0;
}
Java
import java.util.*;

/**
 * Alien dictionary (approach 1): builds a precedence graph from a sorted word
 * list and prints one topological ordering of its characters.
 */
public class Main {

    /** Adds the edge "u comes before v"; vertices are indexed by (character - 'a'). */
    public static void addEdge(ArrayList<Integer>[] adj, char u, char v) {
        int from = u - 'a';
        int to = v - 'a';
        adj[from].add(to);
    }

    /**
     * Depth-first visit of u. The vertex is pushed onto st only after all of
     * its unvisited successors have been pushed.
     */
    public static void topologicalSortUtil(ArrayList<Integer>[] adj, int u,
                                           boolean[] visited, Stack<Integer> st) {
        visited[u] = true;
        for (int successor : adj[u]) {
            if (visited[successor]) {
                continue;
            }
            topologicalSortUtil(adj, successor, visited, st);
        }
        st.push(u);
    }

    /** Prints the V vertices in topological order as letters, each followed by a space. */
    public static void topologicalSort(ArrayList<Integer>[] adj, int V) {
        // A freshly allocated boolean[] is already all false.
        boolean[] seen = new boolean[V];
        Stack<Integer> finished = new Stack<Integer>();

        for (int vertex = 0; vertex < V; vertex++) {
            if (!seen[vertex]) {
                topologicalSortUtil(adj, vertex, seen, finished);
            }
        }

        while (!finished.empty()) {
            System.out.print((char) (finished.pop() + 'a') + " ");
        }
    }

    /**
     * Prints the character order implied by the first n entries of words.
     * k is the alphabet size (letters 'a' .. 'a' + k - 1).
     */
    public static void printOrder(String[] words, int n, int k) {
        ArrayList<Integer>[] adj = new ArrayList[k];
        for (int c = 0; c < k; c++) {
            adj[c] = new ArrayList<Integer>();
        }

        for (int i = 0; i < n - 1; i++) {
            String first = words[i];
            String second = words[i + 1];
            int limit = Math.min(first.length(), second.length());

            // The first differing position yields exactly one edge per pair.
            for (int pos = 0; pos < limit; pos++) {
                char a = first.charAt(pos);
                char b = second.charAt(pos);
                if (a != b) {
                    addEdge(adj, a, b);
                    break;
                }
            }
        }

        topologicalSort(adj, k);
    }

    public static void main(String[] args) {
        String[] words = { "baa", "abcd", "abca", "cab", "cad" };
        int n = words.length;
        int k = 4;
        printOrder(words, n, k);
    }
}
Python3
from collections import defaultdict


def addEdge(adj, u, v):
    """Record that character u comes before character v.

    Vertices are indexed by ord(character) - ord('a').
    """
    adj[ord(u) - ord('a')].append(ord(v) - ord('a'))


def topologicalSortUtil(adj, u, visited, st):
    """Depth-first visit of u; u is appended to st after all its successors."""
    visited[u] = True
    for v in adj[u]:
        if not visited[v]:
            topologicalSortUtil(adj, v, visited, st)
    st.append(u)


def topologicalSort(adj, V):
    """Print the V vertices in topological order, each letter followed by a space."""
    visited = [False] * V
    st = []
    for i in range(V):
        if not visited[i]:
            topologicalSortUtil(adj, i, visited, st)
    # st holds the vertices in finishing order; popping reverses it.
    while st:
        print(chr(st.pop() + ord('a')), end=' ')


def printOrder(words, n, k):
    """Print the character order implied by the sorted list words[0:n].

    k is the alphabet size: words may only use the first k lowercase letters.
    """
    adj = defaultdict(list)
    for i in range(n - 1):
        word1 = words[i]
        word2 = words[i + 1]
        # Only the first mismatching position of a pair gives an ordering.
        for c1, c2 in zip(word1, word2):
            if c1 != c2:
                addEdge(adj, c1, c2)
                break
    topologicalSort(adj, k)


if __name__ == '__main__':
    words = ["baa", "abcd", "abca", "cab", "cad"]
    n = len(words)
    k = 4
    printOrder(words, n, k)
C#
using System;
using System.Collections.Generic;
using System.Linq;

// Alien dictionary (approach 1): builds a precedence graph from a sorted word
// list and prints one topological ordering of its characters.
public class MainClass
{
    // Adds the edge "u comes before v"; vertices are indexed by (character - 'a').
    public static void AddEdge(List<int>[] adj, char u, char v)
    {
        int from = u - 'a';
        int to = v - 'a';
        adj[from].Add(to);
    }

    // Depth-first visit of u. The vertex is pushed onto st only after all of
    // its unvisited successors have been pushed.
    public static void TopologicalSortUtil(List<int>[] adj, int u, bool[] visited, Stack<int> st)
    {
        visited[u] = true;
        List<int> successors = adj[u];
        for (int idx = 0; idx < successors.Count; idx++)
        {
            int next = successors[idx];
            if (visited[next])
            {
                continue;
            }
            TopologicalSortUtil(adj, next, visited, st);
        }
        st.Push(u);
    }

    // Prints the V vertices in topological order as letters, each followed by a space.
    public static void TopologicalSort(List<int>[] adj, int V)
    {
        // A freshly allocated bool[] is already all false.
        bool[] seen = new bool[V];
        Stack<int> finished = new Stack<int>();

        for (int vertex = 0; vertex < V; vertex++)
        {
            if (!seen[vertex])
            {
                TopologicalSortUtil(adj, vertex, seen, finished);
            }
        }

        while (finished.Count > 0)
        {
            Console.Write((char)(finished.Pop() + 'a') + " ");
        }
    }

    // Prints the character order implied by the first n entries of words.
    // k is the alphabet size (letters 'a' .. 'a' + k - 1).
    public static void PrintOrder(string[] words, int n, int k)
    {
        List<int>[] adj = new List<int>[k];
        for (int c = 0; c < k; c++)
        {
            adj[c] = new List<int>();
        }

        for (int i = 0; i < n - 1; i++)
        {
            string first = words[i];
            string second = words[i + 1];
            int limit = Math.Min(first.Length, second.Length);

            // The first differing position yields exactly one edge per pair.
            for (int pos = 0; pos < limit; pos++)
            {
                if (first[pos] != second[pos])
                {
                    AddEdge(adj, first[pos], second[pos]);
                    break;
                }
            }
        }

        TopologicalSort(adj, k);
    }

    public static void Main()
    {
        string[] words = { "baa", "abcd", "abca", "cab", "cad" };
        int n = words.Length;
        int k = 4;
        PrintOrder(words, n, k);
    }
}
Javascript
// Alien dictionary (approach 1): builds a precedence graph from a sorted word
// list and prints one topological ordering of its characters.

// Adds the edge "u comes before v"; vertices are indexed by (char code - code of 'a').
function addEdge(adj, u, v) {
    const base = 'a'.charCodeAt(0);
    const from = u.charCodeAt(0) - base;
    const to = v.charCodeAt(0) - base;
    adj[from].push(to);
}

// Depth-first visit of u. The vertex is pushed onto st only after all of its
// unvisited successors have been pushed.
function topologicalSortUtil(adj, u, visited, st) {
    visited[u] = true;
    for (const next of adj[u]) {
        if (visited[next]) {
            continue;
        }
        topologicalSortUtil(adj, next, visited, st);
    }
    st.push(u);
}

// Prints the V vertices in topological order as letters, each followed by a space.
function topologicalSort(adj, V) {
    const seen = new Array(V).fill(false);
    const finished = [];

    for (let vertex = 0; vertex < V; vertex++) {
        if (!seen[vertex]) {
            topologicalSortUtil(adj, vertex, seen, finished);
        }
    }

    const base = 'a'.charCodeAt(0);
    while (finished.length > 0) {
        const top = finished.pop();
        process.stdout.write(String.fromCharCode(top + base) + " ");
    }
}

// Prints the character order implied by the first n entries of words.
// k is the alphabet size (letters 'a' .. 'a' + k - 1).
function printOrder(words, n, k) {
    const adj = new Array(k);
    for (let c = 0; c < k; c++) {
        adj[c] = [];
    }

    for (let i = 0; i < n - 1; i++) {
        const first = words[i];
        const second = words[i + 1];
        const limit = Math.min(first.length, second.length);

        // The first differing position yields exactly one edge per pair.
        for (let pos = 0; pos < limit; pos++) {
            if (first[pos] !== second[pos]) {
                addEdge(adj, first[pos], second[pos]);
                break;
            }
        }
    }

    topologicalSort(adj, k);
}

let words = ["baa", "abcd", "abca", "cab", "cad"];
let n = words.length;
let k = 4;
printOrder(words, n, k);
b d a c
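Time Complexity: The first step, creating the graph, compares each pair of adjacent words character by character up to the first mismatch. This takes O(n * L) time, where n is the number of given words and L is the length of the longest word, plus O(alpha) to create the empty adjacency lists, where alpha is the number of characters in the given alphabet. The second step is topological sorting. Note that there would be alpha vertices and at most (n-1) edges in the graph. The time complexity of topological sorting is O(V+E), which is O(n + alpha) here. So the overall time complexity is O(n * L + alpha); if the word length is treated as a constant, this is O(n + alpha).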
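Space Complexity: O(V + E) for the adjacency lists, where V is the number of vertices (alpha) and E is the number of edges (at most n-1); the visited array and the stack take an additional O(V).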
Exercise: The above code doesn’t work when the input is not valid. For example {“aba”, “bba”, “aaa”} is not valid, because from first two words, we can deduce ‘a’ should appear before ‘b’, but from last two words, we can deduce ‘b’ should appear before ‘a’ which is not possible. Extend the above program to handle invalid inputs and generate the output as “Not valid”.
Approach 2: [Works for invalid input data]
We have implemented this approach in C#.
Algorithm:
(1) Compare 2 adjacent words at a time (i.e., word1 with word2, word2 with word3, …, word(startIndex) with word(startIndex + 1)).
(2) Then we compare one character at a time for the 2 words selected.
(2a) If both characters are different, we stop the comparison here and conclude that the character from word(startIndex) comes before the other.
(2b) If both characters are the same, we continue to compare until (2a) occurs or if either of the words has been exhausted.
(3) We continue to compare each word in this fashion until we have compared all words.
Once we find a character set in (2a) we pass them to class ‘AlienCharacters’ which takes care of the overall ordering of the characters. The idea is to maintain the ordering of the characters in a linked list (DNode). To optimize the insertion time into the linked list, a map (C# Dictionary) is used as an indexing entity, thus, bringing down the complexity to O(1). This is an improvement from the previous algorithm where topological sort was used for the purpose.
Boundary conditions:
1. The startIndex must be within range
2. When comparing 2 words, if we exhaust on one i.e, the length of both words is different. Compare only until either one exhausts.
Complexity Analysis:
The method-wise time complexities have been mentioned in the code below (C#) for better understanding.
If ‘N’ is the number of words in the input alien vocabulary/dictionary, ‘L’ length of the max length word, and ‘C’ is the final number of unique characters,
Time Complexity: O(N * L)
Space Complexity: O(C)
C++
// A C++ program to find the order of characters in an alien language
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stack>
#include <string>
#include <vector>
using namespace std;

// Directed graph over the vertices 0 .. V-1, stored as adjacency lists.
// Storage is owned by std::vector, so nothing has to be freed by hand.
class Graph
{
    int V; // No. of vertices

    // adj[v] holds every w for which addEdge(v, w) was called
    vector<vector<int>> adj;

    // A recursive helper used by topologicalSort
    void topologicalSortUtil(int v, vector<bool> &visited, stack<int> &Stack);

public:
    Graph(int V); // Constructor

    // function to add an edge to graph
    void addEdge(int v, int w);

    // prints a Topological Sort of the complete graph
    void topologicalSort();
};

Graph::Graph(int V) : V(V), adj(static_cast<size_t>(V))
{
}

void Graph::addEdge(int v, int w)
{
    adj[v].push_back(w); // Add w to v's list.
}

// Depth-first visit of v: v is pushed only after every vertex reachable from
// it has been pushed, so popping the stack yields a topological order.
void Graph::topologicalSortUtil(int v, vector<bool> &visited, stack<int> &Stack)
{
    // Mark the current node as visited.
    visited[v] = true;

    // Recur for all the vertices adjacent to this vertex
    for (const int next : adj[v])
        if (!visited[next])
            topologicalSortUtil(next, visited, Stack);

    // Push current vertex to stack which stores result
    Stack.push(v);
}

// The function to do Topological Sort. It uses recursive topologicalSortUtil()
// and prints each vertex as the letter 'a' + index followed by a space.
void Graph::topologicalSort()
{
    stack<int> Stack;

    // Mark all the vertices as not visited
    vector<bool> visited(adj.size(), false);

    // Call the recursive helper function to store Topological Sort
    // starting from all vertices one by one
    for (int i = 0; i < V; i++)
        if (!visited[i])
            topologicalSortUtil(i, visited, Stack);

    // Print contents of stack
    while (!Stack.empty())
    {
        cout << static_cast<char>('a' + Stack.top()) << " ";
        Stack.pop();
    }
}

// This function finds and prints order of character from a sorted
// array of words. n is size of words[]. alpha is the number of possible
// characters.
// For simplicity, this function is written in a way that only the
// first 'alpha' characters can be there in words array. For
// example if alpha is 7, then words[] should have only 'a', 'b',
// 'c' 'd', 'e', 'f', 'g'
void printOrder(string words[], int n, int alpha)
{
    // Create a graph with 'alpha' vertices
    Graph g(alpha);

    // Process all adjacent pairs of words and create a graph
    for (int i = 0; i + 1 < n; i++)
    {
        // Take the current two words and find the first mismatching
        // character (references avoid copying the strings)
        const string &word1 = words[i];
        const string &word2 = words[i + 1];
        const size_t common = min(word1.length(), word2.length());
        for (size_t j = 0; j < common; j++)
        {
            // If we find a mismatching character, then add an edge
            // from character of word1 to that of word2
            if (word1[j] != word2[j])
            {
                g.addEdge(word1[j] - 'a', word2[j] - 'a');
                break;
            }
        }
    }

    // Print topological sort of the above created graph
    g.topologicalSort();
}

// Driver program to test above functions
int main()
{
    string words[] = { "caa", "aaa", "aab" };
    printOrder(words, 3, 3);
    return 0;
}
Java
// A Java program to find the order of
// characters in an alien language
import java.util.*;

// Directed graph kept as an array of adjacency lists
class Graph {

    // adjacencyList[v] holds every vertex that v has an edge to
    private final LinkedList<Integer>[] adjacencyList;

    Graph(int nVertices) {
        adjacencyList = new LinkedList[nVertices];
        int slot = 0;
        while (slot < nVertices) {
            adjacencyList[slot] = new LinkedList<>();
            slot++;
        }
    }

    // Adds the directed edge startVertex -> endVertex
    void addEdge(int startVertex, int endVertex) {
        adjacencyList[startVertex].add(endVertex);
    }

    // Depth-first visit: a vertex is pushed only after all of its
    // unvisited successors have been pushed
    private void visit(int vertex, boolean[] seen, Deque<Integer> finished) {
        seen[vertex] = true;
        for (int successor : adjacencyList[vertex]) {
            if (seen[successor]) {
                continue;
            }
            visit(successor, seen, finished);
        }
        finished.push(vertex);
    }

    // Prints a topological sort of the complete graph, each vertex as the
    // letter 'a' + index followed by a space
    void topologicalSort() {
        final int vertexCount = adjacencyList.length;

        // A new boolean[] starts out all false, i.e. nothing visited yet
        boolean[] seen = new boolean[vertexCount];
        Deque<Integer> finished = new ArrayDeque<>();

        // Start a visit from every vertex that is still unvisited
        for (int vertex = 0; vertex < vertexCount; vertex++) {
            if (!seen[vertex]) {
                visit(vertex, seen, finished);
            }
        }

        // Emit the vertices in reverse finishing order
        while (!finished.isEmpty()) {
            System.out.print((char) ('a' + finished.pop()) + " ");
        }
    }
}

public class OrderOfCharacters {

    // Finds and prints the order of characters from a sorted array of
    // words. alpha is the number of possible characters starting from 'a'.
    // For simplicity only the first 'alpha' characters may appear in the
    // words: if alpha is 7, words[] should contain words made only of
    // 'a', 'b', 'c', 'd', 'e', 'f', 'g'.
    private static void printOrder(String[] words, int n, int alpha) {
        // One vertex per possible character
        Graph graph = new Graph(alpha);

        for (int i = 0; i < n - 1; i++) {
            String current = words[i];
            String following = words[i + 1];
            int limit = Math.min(current.length(), following.length());

            // The first mismatching position gives one edge, from the
            // character of the earlier word to that of the later word
            for (int pos = 0; pos < limit; pos++) {
                char a = current.charAt(pos);
                char b = following.charAt(pos);
                if (a != b) {
                    graph.addEdge(a - 'a', b - 'a');
                    break;
                }
            }
        }

        // Print topological sort of the graph built above
        graph.topologicalSort();
    }

    // Driver program to test above functions
    public static void main(String[] args) {
        String[] words = { "caa", "aaa", "aab" };
        printOrder(words, 3, 3);
    }
}

// Contributed by Harikrishnan Rajan
C#
using System;
using System.Collections.Generic;
using System.Linq;

namespace AlienDictionary
{
    // Node of the doubly linked list that keeps the characters in their
    // deduced order.
    public class DNode
    {
        public string Char;
        public DNode prev = null;
        public DNode next = null;

        public DNode(string character) => Char = character;
    }

    // Maintains the ordering of the alien characters as a doubly linked list,
    // with a dictionary from character to node so a node is found without
    // scanning the list.
    public class AlienCharacters
    {
        public AlienCharacters(int k) => MaxChars = k;

        private int MaxChars;   // number of unique characters expected in the result
        private DNode head = null;
        private Dictionary<string, DNode> index = new Dictionary<string, DNode>();

        // Records that predChar comes before succChar.
        // Returns false when both characters are already in the list and
        // predChar is not found before succChar, i.e. the input contradicts
        // itself; returns true otherwise.
        // Time: O(1) when at least one character is new (dictionary lookup
        //       plus pointer updates); O(c) when both already exist, because
        //       Validate walks the list. 'c' is the unique character count.
        // Space: O(c)
        // NOTE(review): when both characters are new they are linked only to
        // each other and head is moved to predChar; nothing here attaches the
        // pair to nodes linked earlier. Confirm whether inputs that produce
        // such a pair after the list is non-empty need to be supported.
        public bool UpdateCharacterOrdering(string predChar, string succChar)
        {
            DNode pNode = null, sNode = null;
            bool isSNodeNew = false, isPNodeNew = false;

            if (!index.TryGetValue(predChar, out pNode))
            {
                pNode = new DNode(predChar);
                index[predChar] = pNode;
                isPNodeNew = true;
            }
            if (!index.TryGetValue(succChar, out sNode))
            {
                sNode = new DNode(succChar);
                index[succChar] = sNode;
                isSNodeNew = true;
            }

            if (!isSNodeNew && !isPNodeNew)
            {
                // Both nodes are already placed: only check consistency.
                if (!Validate(predChar, succChar))
                    return false;
            }
            else if (isPNodeNew)
            {
                // predChar is new (succChar may or may not be):
                // put predChar directly in front of succChar.
                InsertNodeBefore(pNode, sNode);
            }
            else
            {
                // Only succChar is new: put it directly after predChar.
                InsertNodeAfter(pNode, sNode);
            }

            if (pNode.prev == null)
                head = pNode;
            return true;
        }

        // Links sNode directly after pNode.
        // Time: O(1)
        private void InsertNodeAfter(DNode pNode, DNode sNode)
        {
            sNode.next = pNode.next;
            if (pNode.next != null)
                pNode.next.prev = sNode;
            pNode.next = sNode;
            sNode.prev = pNode;
        }

        // Links pNode directly before sNode.
        // Time: O(1)
        private void InsertNodeBefore(DNode pNode, DNode sNode)
        {
            pNode.prev = sNode.prev;
            if (sNode.prev != null)
                sNode.prev.next = pNode;
            sNode.prev = pNode;
            pNode.next = sNode;
        }

        // First level of validation: checks that the node of predChar is
        // reachable from the node of succChar by following 'prev' links.
        // Time: O(c), where 'c' is the unique character count.
        private bool Validate(string predChar, string succChar)
        {
            DNode sNode = index[succChar];
            while (sNode != null)
            {
                if (sNode.Char != predChar)
                    sNode = sNode.prev;
                else
                    return true; // validation successful
            }
            // Reached the front of the list without meeting predChar:
            // something is not right!
            return false;
        }

        // Returns the characters from head onwards, each followed by a space,
        // or an error message when their count differs from MaxChars.
        // Time: O(c), where 'c' is the unique character count.
        public override string ToString()
        {
            string res = "";
            int count = 0;
            DNode currNode = head;
            while (currNode != null)
            {
                res += currNode.Char + " ";
                count++;
                currNode = currNode.next;
            }
            // second level of validation
            if (count != MaxChars) // something went wrong!
                res = "ERROR!!! Input words not enough to find all k unique characters.";
            return res;
        }
    }

    class Program
    {
        static int k = 4;
        static AlienCharacters alienCharacters = new AlienCharacters(k);
        static List<string> vocabulary = new List<string>();

        static void Main(string[] args)
        {
            vocabulary.Add("baa");
            vocabulary.Add("abcd");
            vocabulary.Add("abca");
            vocabulary.Add("cab");
            vocabulary.Add("cad");
            ProcessVocabulary(0);
            Console.WriteLine(alienCharacters.ToString());
            Console.ReadLine();
        }

        // Feeds every adjacent pair of words, starting at startIndex, into
        // alienCharacters. Stops and prints an error at the first pair that
        // contradicts the ordering found so far.
        // Written as a loop so the call depth does not grow with the number
        // of words.
        // Time: O(vocabulary.Count * max(word.Length)), not counting the cost
        //       of UpdateCharacterOrdering.
        static void ProcessVocabulary(int startIndex)
        {
            for (int i = startIndex; i < vocabulary.Count - 1; i++)
            {
                var res = GetPredSuccChar(vocabulary[i], vocabulary[i + 1]);
                if (res == null)
                    continue; // this pair carries no ordering information

                if (!alienCharacters.UpdateCharacterOrdering(res.Item1, res.Item2))
                {
                    Console.WriteLine("ERROR!!! Invalid input data, the words maybe in wrong order");
                    return;
                }
            }
        }

        // Returns the first pair of differing characters of str1 and str2 as
        // (character of str1, character of str2), or null when one string is
        // empty or one is a prefix of the other.
        // Scans by index, so no substrings are allocated.
        // Time: O(min(str1.Length, str2.Length))
        static Tuple<string, string> GetPredSuccChar(string str1, string str2)
        {
            int limit = Math.Min(str1.Length, str2.Length);
            for (int i = 0; i < limit; i++)
            {
                if (str1[i] != str2[i]) // found an ordering
                    return new Tuple<string, string>(str1[i].ToString(), str2[i].ToString());
            }
            return null; // one of the words is exhausted
        }
    }
}

// Contributed by Priyanka Pardesi Ramachander
Javascript
<script> // Javascript program to order of characters in an alien language // Class to represent a graph class Graph { constructor(V) { this .V = V; this .adj = new Array(V); for (let i = 0; i < V; i++) this .adj[i] = []; } addEdge(v, w) { this .adj[v].push(w); // Add w to v’s list. } // A recursive function used by topologicalSort topologicalSortUtil(v, visited, stack) { // Mark the current node as visited. visited[v] = true ; // Recur for all the vertices adjacent to this vertex this .adj[v].forEach(i => { if (!visited[i]) this .topologicalSortUtil(i, visited, stack); }) // Push current vertex to stack which stores result stack.push(v); } // The function to do Topological Sort. It uses recursive topologicalSortUtil() topologicalSort() { let stack = []; // Mark all the vertices as not visited let visited = new Array( this .V); for (let i = 0; i < this .V; i++) visited[i] = false ; // Call the recursive helper function to store Topological Sort // starting from all vertices one by one for (let i = 0; i < this .V; i++) { if (visited[i] == false ) this .topologicalSortUtil(i, visited, stack); } // Print contents of stack while (stack.length > 0) { let x = stack.pop() + 'a' .charCodeAt(0); document.write(String.fromCharCode(x) + " " ); } } } // This function finds and prints order of character from a sorted // array of words. n is size of words[]. alpha is set of possible // alphabets. // For simplicity, this function is written in a way that only // first 'alpha' characters can be there in words array. 
For // example if alpha is 7, then words[] should have only 'a', 'b', // 'c' 'd', 'e', 'f', 'g' function printOrder(words, n, alpha) { // Create a graph with 'alpha' edges let g = new Graph(alpha); // Process all adjacent pairs of words and create a graph for (let i = 0; i < n-1; i++) { // Take the current two words and find the first mismatching // character word1 = words[i]; word2 = words[i+1]; for (let j = 0; j < Math.min(word1.length, word2.length); j++) { // If we find a mismatching character, then add an edge // from character of word1 to that of word2 if (word1[j] != word2[j]) { g.addEdge(word1.charCodeAt(j) - 'a' .charCodeAt(0), word2.charCodeAt(j) - 'a' .charCodeAt(0)); break ; } } } // Print topological sort of the above created graph g.topologicalSort(); } // Driver program to test above functions words = [ "caa" , "aaa" , "aab" ]; printOrder(words, 3, 3); // This code is contributed by cavi4762 </script> |
Python3
# A Python program to order of characters in an alien language class Graph: def __init__( self , V): self .V = V self .adj = [[] for _ in range (V)] def add_edge( self , v, w): self .adj[v].append(w) def topological_sort_util( self , v, visited, stack): visited[v] = True for i in self .adj[v]: if not visited[i]: self .topological_sort_util(i, visited, stack) stack.append(v) def topological_sort( self ): stack = [] visited = [ False ] * self .V for i in range ( self .V): if not visited[i]: self .topological_sort_util(i, visited, stack) return stack def min (x, y): return x if x < y else y def print_order(words, n, alpha): g = Graph(alpha) for i in range (n - 1 ): word1 = words[i] word2 = words[i + 1 ] for j in range ( min ( len (word1), len (word2))): if word1[j] ! = word2[j]: g.add_edge( ord (word1[j]) - ord ( 'a' ), ord (word2[j]) - ord ( 'a' )) break stack = g.topological_sort() return "".join([ chr (i + ord ( 'a' )) for i in stack])[:: - 1 ] words = [ "caa" , "aaa" , "aab" ] strin = print_order(words, len (words), 3 ) for i in strin: print (i,end = ' ' ) # This code is contributed by Susobhan Akhuli |
Output:
c a b
Time Complexity: O(V+E) , here V is number of Vertices and E is number of Edges.
Auxiliary Space : O(V) , here V is number of Vertices.
This article is contributed by Piyush Gupta (C++), Harikrishnan Rajan (java), Priyanka Pardesi Ramachander (C#) and Susobhan Akhuli (Python).
Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.
Please Login to comment...