Open In App
Related Articles

Alien Dictionary

Improve Article
Improve
Save Article
Save
Like Article
Like

Given a sorted dictionary (array of words) of an alien language, find the order of characters in the language.

Examples:  

Input: words[] = {“baa”, “abcd”, “abca”, “cab”, “cad”}
Output: Order of characters is ‘b’, ‘d’, ‘a’, ‘c’
Explanation: Note that words are sorted and in the given language “baa” comes before “abcd”, therefore ‘b’ is before ‘a’ in output. Similarly we can find other orders.

Input: words[] = {“caa”, “aaa”, “aab”}
Output: Order of characters is ‘c’, ‘a’, ‘b’

Recommended Practice

Alien Dictionary using Topological Sorting:

The idea is to create a directed graph where each character represents a node,and edges between nodes indicate the relative order of characters. If the graph created contains a cycle then a valid order of charcters is not possible else the topological sorting algorithm is applied to the graph to determine the character order.

Following are the detailed steps.

  • Create a directed graph g with number of vertices equal to the number of unique characters in alien language, where each character represents a node and edges between nodes indicate the relative order of characters.
  • Do following for every pair of adjacent words in given sorted array.
    • Let the current pair of words be word1 and word2. One by one compare characters of both words and find the first mismatching characters. 
    • Create an edge in g from mismatching character of word1 to that of word2.
  • If the graph created is DAG (Directed Acyclic Grapgh) then print topological sorting of the above created graph else if the graph contains a cycle then input data is inconsistent and valid order of characters does not exist.

Below is the implementation of above approach:

C++




#include <bits/stdc++.h>
using namespace std;
 
// Function to add a directed edge between characters u and
// v
void addEdge(vector<int> adj[], char u, char v)
{
    adj[u - 'a'].push_back(v - 'a');
}
 
// Depth-First Search (DFS) function to check for cycles in
// the graph
void dfs(vector<int> adj[], vector<int>& col, int curr,
         bool& isCyclic)
{
    // Mark the current node as visited and in the current
    // path
    col[curr] = 1;
   
    for (int i = 0; i < adj[curr].size(); i++) {
        int x = adj[curr][i];
        if (col[x] == 1) {
 
            // If the node is already in the current path, a
            // cycle is detected
            isCyclic = true;
           
            return;
        }
        else if (col[x] == 0) {
 
            // Recursively visit adjacent nodes
            dfs(adj, col, x, isCyclic);
        }
    }
 
    // Mark the current node as visited and not in the
    // current path
    col[curr] = 2;
}
 
// Function to check if a cycle exists in the graph using
// DFS
bool checkCycle(vector<int> adj[], vector<int> col, int k)
{
    bool isCyclic = false;
    for (int i = 0; i < k; i++) {
        if (col[i] == 0) {
            dfs(adj, col, i, isCyclic);
        }
    }
    return isCyclic;
}
 
// DFS-based topological sorting utility function
void topologicalSortUtil(vector<int> adj[], int u,
                         bool visited[], stack<int>& st)
{
    visited[u] = true;
    for (int i = 0; i < adj[u].size(); i++) {
        int v = adj[u][i];
        if (!visited[v]) {
            topologicalSortUtil(adj, v, visited, st);
        }
    }
    st.push(u);
}
 
// Function to perform topological sorting
void topologicalSort(vector<int> adj[], int V)
{
    bool visited[V];
    stack<int> st;
 
    for (int i = 0; i < V; i++) {
        visited[i] = false;
    }
 
    for (int i = 0; i < V; i++) {
        if (!visited[i]) {
            topologicalSortUtil(adj, i, visited, st);
        }
    }
 
    // Print the characters in topological order
    while (!st.empty()) {
        cout << char(st.top() + 'a') << " ";
        st.pop();
    }
}
 
// Function to process the words and find the character
// order
void printOrder(string words[], int n)
{
    // To track the frequency of characters
    vector<int> frq(26, 0);
 
    // Count of unique characters
    int k = 0;
 
    // Count unique characters and their frequencies
    for (int i = 0; i < n; i++) {
        string s = words[i];
        for (int j = 0; j < s.size(); j++) {
            frq[s[j] - 97]++;
            if (frq[s[j] - 97] == 1)
                k++;
        }
    }
 
    // Create adjacency list for the graph
    vector<int> adj[k];
 
    // Build the graph by iterating through adjacent words
    for (int i = 0; i < n - 1; i++) {
        string word1 = words[i];
        string word2 = words[i + 1];
 
        int j = 0;
        while (j < word1.length() && j < word2.length()) {
            if (word1[j] != word2[j]) {
 
                // Add edges based on character order
                addEdge(adj, word1[j], word2[j]);
 
                break;
            }
            j++;
        }
    }
 
    // Color array for cycle detection
    vector<int> col(k, 0);
    if (checkCycle(adj, col, k)) {
        // Detect and handle cycles
        cout << "Valid Order is not possible\n";
        return;
    }
 
    // Perform topological sorting and print character order
    topologicalSort(adj, k);
}
 
// Driver Code
int main()
{
    string words[]
        = { "baa", "abcd", "abca", "cab", "cad" };
    int n = sizeof(words) / sizeof(words[0]);
 
    printOrder(words, n);
 
    return 0;
}

Python3




# Function to add a directed edge between characters u and v
def addEdge(adj, u, v):
    adj[ord(u) - ord('a')].append(ord(v) - ord('a'))
 
# Depth-First Search (DFS) function to check for cycles in the graph
def dfs(adj, col, curr, isCyclic):
    # Mark the current node as visited and in the current path
    col[curr] = 1
 
    for x in adj[curr]:
        if col[x] == 1:
            # If the node is already in the current path, a cycle is detected
            isCyclic[0] = True
            return
        elif col[x] == 0:
            # Recursively visit adjacent nodes
            dfs(adj, col, x, isCyclic)
 
    # Mark the current node as visited and not in the current path
    col[curr] = 2
 
# Function to check if a cycle exists in the graph using DFS
def checkCycle(adj, col, k):
    isCyclic = [False]
    for i in range(k):
        if col[i] == 0:
            dfs(adj, col, i, isCyclic)
    return isCyclic[0]
 
# DFS-based topological sorting utility function
def topologicalSortUtil(adj, u, visited, st):
    visited[u] = True
    for v in adj[u]:
        if not visited[v]:
            topologicalSortUtil(adj, v, visited, st)
    st.append(u)
 
# Function to perform topological sorting
def topologicalSort(adj, V):
    visited = [False] * V
    st = []
 
    for i in range(V):
        if not visited[i]:
            topologicalSortUtil(adj, i, visited, st)
 
    # Print the characters in topological order
    while st:
        print(chr(st.pop() + ord('a')), end=" ")
 
# Function to process the words and find the character order
def printOrder(words):
    # To track the frequency of characters
    frq = [0] * 26
 
    # Count of unique characters
    k = 0
 
    # Count unique characters and their frequencies
    for word in words:
        for char in word:
            frq[ord(char) - ord('a')] += 1
            if frq[ord(char) - ord('a')] == 1:
                k += 1
 
    # Create adjacency list for the graph
    adj = [[] for _ in range(k)]
 
    # Build the graph by iterating through adjacent words
    for i in range(len(words) - 1):
        word1, word2 = words[i], words[i + 1]
        j = 0
        while j < len(word1) and j < len(word2):
            if word1[j] != word2[j]:
                # Add edges based on character order
                addEdge(adj, word1[j], word2[j])
                break
            j += 1
 
    # Color array for cycle detection
    col = [0] * k
    isCyclic = [False]
    if checkCycle(adj, col, k):
        # Detect and handle cycles
        print("Valid Order is not possible")
        return
 
    # Perform topological sorting and print character order
    topologicalSort(adj, k)
 
# Driver Code
if __name__ == "__main__":
    words = ["baa", "abcd", "abca", "cab", "cad"]
    printOrder(words)
# This code is contributed by Dwaipayan Bandyopadhyay

Output

b d a c 


Time Complexity: O(N+K) , where N is number of given words and Kis number of unique characters in given alphabet.
Auxiliary Space: O(N+K) ,


Last Updated : 02 Oct, 2023
Like Article
Save Article
Similar Reads
Related Tutorials