Given a sequence of words, print all anagrams together | Set 1

Given an array of words, print all anagrams together. For example, if the given array is {“cat”, “dog”, “tac”, “god”, “act”}, then output may be “cat tac act dog god”.

A simple method is to create a Hash Table. Calculate the hash value of each word in such a way that all anagrams have the same hash value. Populate the Hash Table with these hash values. Finally, print those words together with same hash values. A simple hashing mechanism can be modulo sum of all characters. With modulo sum, two non-anagram words may have same hash value. This can be handled by matching individual characters.

Following is another method to print all anagrams together. Take two auxiliary arrays, index array and word array. Populate the word array with the given sequence of words. Sort each individual word of the word array. Finally, sort the word array and keep track of the corresponding indices. After sorting, all the anagrams cluster together. Use the index array to print the strings from the original array of strings.

Let us understand the steps with following input Sequence of Words:

"cat", "dog", "tac", "god", "act"

1) Create two auxiliary arrays index[] and words[]. Copy all given words to words[] and store the original indexes in index[]



index[]:  0   1   2   3   4
words[]: cat dog tac god act

2) Sort individual words in words[]. Index array doesn’t change.

index[]:   0    1    2    3    4
words[]:  act  dgo  act  dgo  act

3) Sort the words array. Compare individual words using strcmp() to sort

index:     0    2    4    1    3
words[]:  act  act  act  dgo  dgo

4) All anagrams come together. But words are changed in words array. To print the original words, take index from the index array and use it in the original array. We get

"cat tac act dog god"

Following are the implementations of the above algorithm. In the following program, an array of structure “Word” is used to store both index and word arrays. DupArray is another structure that stores array of structure “Word”.

C/C++

filter_none

edit
close

play_arrow

link
brightness_4
code

// A C program to print all anagarms together
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  
// structure for each word of duplicate array
struct Word {
    char* str; // to store word itself
    int index; // index of the word in the original array
};
  
// structure to represent duplicate array.
struct DupArray {
    struct Word* array; // Array of words
    int size; // Size of array
};
  
// Create a DupArray object that contains an array of Words
struct DupArray* createDupArray(char* str[], int size)
{
    // Allocate memory for dupArray and all members of it
    struct DupArray* dupArray = (struct DupArray*)malloc(sizeof(struct DupArray));
    dupArray->size = size;
    dupArray->array = (struct Word*)malloc(dupArray->size * sizeof(struct Word));
  
    // One by one copy words from the given wordArray to dupArray
    int i;
    for (i = 0; i < size; ++i) {
        dupArray->array[i].index = i;
        dupArray->array[i].str = (char*)malloc(strlen(str[i]) + 1);
        strcpy(dupArray->array[i].str, str[i]);
    }
  
    return dupArray;
}
  
// Compare two characters. Used in qsort() for sorting an array of
// characters (Word)
int compChar(const void* a, const void* b)
{
    return *(char*)a - *(char*)b;
}
  
// Compare two words. Used in qsort() for sorting an array of words
int compStr(const void* a, const void* b)
{
    struct Word* a1 = (struct Word*)a;
    struct Word* b1 = (struct Word*)b;
    return strcmp(a1->str, b1->str);
}
  
// Given a list of words in wordArr[],
void printAnagramsTogether(char* wordArr[], int size)
{
    // Step 1: Create a copy of all words present in given wordArr.
    // The copy will also have orignal indexes of words
    struct DupArray* dupArray = createDupArray(wordArr, size);
  
    // Step 2: Iterate through all words in dupArray and sort
    // individual words.
    int i;
    for (i = 0; i < size; ++i)
        qsort(dupArray->array[i].str,
              strlen(dupArray->array[i].str), sizeof(char), compChar);
  
    // Step 3: Now sort the array of words in dupArray
    qsort(dupArray->array, size, sizeof(dupArray->array[0]), compStr);
  
    // Step 4: Now all words in dupArray are together, but these
    // words are changed. Use the index member of word struct to
    // get the corresponding original word
    for (i = 0; i < size; ++i)
        printf("%s ", wordArr[dupArray->array[i].index]);
}
  
// Driver program to test above functions
int main()
{
    char* wordArr[] = { "cat", "dog", "tac", "god", "act" };
    int size = sizeof(wordArr) / sizeof(wordArr[0]);
    printAnagramsTogether(wordArr, size);
    return 0;
}

chevron_right


Java

filter_none

edit
close

play_arrow

link
brightness_4
code

// A Java program to print all anagrams together
import java.util.Arrays;
import java.util.Comparator;
public class GFG {
    // class for each word of duplicate array
    static class Word {
        String str; // to store word itself
        int index; // index of the word in the
        // original array
  
        // constructor
        Word(String str, int index)
        {
            this.str = str;
            this.index = index;
        }
    }
  
    // class to represent duplicate array.
    static class DupArray {
        Word[] array; // Array of words
        int size; // Size of array
  
        // constructor
        public DupArray(String str[], int size)
        {
            this.size = size;
            array = new Word[size];
  
            // One by one copy words from the
            // given wordArray to dupArray
            int i;
            for (i = 0; i < size; ++i) {
                // create a word Object with the
                // str[i] as str and index as i
                array[i] = new Word(str[i], i);
            }
        }
    }
  
    // Compare two words. Used in Arrays.sort() for
    // sorting an array of words
    static class compStr implements Comparator<Word> {
        public int compare(Word a, Word b)
        {
            return a.str.compareTo(b.str);
        }
    }
  
    // Given a list of words in wordArr[],
    static void printAnagramsTogether(String wordArr[],
                                      int size)
    {
        // Step 1: Create a copy of all words present
        // in given wordArr. The copy will also have
        // original indexes of words
        DupArray dupArray = new DupArray(wordArr, size);
  
        // Step 2: Iterate through all words in
        // dupArray and sort individual words.
        int i;
        for (i = 0; i < size; ++i) {
            char[] char_arr = dupArray.array[i].str.toCharArray();
            Arrays.sort(char_arr);
            dupArray.array[i].str = new String(char_arr);
        }
  
        // Step 3: Now sort the array of words in
        // dupArray
        Arrays.sort(dupArray.array, new compStr());
  
        // Step 4: Now all words in dupArray are together,
        // but these words are changed. Use the index
        // member of word struct to get the corresponding
        // original word
        for (i = 0; i < size; ++i)
            System.out.print(wordArr[dupArray.array[i].index] + " ");
    }
  
    // Driver program to test above functions
    public static void main(String args[])
    {
        String wordArr[] = { "cat", "dog", "tac", "god", "act" };
        int size = wordArr.length;
        printAnagramsTogether(wordArr, size);
    }
}
// This code is contributed by Sumit Ghosh

chevron_right


Python

filter_none

edit
close

play_arrow

link
brightness_4
code

# A Python program to print all anagarms together
  
# structure for each word of duplicate array
class Word(object):
    def __init__(self, string, index):
        self.string = string
        self.index = index
  
# Create a DupArray object that contains an array
# of Words
def createDupArray(string, size):
    dupArray = []
  
    # One by one copy words from the given wordArray
    # to dupArray
    for i in xrange(size):
        dupArray.append(Word(string[i], i))
  
    return dupArray
  
# Given a list of words in wordArr[]
def printAnagramsTogether(wordArr, size):
    # Step 1: Create a copy of all words present in
    # given wordArr.
    # The copy will also have orignal indexes of words
    dupArray = createDupArray(wordArr, size)
  
    # Step 2: Iterate through all words in dupArray and sort
    # individual words.
    for i in xrange(size):
        dupArray[i].string = ''.join(sorted(dupArray[i].string))
  
    # Step 3: Now sort the array of words in dupArray
    dupArray = sorted(dupArray, key = lambda k: k.string)
  
    # Step 4: Now all words in dupArray are together, but
    # these words are changed. Use the index member of word
    # struct to get the corresponding original word
    for word in dupArray:
        print wordArr[word.index],
  
# Driver program
wordArr = ["cat", "dog", "tac", "god", "act"]
size = len(wordArr)
printAnagramsTogether(wordArr, size)
  
# This code is contributed by BHAVYA JAIN

chevron_right



Output:

cat tac act dog god 

Time Complexity: Let there be N words and each word has maximum M characters. The upper bound is O(NMLogM + MNLogN).
Step 2 takes O(NMLogM) time. Sorting a word takes maximum O(MLogM) time. So sorting N words takes O(NMLogM) time. step 3 takes O(MNLogN) Sorting array of words takes NLogN comparisons. A comparison may take maximum O(M) time. So time to sort array of words will be O(MNLogN).

Using hashmap

Here, we will sort each word, calculate its hashcode and then put it in a map where the key will be hashcode generated after sorting. The value of the map will be a list containing all the words which have same hashcode after sorting.
Lastly, we will print all values from the hashmap where size of values will be greater than 1.

Java

filter_none

edit
close

play_arrow

link
brightness_4
code

// Java program to print anagrams 
// together using dictionary
import java.util.*;
  
public class FindAnagrams {
  
    private static void printAnagrams(String arr[])
    {
        HashMap<Integer, List<String> > map = new HashMap<>();
  
        // loop over all words
        for (int i = 0; i < arr.length; i++) {
  
            // convert to char array, sort and
            // then re-convert to string
            String word = arr[i];
            char[] letters = word.toCharArray();
            Arrays.sort(letters);
            String newWord = new String(letters);
  
            // calculate hashcode of string
            // after sorting
            int n = newWord.hashCode();
            if (map.containsKey(n)) {
  
                // Here, we already have
                // a word for the hashcode
                List<String> words = map.get(n);
                words.add(word);
                map.put(n, words);
            }
            else {
  
                // This is the first time we are
                // adding a word for a specific
                // hashcode
                List<String> words = new ArrayList<>();
                words.add(word);
                map.put(n, words);
            }
        }
  
        // print all the values where size is > 1
        // If you want to print non-anagrams,
        // just print the values having size = 1
        for (Integer i : map.keySet()) {
            List<String> values = map.get(i);
            if (values.size() > 1) {
                System.out.print(values);
            }
        }
    }
  
    public static void main(String[] args)
    {
  
        // Driver program
        String arr[] = { "cat", "dog", "tac", "god", "act" };
        printAnagrams(arr);
    }
}

chevron_right


Python3

filter_none

edit
close

play_arrow

link
brightness_4
code

# Python3 program to print anagrams 
# together using dictionary
  
# Implementation of hashcode function
# Source of hashcode function: https://gist.github.com/hanleybrand/5224673
def hashcode(s):
    h = 0
    for c in s:
        h = (31 * h + ord(c)) & 0xFFFFFFFF
    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
      
# Function to print all anagrams together
def printAnagrams(arr): 
       
    Map = {} 
    
    # loop over all words 
    for i in range(len(arr)):  
    
        # convert to char array, sort and 
        # then re-convert to string 
        word = arr[i] 
        newWord = ''.join(sorted(word))
    
        # calculate hashcode of string 
        # after sorting 
        n = hashcode(newWord) 
        if n in Map:  
    
            # Here, we already have 
            # a word for the hashcode 
            Map[n] += [word] 
               
        else:
    
            # This is the first time we are 
            # adding a word for a specific 
            # hashcode 
            Map[n] = [word] 
    
    # print all the values where size is > 1 
    # If you want to print non-anagrams, 
    # just print the values having size = 1 
    for i in Map:  
        values = Map[i]
        if len(values) > 1:  
            print(values, end = " "
               
    
if __name__ == "__main__"
    
    arr =  ["cat", "dog", "tac", "god", "act"]  
    printAnagrams(arr) 
          
# This code is contributed by Rituraj Jain

chevron_right


Output :

[cat, tac, act][dog, god]

Given a sequence of words, print all anagrams together | Set 2

Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.



My Personal Notes arrow_drop_up

Improved By : subharaj01, rituraj_jain