Most frequent word in an array of strings

Given an array of words, find the word that occurs most frequently in it.

Examples:

Input : arr[] = {"geeks", "for", "geeks", "a", 
                "portal", "to", "learn", "can",
                "be", "computer", "science", 
                 "zoom", "yup", "fire", "in", 
                 "be", "data", "geeks"}
Output : geeks
"geeks" is the most frequent word as it 
occurs 3 times



A simple solution is to run two loops and count occurrences of every word. Time complexity of this solution is O(n * n * MAX_WORD_LEN).

An efficient solution is to use a Trie data structure. The idea is simple: first we insert every word into the trie, keeping at each node a count of the words that end at that node. We then do a preorder traversal, comparing the count stored at each node, to find the word that occurs most often.

filter_none

edit
close

play_arrow

link
brightness_4
code

// CPP code to find most frequent word in
// an array of strings
#include <bits/stdc++.h>
using namespace std;
  
/*
 * A single node of the word-counting trie.
 *
 * A node stands for the prefix spelled by the characters on the path that
 * leads to it. Children are held through raw pointers and the struct has no
 * destructor, so whoever builds the trie is responsible for freeing its nodes.
 */
struct Trie {
    // Word that ends at this node; empty until a word has been stored here.
    std::string key;

    // How many inserted words end at this node. Initialised in-class so a
    // node created directly (`Trie t;` or `new Trie`) never starts with an
    // indeterminate count.
    int cnt = 0;

    // Outgoing edges: next character -> child node.
    std::unordered_map<char, Trie*> map;
};
  
/*
 * Allocates a fresh trie node on the heap with its word count set to zero.
 *
 * The new node has an empty key and no children. The raw pointer is handed
 * to the caller; nothing in this function ever frees it.
 */
Trie* getNewTrieNode()
{
    Trie* const fresh = new Trie;
    fresh->cnt = 0;
    return fresh;
}
  
/* function to insert a string */
void insert(Trie*& root, string& str)
{
    // start from root node
    Trie* temp = root;
  
    for (int i = 0; i < str.length(); i++) {
  
        char x = str[i];
  
        /*a new node if path doesn't exists*/
        if (temp->map.find(x) == temp->map.end())
            temp->map[x] = getNewTrieNode();
  
        // go to next node
        temp = temp->map[x];
    }
  
    // store key and its count in leaf nodes
    temp->key = str;
    temp->cnt += 1;
}
  
/* function for preorder traversal */
bool preorder(Trie* temp, int& maxcnt, string& key)
{
    if (temp == NULL)
        return false;
  
    for (auto it : temp->map) {
  
        /*leaf node will have non-zero count*/
        if (maxcnt < it.second->cnt) {
            key = it.second->key;
            maxcnt = it.second->cnt;
        }
  
        // recurse for current node children
        preorder(it.second, maxcnt, key);
    }
}
  
/*
 * Prints the most frequent word of `arr` and how often it occurs.
 *
 * arr : array of words to examine.
 * n   : number of elements of `arr` to read.
 *
 * The words are inserted into a temporary trie, the trie is scanned for the
 * highest count, and the trie is released again before returning. With
 * n == 0 an empty word and a count of 0 are printed.
 */
void mostFrequentWord(string arr[], int n)
{
    // Insert all words in a Trie
    Trie* root = getNewTrieNode();
    for (int i = 0; i < n; i++)
        insert(root, arr[i]);

    // Do preorder traversal to find the
    // most frequent word
    string key;
    int cnt = 0;
    preorder(root, cnt, key);

    // Trie has no destructor, so its nodes must be released by hand.
    // `key` is a copy of the winning word, so it stays valid afterwards.
    // An explicit stack avoids recursion depth tied to the word length.
    std::vector<Trie*> pending;
    pending.push_back(root);
    while (!pending.empty()) {
        Trie* node = pending.back();
        pending.pop_back();
        for (const auto& edge : node->map) {
            if (edge.second != nullptr)
                pending.push_back(edge.second);
        }
        delete node;
    }

    cout << "The word that occurs most is : "
         << key << endl;
    cout << "No of times: " << cnt << endl;
}
  
// Driver code
int main()
{
    // given set of keys
    string arr[] = { "geeks", "for", "geeks", "a",
                     "portal", "to", "learn", "can", "be",
                     "computer", "science", "zoom", "yup",
                     "fire", "in", "be", "data", "geeks" };
    int n = sizeof(arr) / sizeof(arr[0]);
  
    mostFrequentWord(arr, n);
  
    return 0;
}

chevron_right


Output:

The word that occurs most is : geeks
No of times: 3

Time Complexity: O(n * MAX_WORD_LEN)

Another efficient solution is to use hashing. Please refer to "Find winner of an election where votes are represented as candidate names" for details.

A simpler solution is to use a HashMap.

Approach:
Using a HashMap, one can keep track of each word and its frequency. The next step is to iterate over the map and find the word with the maximum frequency.
Below is the implementation of the above approach.

filter_none

edit
close

play_arrow

link
brightness_4
code

// Java implementation
import java.util.*;
  
class GKG {

    /**
     * Finds the word that appears most often in the given array.
     *
     * @param arr words to examine
     * @return the word with the highest occurrence count, or an empty
     *         string when the array has no elements
     */
    static String findWord(String[] arr)
    {
        // Maps each distinct word to the number of times it has been seen
        HashMap<String, Integer> frequency = new HashMap<String, Integer>();

        for (String word : arr) {
            // A missing entry means this is the first sighting of the word
            Integer seen = frequency.get(word);
            if (seen == null) {
                frequency.put(word, 1);
            }
            else {
                frequency.put(word, seen + 1);
            }
        }

        // Running best; only a strictly larger count replaces it
        String bestWord = "";
        int bestCount = 0;

        for (Map.Entry<String, Integer> entry : frequency.entrySet()) {
            int count = entry.getValue();
            if (count > bestCount) {
                bestCount = count;
                bestWord = entry.getKey();
            }
        }

        return bestWord;
    }

    // Driver code
    public static void main(String[] args)
    {
        String[] words = {
            "geeks", "for", "geeks", "a", "portal", "to",
            "learn", "can", "be", "computer", "science", "zoom",
            "yup", "fire", "in", "be", "data", "geeks"
        };

        // Print the word having the highest frequency
        System.out.println(findWord(words));
    }
}
  
// This code is contributed by Divyank Sheth

chevron_right


Output:

geeks

This article is contributed by Pranav. If you like GeeksforGeeks and would like to contribute, you can also write an article using contribute.geeksforgeeks.org or mail your article to contribute@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.

Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.



My Personal Notes arrow_drop_up

Improved By : Divyank_Sheth