Most frequent word in an array of strings
Given an array of words find the most occurring word in it
Examples:
Input : arr[] = {"geeks", "for", "geeks", "a", "portal", "to", "learn", "can", "be", "computer", "science", "zoom", "yup", "fire", "in", "be", "data", "geeks"} Output : Geeks "geeks" is the most frequent word as it occurs 3 times
A simple solution is to run two loops and count occurrences of every word. Time complexity of this solution is O(n * n * MAX_WORD_LEN).
An efficient solution is to use Trie data structure. The idea is simple first we will insert in trie. In trie, we keep counts of words ending at a node. We do preorder traversal and compare count present at each node and find the maximum occurring word
Implementation:
CPP
// CPP code to find most frequent word in // an array of strings #include <bits/stdc++.h> using namespace std; /*structing the trie*/ struct Trie { string key; int cnt; unordered_map< char , Trie*> map; }; /* Function to return a new Trie node */ Trie* getNewTrieNode() { Trie* node = new Trie; node->cnt = 0; return node; } /* function to insert a string */ void insert(Trie*& root, string& str) { // start from root node Trie* temp = root; for ( int i = 0; i < str.length(); i++) { char x = str[i]; /*a new node if path doesn't exists*/ if (temp->map.find(x) == temp->map.end()) temp->map[x] = getNewTrieNode(); // go to next node temp = temp->map[x]; } // store key and its count in leaf nodes temp->key = str; temp->cnt += 1; } /* function for preorder traversal */ bool preorder(Trie* temp, int & maxcnt, string& key) { if (temp == NULL) return false ; for ( auto it : temp->map) { /*leaf node will have non-zero count*/ if (maxcnt < it.second->cnt) { key = it.second->key; maxcnt = it.second->cnt; } // recurse for current node children preorder(it.second, maxcnt, key); } } void mostFrequentWord(string arr[], int n) { // Insert all words in a Trie Trie* root = getNewTrieNode(); for ( int i = 0; i < n; i++) insert(root, arr[i]); // Do preorder traversal to find the // most frequent word string key; int cnt = 0; preorder(root, cnt, key); cout << "The word that occurs most is : " << key << endl; cout << "No of times: " << cnt << endl; } // Driver code int main() { // given set of keys string arr[] = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; int n = sizeof (arr) / sizeof (arr[0]); mostFrequentWord(arr, n); return 0; } |
The word that occurs most is : geeks No of times: 3
Time Complexity: O(n * MAX_WORD_LEN)
Another efficient solution is to use hashing. Please refer Find winner of an election where votes are represented as candidate names for details.
More simple solution is to use HashMap.
Approach: Using HashMap, one can keep track of word and it’s frequency. Next step includes iterate over it and find out the word with maximum frequency.
Below is the implementation of the above approach.
C++
// c++ implementation // Function returns word with highest frequency #include <bits/stdc++.h> using namespace std; // Function returns word with highest frequency string findWord(vector<string> arr) { // Create HashMap to store word and it's frequency unordered_map<string, int > hs; // Iterate through array of words for ( int i = 0; i < arr.size(); i++) { hs[arr[i]]++; } string key = "" ; int value = 0; for ( auto me : hs) { // Check for word having highest frequency if (me.second > value) { value = me.second; key = me.first; } } // Return word having highest frequency return key; } int main() { vector<string> arr{ "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; string sol = findWord(arr); // Print word having highest frequency cout << sol << endl; } // This code is contributed by Aarti_Rathi |
Java
// Java implementation import java.util.*; class GKG { // Function returns word with highest frequency static String findWord(String[] arr) { // Create HashMap to store word and it's frequency HashMap<String, Integer> hs = new HashMap<String, Integer>(); // Iterate through array of words for ( int i = 0 ; i < arr.length; i++) { // If word already exist in HashMap then increase it's count by 1 if (hs.containsKey(arr[i])) { hs.put(arr[i], hs.get(arr[i]) + 1 ); } // Otherwise add word to HashMap else { hs.put(arr[i], 1 ); } } // Create set to iterate over HashMap Set<Map.Entry<String, Integer> > set = hs.entrySet(); String key = "" ; int value = 0 ; for (Map.Entry<String, Integer> me : set) { // Check for word having highest frequency if (me.getValue() > value) { value = me.getValue(); key = me.getKey(); } } // Return word having highest frequency return key; } // Driver code public static void main(String[] args) { String arr[] = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; String sol = findWord(arr); // Print word having highest frequency System.out.println(sol); } } // This code is contributed by Divyank Sheth |
C#
// C# implementation using System; using System.Collections.Generic; class GFG { // Function returns word with highest frequency static String findWord(String[] arr) { // Create Dictionary to store word // and it's frequency Dictionary<String, int > hs = new Dictionary<String, int >(); // Iterate through array of words for ( int i = 0; i < arr.Length; i++) { // If word already exist in Dictionary // then increase it's count by 1 if (hs.ContainsKey(arr[i])) { hs[arr[i]] = hs[arr[i]] + 1; } // Otherwise add word to Dictionary else { hs.Add(arr[i], 1); } } // Create set to iterate over Dictionary String key = "" ; int value = 0; foreach (KeyValuePair<String, int > me in hs) { // Check for word having highest frequency if (me.Value > value) { value = me.Value; key = me.Key; } } // Return word having highest frequency return key; } // Driver code public static void Main(String[] args) { String []arr = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; String sol = findWord(arr); // Print word having highest frequency Console.WriteLine(sol); } } // This code is contributed by Rajput-Ji |
Javascript
<script> // JavaScript implementation // Function returns word with highest frequency function findWord(arr) { // Create Dictionary to store word // and it's frequency var hs = {}; // Iterate through array of words for ( var i = 0; i < arr.length; i++) { // If word already exist in Dictionary // then increase it's count by 1 if (hs.hasOwnProperty(arr[i])) { hs[arr[i]] = hs[arr[i]] + 1; } // Otherwise add word to Dictionary else { hs[arr[i]] = 1; } } // Create set to iterate over Dictionary var Key = "" ; var Value = 0; for (const [key, value] of Object.entries(hs)) { // Check for word having highest frequency if (value > Value) { Value = value; Key = key; } } // Return word having highest frequency return Key; } // Driver code var arr = [ "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" , ]; var sol = findWord(arr); // Print word having highest frequency document.write(sol); </script> |
geeks
Another Efficient Approach( Using Trie Data Structure)
Maintain a counter while inserting a word in Trie would avoid traversing Trie again
Java
import java.util.HashMap; import java.util.Map; public class TrieTest { class TrieNode { Map<Character, TrieNode> children; boolean endOfWord; int count; public TrieNode() { children = new HashMap<>(); endOfWord = false ; count = 0 ; } } private TrieNode root = new TrieNode(); private int maxCount = Integer.MIN_VALUE; private String mostFrequentString; public void insert(String word) { TrieNode current = root; for ( int i= 0 ; i<word.length(); i++) { Character ch = word.charAt(i); if (current.children.size() == 0 || (!current.children.containsKey(ch))) { current.children.put(ch, new TrieNode()); } TrieNode child = current.children.get(ch); current = child; } current.endOfWord = true ; current.count++; if (maxCount < current.count) { maxCount = current.count; mostFrequentString = word; } } public static void main(String[] args) { String [] words = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; TrieTest test = new TrieTest(); for (String word : words) { test.insert(word); } // print max count and System.out.println(test.maxCount); System.out.println(test.mostFrequentString); } } |
C#
using System; using System.Collections.Generic; public class TrieTest { public class TrieNode { public Dictionary< char , TrieNode> children; public bool endOfWord; public int count; public TrieNode() { children = new Dictionary< char , TrieNode>(); endOfWord = false ; count = 0; } } private TrieNode root = new TrieNode(); private int maxCount = int .MinValue; private String mostFrequentString; public void insert(String word) { TrieNode current = root; for ( int i = 0; i < word.Length; i++) { char ch = word[i]; if (current.children.Count == 0 || (!current.children.ContainsKey(ch))) { current.children.Add(ch, new TrieNode()); } TrieNode child = current.children[ch]; current = child; } current.endOfWord = true ; current.count++; if (maxCount < current.count) { maxCount = current.count; mostFrequentString = word; } } public static void Main(String[] args) { String[] words = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; TrieTest test = new TrieTest(); foreach (String word in words) { test.insert(word); } // print max count and Console.WriteLine(test.maxCount); Console.WriteLine(test.mostFrequentString); } } // This code is contributed by Rajput-Ji |
3 geeks
Time complexity – time to insert words in Trie O(W*L) W – no of words, L – avg length of Strings
Another Efficient Approach( Using Hash map in c++)
C++
#include <bits/stdc++.h> using namespace std; // User function template for C++ class Solution { public : // Function to find most frequent word in an array of // strings. string mostFrequentWord(string arr[], int n) { unordered_map<string, int > m; unordered_map<string, int > m1; int max = 0; string result; int k = 1; for ( int i = 0; i < n; i++) { if (m1.count(arr[i]) > 0) { continue ; } m1[arr[i]] = k; k++; } for ( int i = 0; i < n; i++) { m[arr[i]]++; if (max <= m[arr[i]]) { if (max < m[arr[i]]) { max = m[arr[i]]; result = arr[i]; } else { if (m1[result] < m1[arr[i]]) { max = m[arr[i]]; result = arr[i]; } } } } return result; } }; int main() { string arr[] = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; int n = sizeof (arr) / sizeof (arr[0]); Solution obj; cout << obj.mostFrequentWord(arr, n) << endl; return 0; } |
Java
import java.util.*; class GFG { // User function template for Java // Function to find most frequent word in an array of // Strings. String mostFrequentWord(String arr[], int n) { HashMap<String, Integer> m = new HashMap<>(); HashMap<String, Integer> m1 = new HashMap<>(); int max = 0 ; String result= "" ; int k = 1 ; for ( int i = 0 ; i < n; i++) { if (m1.containsKey(arr[i])) { continue ; } m1.put(arr[i], k); k++; } for ( int i = 0 ; i < n; i++) { if (m.containsKey(arr[i])) { m.put(arr[i], m.get(arr[i])+ 1 ); } else m.put(arr[i], + 1 ); if (max <= m.get(arr[i])) { if (max < m.get(arr[i])) { max = m.get(arr[i]); result = arr[i]; } else { if (m1.get(result) < m1.get(arr[i])) { max = m.get(arr[i]); result = arr[i]; } } } } return result; } public static void main(String[] args) { String arr[] = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; int n = arr.length; GFG obj = new GFG(); System.out.print(obj.mostFrequentWord(arr, n) + "\n" ); } } // This code is contributed by Rajput-Ji |
Python3
# Function to find most frequent word in an array of # strings. def mostFrequentWord(arr, n): m = dict () m1 = dict () max = 0 result = "" k = 1 for i in range ( 0 , n): if arr[i] in m1.keys(): continue m1[arr[i]] = k k + = 1 for i in range ( 0 , n): if arr[i] in m.keys(): m[arr[i]] + = 1 else : m[arr[i]] = 1 if max < = m[arr[i]]: if max < m[arr[i]]: max = m[arr[i]] result = arr[i] else : if m1[result] < m1[arr[i]]: max = m[arr[i]] result = arr[i] return result if __name__ = = "__main__" : arr = [ 'geeks' , 'for' , 'geeks' , 'a' , 'portal' , 'to' , 'learn' , 'can' , 'be' , 'computer' , 'science' , 'zoom' , 'yup' , 'fire' , 'in' , 'be' , 'data' , 'geeks' ] n = len (arr) print (mostFrequentWord(arr, n), end = '') print ( "\n" , end = '') # This code is contributed by Aarti_Rathi |
C#
// C# Program for the above approach using System; using System.Collections; using System.Collections.Generic; class Solution { // Function to find most frequent word in an array of // strings. static string mostFrequentWord( string []arr, int n) { Dictionary< string , int > m = new Dictionary< string , int >(); Dictionary< string , int > m1 = new Dictionary< string , int >(); int max = 0; string result = "" ; int k = 1; for ( int i = 0; i < n; i++) { if (m1.ContainsKey(arr[i])) { continue ; } m1[arr[i]] = k; k++; } for ( int i = 0; i < n; i++) { if (m.ContainsKey(arr[i])) { m[arr[i]] = m[arr[i]] + 1; } else { m.Add(arr[i], 1); } if (max <= m[arr[i]]) { if (max < m[arr[i]]) { max = m[arr[i]]; result = arr[i]; } else { if (m1[result] < m1[arr[i]]) { max = m[arr[i]]; result = arr[i]; } } } } return result; } public static void Main() { string []arr = { "geeks" , "for" , "geeks" , "a" , "portal" , "to" , "learn" , "can" , "be" , "computer" , "science" , "zoom" , "yup" , "fire" , "in" , "be" , "data" , "geeks" }; int n = arr.Length; Console.Write(mostFrequentWord(arr, n)); } } // This code is contributed by Samim Hossain Mondal. |
geeks
Time complexity : O(n)
This article is contributed by Aarti_Rathi and Pranav. If you like GeeksforGeeks and would like to contribute, you can also write an article using write.geeksforgeeks.org or mail your article to review-team@geeksforgeeks.org. See your article appearing on the GeeksforGeeks main page and help other Geeks.