Minimum size of subset of Strings with frequency more than half of Array
Last Updated :
12 May, 2024
Given an Array of Strings (Arr), the task is to find the smallest subset of strings in the array such that the total count of those selected strings exceeds 50% of the size of the original array. In other words, find the minimum set of distinct strings that constitutes over 50% of the array’s elements.
Examples:
Input: Arr = [‘shoes’, ‘face’, ‘pizza’, ‘covid’, ‘shoes’, ‘covid’, ‘covid’, ‘face’, ‘shoes’]
Output: [‘covid’, ‘shoes’]
Explanation: Frequency of the strings is as follows: ‘shoes’ : 3, ‘covid’ : 3, ‘face’ : 2, ‘pizza’ : 1
So ‘shoes’ (3) + ‘covid’ (3) = 6, which is greater than half the size of the array (9 / 2 = 4.5). No single string reaches that count, so the minimum subset has two strings.
Input: Arr = [‘java’, ‘python’, ‘java’, ‘python’, ‘python’]
Output: [‘python’]
Explanation: Frequency of the strings is as follows: ‘python’ : 3, ‘java’ : 2.
So ‘python’ (3) alone is greater than half the size of the array (5 / 2 = 2.5).
Approach #1 :
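Iterate through the array and build a dictionary that maps each string to its frequency: add a new key with count 1 when a string is seen for the first time, otherwise increase its count by 1. Then sort the dictionary entries by frequency in decreasing order and pick strings from the front, adding up their frequencies, until the running total exceeds half the size of the array. The strings picked so far form the required subset.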
Below is the implementation of the above approach:
C++
#include <iostream>
#include <vector>
#include <unordered_map>
#include <algorithm>
using namespace std;
// Returns a smallest set of distinct strings whose combined number of
// occurrences in arr[0..n) is strictly greater than n / 2.
//
// Parameters:
//   arr - the input strings
//   n   - how many elements of arr to consider
//
// The result lists the chosen strings from most to least frequent. Strings
// with the same frequency are ordered lexicographically so that the output
// is reproducible: unordered_map iteration order is unspecified and
// std::sort is not stable, so without a tie-break the order could vary.
// Returns an empty vector when n <= 0.
std::vector<std::string> minSubsetToExceedHalf(std::string arr[], int n)
{
    // Count how many times each string occurs.
    std::unordered_map<std::string, int> frequency;
    for (int i = 0; i < n; ++i) {
        ++frequency[arr[i]];
    }

    // Smallest total count that is strictly more than half of n.
    const int maxFreq = (n / 2) + 1;

    // Order the distinct strings by descending frequency; ties are broken
    // by the string itself to keep the result deterministic.
    std::vector<std::pair<std::string, int>> sortedFrequency(
        frequency.begin(), frequency.end());
    std::sort(sortedFrequency.begin(), sortedFrequency.end(),
              [](const std::pair<std::string, int>& a,
                 const std::pair<std::string, int>& b) {
                  if (a.second != b.second) {
                      return a.second > b.second;
                  }
                  return a.first < b.first;
              });

    // Greedily take the most frequent strings until their combined count
    // reaches the threshold; taking the largest counts first minimises the
    // number of strings needed.
    std::vector<std::string> maxFreqStrings;
    int currFreq = 0;
    for (const auto& entry : sortedFrequency) {
        maxFreqStrings.push_back(entry.first);
        currFreq += entry.second;
        if (currFreq >= maxFreq) {
            break;
        }
    }
    return maxFreqStrings;
}
// Driver code: runs minSubsetToExceedHalf on the sample input and prints
// each selected string followed by a space, then ends the line.
int main()
{
    std::string words[] = { "shoes", "face", "pizza", "covid", "shoes",
                            "covid", "covid", "face", "shoes" };
    const int wordCount = sizeof(words) / sizeof(words[0]);

    const std::vector<std::string> selected
        = minSubsetToExceedHalf(words, wordCount);

    // Print the selection in the order the function returned it.
    for (auto it = selected.begin(); it != selected.end(); ++it) {
        std::cout << *it << " ";
    }
    std::cout << std::endl;
    return 0;
}
Java
import java.util.*;
public class Main {
    /**
     * Finds a smallest set of distinct strings whose combined number of
     * occurrences in {@code arr} is strictly greater than half of
     * {@code arr.length}.
     *
     * @param arr the input strings
     * @return the chosen strings, most frequent first; empty when
     *         {@code arr} is empty
     */
    static List<String> minSubsetToExceedHalf(String[] arr)
    {
        // Smallest total count that is strictly more than half the array.
        final int threshold = (arr.length / 2) + 1;

        // Tally the occurrences of every string.
        Map<String, Integer> counts = new HashMap<>();
        for (String word : arr) {
            counts.merge(word, 1, Integer::sum);
        }

        // Rank the distinct strings from most to least frequent.
        List<Map.Entry<String, Integer>> ranked
            = new ArrayList<>(counts.entrySet());
        ranked.sort(
            (x, y) -> Integer.compare(y.getValue(), x.getValue()));

        // Take strings from the front of the ranking until their
        // combined count reaches the threshold.
        List<String> chosen = new ArrayList<>();
        int covered = 0;
        Iterator<Map.Entry<String, Integer>> it = ranked.iterator();
        while (covered < threshold && it.hasNext()) {
            Map.Entry<String, Integer> next = it.next();
            chosen.add(next.getKey());
            covered += next.getValue();
        }
        return chosen;
    }

    // Driver code: prints the selection for the sample input on one line.
    public static void main(String[] args)
    {
        String[] arr = {
            "shoes", "face", "pizza", "covid", "shoes",
            "covid", "covid", "face", "shoes"
        };
        List<String> selected = minSubsetToExceedHalf(arr);
        System.out.println(String.join(" ", selected));
    }
}
Python
def min_subset_to_exceed_half(arr):
    """Return a smallest list of distinct strings covering more than half of ``arr``.

    Args:
        arr: list of strings (any hashable items work).

    Returns:
        The chosen strings, most frequent first. Strings with equal
        frequency keep the order in which they first appear in ``arr``.
        An empty list is returned for an empty ``arr``.
    """
    # Count the frequency of each string in the array
    frequency = {}
    for string in arr:
        frequency[string] = frequency.get(string, 0) + 1

    # Smallest total count that is strictly more than half of the array
    max_freq = (len(arr) // 2) + 1

    # Rank the distinct strings by frequency in descending order; sorted()
    # is stable, so ties keep their first-seen order
    sorted_frequency = sorted(
        frequency.items(), key=lambda item: item[1], reverse=True)

    # Take the most frequent strings until the threshold is reached
    max_freq_strings = []
    curr_freq = 0
    for string, count in sorted_frequency:
        max_freq_strings.append(string)
        curr_freq += count
        if curr_freq >= max_freq:
            break
    return max_freq_strings
# Driver Code
arr = ["shoes", "face", "pizza", "covid", "shoes",
       "covid", "covid", "face", "shoes"]
# Calling and printing the result
print(*min_subset_to_exceed_half(arr))  # Output: shoes covid
Javascript
// JavaScript code for the above approach:
/**
 * Finds a smallest list of distinct strings whose combined number of
 * occurrences in `arr` is strictly greater than half of `arr.length`.
 *
 * @param {string[]} arr - the input strings
 * @returns {string[]} the chosen strings, most frequent first
 */
function minSubsetToExceedHalf(arr) {
  // Smallest total count that is strictly more than half of the array.
  const threshold = Math.floor(arr.length / 2) + 1;

  // Tally how often each string occurs (a Map keeps first-seen order).
  const counts = new Map();
  for (const word of arr) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }

  // Rank the [string, count] pairs from most to least frequent.
  const ranked = Array.from(counts.entries());
  ranked.sort((left, right) => right[1] - left[1]);

  // Walk the ranking, collecting strings until the running total reaches
  // the threshold; some() stops at the first callback that returns true.
  const picked = [];
  let covered = 0;
  ranked.some(([word, count]) => {
    picked.push(word);
    covered += count;
    return covered >= threshold;
  });
  return picked;
}
// Driver code: sample input from the examples above.
const arr = [
  "shoes", "face", "pizza", "covid", "shoes",
  "covid", "covid", "face", "shoes",
];
// Print the selected strings on one line, separated by single spaces.
console.log(
  minSubsetToExceedHalf(arr).join(' ')
);
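Time Complexity: O(M + N log N), where M is the length of the input array (one pass to count the strings) and N is the number of unique strings (sorting them by frequency).
Auxiliary Space: O(N), where N represents the number of unique strings in the input array.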
Approach #2: Using collections.Counter():
In Python, collections.Counter is the recommended way to count occurrences: it builds the frequency of every element in a single call and can also be used to look up the frequency of a single element if required. The implementations in the other languages below obtain the same frequency table with a hash map.
Below is the implementation of the above approach:
C++
#include <algorithm>
#include <iostream>
#include <unordered_map>
#include <vector>
using namespace std;
// Returns a smallest set of distinct strings whose combined number of
// occurrences in arr is strictly greater than arr.size() / 2.
//
// arr is only read, so it is taken by const reference; this also lets
// callers pass temporaries.
//
// The result lists the chosen strings from most to least frequent. Strings
// with the same frequency are ordered lexicographically so that the output
// is reproducible: unordered_map iteration order is unspecified and
// std::sort is not stable. Returns an empty vector for an empty arr.
std::vector<std::string> GFG(const std::vector<std::string>& arr)
{
    // Count the frequency of each string in the array.
    std::unordered_map<std::string, int> frequency;
    for (const std::string& str : arr) {
        ++frequency[str];
    }

    // Smallest total count that is strictly more than half of the array.
    const int maxFreq = static_cast<int>(arr.size() / 2) + 1;

    // Order the distinct strings by descending frequency; ties are broken
    // by the string itself to keep the result deterministic.
    std::vector<std::pair<std::string, int>> sortedFrequency(
        frequency.begin(), frequency.end());
    std::sort(sortedFrequency.begin(), sortedFrequency.end(),
              [](const auto& a, const auto& b) {
                  if (a.second != b.second) {
                      return a.second > b.second;
                  }
                  return a.first < b.first;
              });

    // Greedily take the most frequent strings until their combined count
    // reaches the threshold.
    std::vector<std::string> maxFreqStrings;
    int currFreq = 0;
    for (const auto& entry : sortedFrequency) {
        maxFreqStrings.push_back(entry.first);
        currFreq += entry.second;
        if (currFreq >= maxFreq) {
            break;
        }
    }
    return maxFreqStrings;
}
// Driver code: runs GFG on the sample input and prints each selected
// string followed by a space, then ends the line.
int main()
{
    // Sample input from the examples above.
    std::vector<std::string> words{ "shoes", "face", "pizza", "covid", "shoes",
                                    "covid", "covid", "face", "shoes" };

    const std::vector<std::string> selected = GFG(words);
    for (auto it = selected.begin(); it != selected.end(); ++it) {
        std::cout << *it << " ";
    }
    std::cout << std::endl;
    return 0;
}
Java
import java.util.*;
public class Main {
    /**
     * Finds a smallest set of distinct strings whose combined number of
     * occurrences in {@code arr} is strictly greater than half of
     * {@code arr.length}.
     *
     * @param arr the input strings
     * @return the chosen strings, most frequent first; empty when
     *         {@code arr} is empty
     */
    public static List<String> GFG(String[] arr)
    {
        // Tally the occurrences of every string.
        Map<String, Integer> counts = new HashMap<>();
        for (String word : arr) {
            counts.merge(word, 1, Integer::sum);
        }

        // Smallest total count that is strictly more than half the array.
        final int threshold = arr.length / 2 + 1;

        // Rank the distinct strings from most to least frequent.
        List<Map.Entry<String, Integer>> ranked
            = new ArrayList<>(counts.entrySet());
        ranked.sort(
            (x, y) -> Integer.compare(y.getValue(), x.getValue()));

        // Take strings from the front of the ranking until their
        // combined count reaches the threshold.
        List<String> chosen = new ArrayList<>();
        int covered = 0;
        for (int i = 0; i < ranked.size() && covered < threshold; i++) {
            Map.Entry<String, Integer> current = ranked.get(i);
            chosen.add(current.getKey());
            covered += current.getValue();
        }
        return chosen;
    }

    // Driver code: prints each selected string followed by a space.
    public static void main(String[] args)
    {
        String[] arr = {
            "shoes", "face", "pizza", "covid", "shoes",
            "covid", "covid", "face", "shoes"
        };
        GFG(arr).forEach(s -> System.out.print(s + " "));
    }
}
Python
from collections import Counter
def min_subset_to_exceed_half(arr):
    """Return a smallest list of distinct strings covering more than half of ``arr``.

    Args:
        arr: list of strings (any hashable items work).

    Returns:
        The chosen strings, most frequent first. Strings with equal
        frequency keep the order in which they first appear in ``arr``.
        An empty list is returned for an empty ``arr``.
    """
    # Smallest total count that is strictly more than half of the array
    max_freq = (len(arr) // 2) + 1

    max_freq_strings = []
    curr_freq = 0
    # Counter.most_common() yields (string, count) pairs sorted by count in
    # descending order; equal counts stay in first-encountered order
    for string, count in Counter(arr).most_common():
        max_freq_strings.append(string)
        curr_freq += count
        # Stop as soon as the selected strings cover more than half
        if curr_freq >= max_freq:
            break
    return max_freq_strings
# Driver Code
arr = ["shoes", "face", "pizza", "covid", "shoes",
       "covid", "covid", "face", "shoes"]
# Calling and printing the result
print(*min_subset_to_exceed_half(arr))  # Output: shoes covid
JavaScript
/**
 * Finds a smallest list of distinct strings whose combined number of
 * occurrences in `arr` is strictly greater than half of `arr.length`.
 *
 * Counts are kept in a Map rather than a plain object: with `{}` as the
 * counter, strings that match inherited Object.prototype members (for
 * example "constructor", "toString" or "__proto__") are miscounted, and
 * integer-like strings are enumerated out of insertion order.
 *
 * @param {string[]} arr - the input strings
 * @returns {string[]} the chosen strings, most frequent first; strings with
 *   equal frequency keep the order in which they first appear in `arr`
 */
function GFG(arr) {
  // Count the frequency of each string in the array
  const frequency = new Map();
  for (const string of arr) {
    frequency.set(string, (frequency.get(string) || 0) + 1);
  }

  // Smallest total count that is strictly more than half of the array
  const maxFreq = Math.floor(arr.length / 2) + 1;

  // Rank the [string, count] pairs by frequency in descending order
  const sortedFrequency = [...frequency.entries()]
    .sort((a, b) => b[1] - a[1]);

  // Take the most frequent strings until the threshold is reached
  const maxFreqStrings = [];
  let currFreq = 0;
  for (const [string, count] of sortedFrequency) {
    maxFreqStrings.push(string);
    currFreq += count;
    if (currFreq >= maxFreq) {
      break;
    }
  }
  return maxFreqStrings;
}
// Driver code: sample input from the examples above.
const arr = [
  "shoes", "face", "pizza", "covid", "shoes",
  "covid", "covid", "face", "shoes",
];
// Spread the result so console.log prints the strings separated by spaces.
console.log(
  ...GFG(arr)
);
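Time Complexity: O(M + N log N), where M is the length of the input array (one pass to count the strings) and N is the number of unique strings (sorting them by frequency).
Auxiliary Space: O(N), where N represents the number of unique strings in the input array.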
Share your thoughts in the comments
Please Login to comment...