How to sort a big array with many repetitions?

3.1

Consider a big array where elements are from a small set and in any range, i.e. there are many repetitions. How to efficiently sort the array?

Example: 
Input:  arr[] = {100, 12, 100, 1, 1, 12, 100, 1, 12, 100, 1, 1}
Output: arr[] = {1, 1, 1, 1, 1, 12, 12, 12, 100, 100, 100, 100}

We strongly recommend you to minimize your browser and try this yourself first.

A Basic Sorting algorithm like MergeSort, HeapSort would take O(nLogn) time where n is number of elements, can we do better?

A Better Solution is to use Self-Balancing Binary Search Tree like AVL or Red-Black to sort in O(n Log m) time where m is number of distinct elements. The idea is to extend tree node to have count of keys also.

struct Node
{
   int key;
   struct Node *left. *right;
   int count;  // Added to handle duplicates

   // Other tree node info for balancing like height in AVL
}

Below is complete algorithm using AVL tree.
1) Create an empty AVL Tree with count as an additional field.
2) Traverse input array and do following for every element ‘arr[i]’
…..a) If arr[i] is not present in tree, then insert it and initialize count as 1
…..b) Else increment its count in tree.
3) Do Inorder Traversal of tree. While doing inorder put every key its count times in arr[].

The 2nd step takes O(n Log m) time and 3rd step takes O(n) time. So overall time complexity is O(n Log m)

Below is C++ implementation of above idea.

// C++ program to sort an array using AVL tree
#include<iostream>
using namespace std;

// An AVL tree Node
struct Node
{
    int key;
    struct Node *left, *right;
    int height, count;
};

// Function to isnert a key in AVL Tree, if key is already present,
// then it increments count in key's node.
struct Node* insert(struct Node* Node, int key);

// This function puts inorder traversal of AVL Tree in arr[]
void inorder(int arr[], struct Node *root, int *index_ptr);

// An AVL tree based sorting function for sorting an array with
// duplicates
void sort(int arr[], int n)
{
  // Create an empty AVL Tree
  struct Node *root = NULL;

  // Insert all nodes one by one in AVL tree. The insert function
  // increments count if key is already present
  for (int i=0; i<n; i++)
     root = insert(root, arr[i]);

  // Do inorder traversal to put elements back in sorted order
  int index = 0;
  inorder(arr, root, &index);
}

// This function puts inorder traversal of AVL Tree in arr[]
void inorder(int arr[], struct Node *root, int *index_ptr)
{
    if (root != NULL)
    {
        // Recur for left child
        inorder(arr, root->left, index_ptr);

        // Put all occurrences of root's key in arr[]
        for (int i=0; i<root->count; i++)
        {
           arr[*index_ptr] = root->key;
           (*index_ptr)++;
        }

        // Recur for right child
        inorder(arr, root->right, index_ptr);
    }
}

// A utility function to get height of the tree
int height(struct Node *N)
{
    if (N == NULL)
        return 0;
    return N->height;
}

// Helper function that allocates a new Node
struct Node* newNode(int key)
{
    struct Node* node = new Node;
    node->key   = key;
    node->left   = node->right = NULL;
    node->height = node->count = 1;
    return(node);
}

// A utility function to right rotate subtree rooted
// with y.
struct Node *rightRotate(struct Node *y)
{
    struct Node *x = y->left;
    struct Node *T2 = x->right;

    // Perform rotation
    x->right = y;
    y->left = T2;

    // Update heights
    y->height = max(height(y->left), height(y->right))+1;
    x->height = max(height(x->left), height(x->right))+1;

    // Return new root
    return x;
}

// A utility function to left rotate subtree rooted with x
struct Node *leftRotate(struct Node *x)
{
    struct Node *y = x->right;
    struct Node *T2 = y->left;

    // Perform rotation
    y->left = x;
    x->right = T2;

    //  Update heights
    x->height = max(height(x->left), height(x->right))+1;
    y->height = max(height(y->left), height(y->right))+1;

    // Return new root
    return y;
}

// Get Balance factor of Node N
int getBalance(struct Node *N)
{
    if (N == NULL)
        return 0;
    return height(N->left) - height(N->right);
}

// Function to insert a key in AVL Tree, if key is already
// present, then it increments count in key's node.
struct Node* insert(struct Node* Node, int key)
{
    /* 1.  Perform the normal BST rotation */
    if (Node == NULL)
        return (newNode(key));

    // If key already exists in BST, icnrement count and return
    if (key == Node->key)
    {
        (Node->count)++;
        return Node;
    }

     /* Otherwise, recur down the tree */
    if (key < Node->key)
        Node->left  = insert(Node->left, key);
    else
        Node->right = insert(Node->right, key);

    /* 2. Update height of this ancestor Node */
    Node->height = max(height(Node->left), height(Node->right)) + 1;

    /* 3. Get the balance factor of this ancestor Node to
       check whether this Node became unbalanced */
    int balance = getBalance(Node);

    // If this Node becomes unbalanced, then there are 4 cases

    // Left Left Case
    if (balance > 1 && key < Node->left->key)
        return rightRotate(Node);

    // Right Right Case
    if (balance < -1 && key > Node->right->key)
        return leftRotate(Node);

    // Left Right Case
    if (balance > 1 && key > Node->left->key)
    {
        Node->left =  leftRotate(Node->left);
        return rightRotate(Node);
    }

    // Right Left Case
    if (balance < -1 && key < Node->right->key)
    {
        Node->right = rightRotate(Node->right);
        return leftRotate(Node);
    }

    /* return the (unchanged) Node pointer */
    return Node;
}

// A utility function to print an array
void printArr(int arr[], int n)
{
    for (int i=0; i<n; i++)
       cout << arr[i] << ", ";
    cout << endl;
}

/* Driver program to test above function*/
int main()
{
  int arr[] = {100, 12, 100, 1, 1, 12, 100, 1, 12, 100, 1, 1};
  int n = sizeof(arr)/sizeof(arr[0]);

  cout << "Input array is\n";
  printArr(arr, n);

  sort(arr, n);

  cout << "Sorted array is\n";
  printArr(arr, n);
}

Output:

Input array is
100, 12, 100, 1, 1, 12, 100, 1, 12, 100, 1, 1,
Sorted array is
1, 1, 1, 1, 1, 12, 12, 12, 100, 100, 100, 100,

We can also use Binary Heap to solve in O(n Log m) time.
We can also use Hashing to solve above problem in O(n + m Log m) time.
1) Create an empty hash table. Input array values are stores as key and their counts are stored as value in hash table.
2) For every element ‘x’ of arr[], do following
…..a) If x ix present in hash table, increment its value
…..b) Else insert x with value equals to 1.
3) Consider all keys of hash table and sort them.
4) Traverse all sorted keys and print every key its value times.

Time complexity of 2nd step is O(n) under the assumption that hash search and insert take O(1) time. Step 3 takes O(m Log m) time where m is total number of distinct keys in input array. Step 4 takes O(n) time. So overall time complexity is O(n + m Log m).

Program implementation using Hash Table


// A C++ program to sort a big array with many repetitions

#include <iostream>
#include <algorithm>
#include <map>
using namespace std;

void sort(int arr[], int n)
{
   //1. Create an empty hash table.
    map<int, int> count;

    //2. Input array values are stores as key and their
    //counts are stored as value in hash table.
    for (int i=0; i<n; i++)
        count[arr[i]]++;

    map<int, int>::iterator it;
    int index = 0;

    //3. Consider all keys of hash table and sort them.
    //In std::map, keys are already sorted.

    //4. Traverse all sorted keys and print every key its value times.
    for (it=count.begin(); it!=count.end(); ++it)
    {
        while(it->second--)
        arr[index++]=it->first;
    }
}

// Utility function to print an array
void printArray(int arr[], int n)
{
    for (int i=0; i<n; i++)
        cout << arr[i] << " ";
        cout << endl;
 }

// Driver program to test above function.
int main()
{
    int arr[] = {100, 12, 100, 1, 1, 12, 100, 1, 12, 100, 1, 1};
    int n = sizeof(arr)/sizeof(arr[0]);

    cout << "Input array is\n";
    printArray(arr, n);

    sort(arr, n);

    cout << "Sorted array is\n";
    printArray(arr, n);

    return 0;
}
// Contributed by Aditya Goel

Output:

Input array is
100 12 100 1 1 12 100 1 12 100 1 1 
Sorted array is
1 1 1 1 1 12 12 12 100 100 100 100

This article is contributed by Ankur. Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.

GATE CS Corner    Company Wise Coding Practice

Recommended Posts:



3.1 Average Difficulty : 3.1/5.0
Based on 6 vote(s)










Writing code in comment? Please use ide.geeksforgeeks.org, generate link and share the link here.