Shannon-Fano Algorithm for Data Compression
Last Updated :
09 Apr, 2024
DATA COMPRESSION AND ITS TYPESÂ
Data Compression, also known as source coding, is the process of encoding or converting data in such a way that it consumes less memory space. Data compression reduces the number of resources required to store and transmit data.Â
It can be done in two ways- lossless compression and lossy compression. Lossy compression reduces the size of data by removing unnecessary information, while there is no data loss in lossless compression.
WHAT IS SHANNON FANO CODING?Â
Shannon Fano Algorithm is an entropy encoding technique for lossless data compression of multimedia. Named after Claude Shannon and Robert Fano, it assigns a code to each symbol based on their probabilities of occurrence. It is a variable-length encoding scheme, that is, the codes assigned to the symbols will be of varying lengths.Â
HOW DOES IT WORK?Â
The steps of the algorithm are as follows:Â Â
- Create a list of probabilities or frequency counts for the given set of symbols so that the relative frequency of occurrence of each symbol is known.
- Sort the list of symbols in decreasing order of probability, the most probable ones to the left and the least probable ones to the right.
- Split the list into two parts, with the total probability of both parts being as close to each other as possible.
- Assign the value 0 to the left part and 1 to the right part.
- Repeat steps 3 and 4 for each part until all the symbols are split into individual subgroups.
The Shannon codes are considered accurate if the code of each symbol is unique.
EXAMPLE:Â
The given task is to construct Shannon codes for the given set of symbols using the Shannon-Fano lossless compression technique.
Step:Â Â
Tree:Â
Solution:Â
1. Upon arranging the symbols in decreasing order of probability:
P(D) + P(B) = 0.30 + 0.2 = 0.58
and,Â
P(A) + P(C) + P(E) = 0.22 + 0.15 + 0.05 = 0.42
And since they almost equally split the table, the most is divided it the blockquote table isblockquotentoÂ
{D, B} and {A, C, E}
and assign them the values 0 and 1 respectively.
Step:Â
Tree:Â
2. Now, in {D, B} group,Â
P(D) = 0.30 and P(B) = 0.28
which means that P(D)~P(B), so divide {D, B} into {D} and {B} and assign 0 to D and 1 to B.
Step:Â
Tree:Â
3. In {A, C, E} group,Â
P(A) = 0.22 and P(C) + P(E) = 0.20
So the group is divided intoÂ
{A} and {C, E}
and they are assigned values 0 and 1 respectively.
4. In {C, E} group,Â
P(C) = 0.15 and P(E) = 0.05
So divide them into {C} and {E} and assign 0 to {C} and 1 to {E}
Step:
Tree:
Note: The splitting is now stopped as each symbol is separated now.
The Shannon codes for the set of symbols are:Â Â
Â
The correct table should be :
SYMBOL | A | B | C | D | E |
PROBABILITY | 0.22 | 0.28 | 0.15 | .30 | .05 |
SHANNON CODE: | 10 | 01 | 110 | 00 | 111 |
As it can be seen, these are all unique and of varying lengths.
Below is the implementation of the above approach:Â Â
C++
// C++ program for Shannon Fano Algorithm
// include header files
#include <bits/stdc++.h>
using namespace std;
// declare structure node
struct node {
// for storing symbol
string sym;
// for storing probability or frequency
float pro;
int arr[20];
int top;
} p[20];
typedef struct node node;
// function to find shannon code
void shannon(int l, int h, node p[])
{
float pack1 = 0, pack2 = 0, diff1 = 0, diff2 = 0;
int i, d, k, j;
if ((l + 1) == h || l == h || l > h) {
if (l == h || l > h)
return;
p[h].arr[++(p[h].top)] = 0;
p[l].arr[++(p[l].top)] = 1;
return;
}
else {
for (i = l; i <= h - 1; i++)
pack1 = pack1 + p[i].pro;
pack2 = pack2 + p[h].pro;
diff1 = pack1 - pack2;
if (diff1 < 0)
diff1 = diff1 * -1;
j = 2;
while (j != h - l + 1) {
k = h - j;
pack1 = pack2 = 0;
for (i = l; i <= k; i++)
pack1 = pack1 + p[i].pro;
for (i = h; i > k; i--)
pack2 = pack2 + p[i].pro;
diff2 = pack1 - pack2;
if (diff2 < 0)
diff2 = diff2 * -1;
if (diff2 >= diff1)
break;
diff1 = diff2;
j++;
}
k++;
for (i = l; i <= k; i++)
p[i].arr[++(p[i].top)] = 1;
for (i = k + 1; i <= h; i++)
p[i].arr[++(p[i].top)] = 0;
// Invoke shannon function
shannon(l, k, p);
shannon(k + 1, h, p);
}
}
// Function to sort the symbols
// based on their probability or frequency
void sortByProbability(int n, node p[])
{
int i, j;
node temp;
for (j = 1; j <= n - 1; j++) {
for (i = 0; i < n - 1; i++) {
if ((p[i].pro) > (p[i + 1].pro)) {
temp.pro = p[i].pro;
temp.sym = p[i].sym;
p[i].pro = p[i + 1].pro;
p[i].sym = p[i + 1].sym;
p[i + 1].pro = temp.pro;
p[i + 1].sym = temp.sym;
}
}
}
}
// function to display shannon codes
void display(int n, node p[])
{
int i, j;
cout << "\n\n\n\tSymbol\tProbability\tCode";
for (i = n - 1; i >= 0; i--) {
cout << "\n\t" << p[i].sym << "\t\t" << p[i].pro << "\t";
for (j = 0; j <= p[i].top; j++)
cout << p[i].arr[j];
}
}
// Driver code
int main()
{
int n, i, j;
float total = 0;
string ch;
node temp;
// Input number of symbols
cout << "Enter number of symbols\t: ";
n = 5;
cout << n << endl;
// Input symbols
for (i = 0; i < n; i++) {
cout << "Enter symbol " << i + 1 << " : ";
ch = (char)(65 + i);
cout << ch << endl;
// Insert the symbol to node
p[i].sym += ch;
}
// Input probability of symbols
float x[] = { 0.22, 0.28, 0.15, 0.30, 0.05 };
for (i = 0; i < n; i++) {
cout << "\nEnter probability of " << p[i].sym << " : ";
cout << x[i] << endl;
// Insert the value to node
p[i].pro = x[i];
total = total + p[i].pro;
// checking max probability
if (total > 1) {
cout << "Invalid. Enter new values";
total = total - p[i].pro;
i--;
}
}
p[i].pro = 1 - total;
// Sorting the symbols based on
// their probability or frequency
sortByProbability(n, p);
for (i = 0; i < n; i++)
p[i].top = -1;
// Find the shannon code
shannon(0, n - 1, p);
// Display the codes
display(n, p);
return 0;
}
Java
// Java code for the above approach
class Main {
// Node class to store each symbol along with its probability and code
static class Node {
char sym; // symbol
double pro; // probability
int[] arr = new int[20]; // array to store the code
int top; // top of the stack
}
// Function to calculate Shannon code
static void shannon(int l, int h, Node[] p) {
double pack1 = 0, pack2 = 0, diff1 = 0, diff2 = 0;
int i, d, k = 0, j; // Initialize k here
if ((l + 1) == h || l == h || l > h) {
if (l == h || l > h) return;
p[h].arr[++(p[h].top)] = 0;
p[l].arr[++(p[l].top)] = 1;
return;
} else {
for (i = l; i <= h - 1; i++) pack1 = pack1 + p[i].pro;
pack2 = pack2 + p[h].pro;
diff1 = pack1 - pack2;
if (diff1 < 0) diff1 = diff1 * -1;
j = 2;
while (j != h - l + 1) {
k = h - j;
pack1 = pack2 = 0;
for (i = l; i <= k; i++) pack1 = pack1 + p[i].pro;
for (i = h; i > k; i--) pack2 = pack2 + p[i].pro;
diff2 = pack1 - pack2;
if (diff2 < 0) diff2 = diff2 * -1;
if (diff2 >= diff1) break;
diff1 = diff2;
j++;
}
k++;
for (i = l; i <= k; i++) p[i].arr[++(p[i].top)] = 1;
for (i = k + 1; i <= h; i++) p[i].arr[++(p[i].top)] = 0;
shannon(l, k, p);
shannon(k + 1, h, p);
}
}
// Function to sort the symbols based on their probability
static void sortByProbability(int n, Node[] p) {
int i, j;
Node temp = new Node();
for (j = 1; j <= n - 1; j++) {
for (i = 0; i < n - 1; i++) {
if ((p[i].pro) > (p[i + 1].pro)) {
temp = p[i];
p[i] = p[i + 1];
p[i + 1] = temp;
}
}
}
}
// Function to display the symbols along with their probability and code
static void display(int n, Node[] p) {
int i, j;
System.out.println("\n\n\n Symbol Probability Code");
for (i = n - 1; i >= 0; i--) {
System.out.print(p[i].sym + " " + p[i].pro + " ");
for (j = 0; j <= p[i].top; j++) System.out.print(p[i].arr[j]);
System.out.println();
}
}
// Driver code
public static void main(String[] args) {
int n = 5;
Node[] p = new Node[n];
double total = 0;
for (int i = 0; i < n; i++) {
p[i] = new Node();
p[i].sym = (char) (65 + i);
p[i].pro = new double[]{0.22, 0.28, 0.15, 0.30, 0.05}[i];
total = total + p[i].pro;
if (total > 1) {
System.out.println("Invalid. Enter new values");
total = total - p[i].pro;
i--;
}
}
p[n - 1].pro = 1 - total;
sortByProbability(n, p);
for (int i = 0; i < n; i++) p[i].top = -1;
shannon(0, n - 1, p);
display(n, p);
}
}
Python3
# Python3 program for Shannon Fano Algorithm
# declare structure node
class node :
def __init__(self) -> None:
# for storing symbol
self.sym=''
# for storing probability or frequency
self.pro=0.0
self.arr=[0]*20
self.top=0
p=[node() for _ in range(20)]
# function to find shannon code
def shannon(l, h, p):
pack1 = 0; pack2 = 0; diff1 = 0; diff2 = 0
if ((l + 1) == h or l == h or l > h) :
if (l == h or l > h):
return
p[h].top+=1
p[h].arr[(p[h].top)] = 0
p[l].top+=1
p[l].arr[(p[l].top)] = 1
return
else :
for i in range(l,h):
pack1 = pack1 + p[i].pro
pack2 = pack2 + p[h].pro
diff1 = pack1 - pack2
if (diff1 < 0):
diff1 = diff1 * -1
j = 2
while (j != h - l + 1) :
k = h - j
pack1 = pack2 = 0
for i in range(l, k+1):
pack1 = pack1 + p[i].pro
for i in range(h,k,-1):
pack2 = pack2 + p[i].pro
diff2 = pack1 - pack2
if (diff2 < 0):
diff2 = diff2 * -1
if (diff2 >= diff1):
break
diff1 = diff2
j+=1
k+=1
for i in range(l,k+1):
p[i].top+=1
p[i].arr[(p[i].top)] = 1
for i in range(k + 1,h+1):
p[i].top+=1
p[i].arr[(p[i].top)] = 0
# Invoke shannon function
shannon(l, k, p)
shannon(k + 1, h, p)
# Function to sort the symbols
# based on their probability or frequency
def sortByProbability(n, p):
temp=node()
for j in range(1,n) :
for i in range(n - 1) :
if ((p[i].pro) > (p[i + 1].pro)) :
temp.pro = p[i].pro
temp.sym = p[i].sym
p[i].pro = p[i + 1].pro
p[i].sym = p[i + 1].sym
p[i + 1].pro = temp.pro
p[i + 1].sym = temp.sym
# function to display shannon codes
def display(n, p):
print("\n\n\n\tSymbol\tProbability\tCode",end='')
for i in range(n - 1,-1,-1):
print("\n\t", p[i].sym, "\t\t", p[i].pro,"\t",end='')
for j in range(p[i].top+1):
print(p[i].arr[j],end='')
# Driver code
if __name__ == '__main__':
total = 0
# Input number of symbols
print("Enter number of symbols\t: ",end='')
n = 5
print(n)
i=0
# Input symbols
for i in range(n):
print("Enter symbol", i + 1," : ",end="")
ch = chr(65 + i)
print(ch)
# Insert the symbol to node
p[i].sym += ch
# Input probability of symbols
x = [0.22, 0.28, 0.15, 0.30, 0.05]
for i in range(n):
print("\nEnter probability of", p[i].sym, ": ",end="")
print(x[i])
# Insert the value to node
p[i].pro = x[i]
total = total + p[i].pro
# checking max probability
if (total > 1) :
print("Invalid. Enter new values")
total = total - p[i].pro
i-=1
i+=1
p[i].pro = 1 - total
# Sorting the symbols based on
# their probability or frequency
sortByProbability(n, p)
for i in range(n):
p[i].top = -1
# Find the shannon code
shannon(0, n - 1, p)
# Display the codes
display(n, p)
C#
using System;
// Declare structure node
struct Node
{
// For storing symbol
public string sym;
// For storing probability or frequency
public float pro;
public int[] arr;
public int top;
}
class Program
{
// Function to find Shannon code
static void Shannon(int l, int h, Node[] p)
{
float pack1 = 0, pack2 = 0, diff1 = 0, diff2 = 0;
int i, d, k, j;
if ((l + 1) == h || l == h || l > h)
{
if (l == h || l > h)
return;
p[h].arr[++(p[h].top)] = 0;
p[l].arr[++(p[l].top)] = 1;
return;
}
else
{
for (i = l; i <= h - 1; i++)
pack1 = pack1 + p[i].pro;
pack2 = pack2 + p[h].pro;
diff1 = pack1 - pack2;
if (diff1 < 0)
diff1 = diff1 * -1;
j = 2;
while (j != h - l + 1)
{
k = h - j;
pack1 = pack2 = 0;
for (i = l; i <= k; i++)
pack1 = pack1 + p[i].pro;
for (i = h; i > k; i--)
pack2 = pack2 + p[i].pro;
diff2 = pack1 - pack2;
if (diff2 < 0)
diff2 = diff2 * -1;
if (diff2 >= diff1)
break;
diff1 = diff2;
j++;
}
k++;
for (i = l; i <= k; i++)
p[i].arr[++(p[i].top)] = 1;
for (i = k + 1; i <= h; i++)
p[i].arr[++(p[i].top)] = 0;
// Invoke Shannon function
Shannon(l, k, p);
Shannon(k + 1, h, p);
}
}
// Function to sort the symbols
// based on their probability or frequency
static void SortByProbability(int n, Node[] p)
{
int i, j;
Node temp;
for (j = 1; j <= n - 1; j++)
{
for (i = 0; i < n - 1; i++)
{
if ((p[i].pro) > (p[i + 1].pro))
{
temp.pro = p[i].pro;
temp.sym = p[i].sym;
p[i].pro = p[i + 1].pro;
p[i].sym = p[i + 1].sym;
p[i + 1].pro = temp.pro;
p[i + 1].sym = temp.sym;
}
}
}
}
// Function to display Shannon codes
static void Display(int n, Node[] p)
{
int i, j;
Console.WriteLine("\n\n\n\tSymbol\tProbability\tCode");
for (i = n - 1; i >= 0; i--)
{
Console.Write("\n\t" + p[i].sym + "\t\t" + p[i].pro + "\t");
for (j = 0; j <= p[i].top; j++)
Console.Write(p[i].arr[j]);
}
}
// Driver code
static void Main(string[] args)
{
int n, i, j;
float total = 0;
string ch;
Node temp;
// Input number of symbols
Console.Write("Enter number of symbols\t: ");
n = 5;
Console.WriteLine(n);
// Input symbols
Node[] p = new Node[n];
for (i = 0; i < n; i++)
{
Console.Write("Enter symbol " + (i + 1) + " : ");
ch = ((char)(65 + i)).ToString();
Console.WriteLine(ch);
// Insert the symbol to node
p[i].sym += ch;
}
// Input probability of symbols
float[] x = { 0.22f, 0.28f, 0.15f, 0.30f, 0.05f };
for (i = 0; i < n; i++)
{
Console.Write("\nEnter probability of " + p[i].sym + " : ");
Console.WriteLine(x[i]);
// Insert the value to node
p[i].pro = x[i];
total = total + p[i].pro;
// Checking max probability
if (total > 1)
{
Console.WriteLine("Invalid. Enter new values");
total = total - p[i].pro;
i--;
}
}
p[i].pro = 1 - total;
// Sorting the symbols based on
// their probability or frequency
SortByProbability(n, p);
for (i = 0; i < n; i++)
p[i].top = -1;
// Find the Shannon code
Shannon(0, n - 1, p);
// Display the codes
Display(n, p);
}
}
Javascript
<script>
//JavaScript code for the above approach
function shannon(l, h, p) {
let pack1 = 0, pack2 = 0, diff1 = 0, diff2 = 0;
let i, d, k, j;
if ((l + 1) == h || l == h || l > h) {
if (l == h || l > h) return;
p[h].arr[++(p[h].top)] = 0;
p[l].arr[++(p[l].top)] = 1;
return;
} else {
for (i = l; i <= h - 1; i++) pack1 = pack1 + p[i].pro;
pack2 = pack2 + p[h].pro;
diff1 = pack1 - pack2;
if (diff1 < 0) diff1 = diff1 * -1;
j = 2;
while (j != h - l + 1) {
k = h - j;
pack1 = pack2 = 0;
for (i = l; i <= k; i++) pack1 = pack1 + p[i].pro;
for (i = h; i > k; i--) pack2 = pack2 + p[i].pro;
diff2 = pack1 - pack2;
if (diff2 < 0) diff2 = diff2 * -1;
if (diff2 >= diff1) break;
diff1 = diff2;
j++;
}
k++;
for (i = l; i <= k; i++) p[i].arr[++(p[i].top)] = 1;
for (i = k + 1; i <= h; i++) p[i].arr[++(p[i].top)] = 0;
shannon(l, k, p);
shannon(k + 1, h, p);
}
}
function sortByProbability(n, p) {
let i, j;
let temp;
for (j = 1; j <= n - 1; j++) {
for (i = 0; i < n - 1; i++) {
if ((p[i].pro) > (p[i + 1].pro)) {
temp = p[i];
p[i] = p[i + 1];
p[i + 1] = temp;
}
}
}
}
function display(n, p) {
let i, j;
document.write("\n\n\n Symbol Probability Code" + "<br>");
for (i = n - 1; i >= 0; i--) {
document.write(`${p[i].sym}      ${p[i].pro}      `);
for (j = 0; j <= p[i].top; j++) document.write(p[i].arr[j]);
document.write("<br>")
}
}
//Driver code
document.write("Enter number of symbols : ");
let n = 5;
document.write(n + "<br>")
let p = [];
let total = 0;
for (let i = 0; i < n; i++) {
document.write(" Enter symbol " + (i + 1) + " : ");
let ch = String.fromCharCode(65 + i);
document.write(ch + "<br>")
p.push({ sym: ch, pro: 0, arr: [], top: -1 });
}
let x = [0.22, 0.28, 0.15, 0.30, 0.05];
for (let i = 0; i < n; i++) {
document.write("Enter probability of " + p[i].sym + " : ");
document.write(x[i] + "<br>")
p[i].pro = x[i];
total = total + p[i].pro;
if (total > 1) {
document.write("Invalid. Enter new values");
total = total - p[i].pro;
i--;
}
}
p[n - 1].pro = 1 - total;
sortByProbability(n, p);
for (let i = 0; i < n; i++) p[i].top = -1;
shannon(0, n - 1, p);
display(n, p);
// This code is contributed by Potta Lokesh
</script>
OutputEnter number of symbols : 5
Enter symbol 1 : A
Enter symbol 2 : B
Enter symbol 3 : C
Enter symbol 4 : D
Enter symbol 5 : E
Enter probability of A : 0.22
Enter probability of B : 0.28
Enter probability of C : 0.15
Enter probability of D : 0.3
Enter probability of E : 0.05
Symbol Probability Code
D 0.3 00
B 0.28 01
A 0.22 10
C 0.15 110
E 0.05 111
Like Article
Suggest improvement
Share your thoughts in the comments
Please Login to comment...