Open In App

Minimum number of deletions and insertions to transform one string into another

Given two strings ‘str1’ and ‘str2’ of size m and n respectively. The task is to remove/delete and insert the minimum number of characters from/in str1 to transform it into str2. It could be possible that the same character needs to be removed/deleted from one point of str1 and inserted at some another point.

Example 1: 



Input : 
str1 = "heap", str2 = "pea"
Output :
Minimum Deletion = 2 and
Minimum Insertion = 1
Explanation:
p and h deleted from heap
Then, p is inserted at the beginning
One thing to note, though p was required yet
it was removed/deleted first from its position and
then it is inserted to some other position.
Thus, p contributes one to the deletion_count
and one to the insertion_count.

Example 2: 



Input : 
str1 = "geeksforgeeks", str2 = "geeks"
Output :
Minimum Deletion = 8
Minimum Insertion = 0

Simple Approach:
A simple approach is to consider all subsequences of str1 and for each subsequence calculate minimum deletions and insertions so as to transform it into str2. A very complex method and the time complexity of this solution is exponential.

Efficient Approach:
An efficient approach uses the concept of finding the length of the longest common subsequence of the given two sequences.

Algorithm: 

Below is the implementation of the above code:




// Dynamic Programming C++ implementation to find
// minimum number of deletions and insertions
#include <bits/stdc++.h>
 
using namespace std;
 
// Returns length of longest common subsequence
// for str1[0..m-1], str2[0..n-1]
int lcs(string str1, string str2, int m, int n)
{
    int L[m + 1][n + 1];
    int i, j;
 
    // Following steps build L[m+1][n+1] in bottom
    // up fashion. Note that L[i][j] contains
    // length of LCS of str1[0..i-1] and str2[0..j-1]
    for (i = 0; i <= m; i++) {
        for (j = 0; j <= n; j++) {
            if (i == 0 || j == 0)
                L[i][j] = 0;
 
            else if (str1.at(i - 1) == str2.at(j - 1))
                L[i][j] = L[i - 1][j - 1] + 1;
 
            else
                L[i][j] = max(L[i - 1][j], L[i][j - 1]);
        }
    }
 
    // L[m][n] contains length of LCS
    // for X[0..n-1] and Y[0..m-1]
    return L[m][n];
}
 
// function to find minimum number
// of deletions and insertions
void printMinDelAndInsert(string str1, string str2)
{
    int m = str1.size();
    int n = str2.size();
 
    int len = lcs(str1, str2, m, n);
 
    cout << "Minimum number of deletions = " << (m - len)
         << endl;
 
    cout << "Minimum number of insertions = " << (n - len)
         << endl;
}
 
// Driver Code
int main()
{
    string str1 = "heap";
    string str2 = "pea";
   
      // Function Call
    printMinDelAndInsert(str1, str2);
    return 0;
}




// Dynamic Programming Java implementation
// to find minimum number of deletions and
// insertions
import java.io.*;
 
class GFG {
 
    // Returns length of length common
    // subsequence for str1[0..m-1],
    // str2[0..n-1]
    static int lcs(String str1, String str2, int m, int n)
    {
        int L[][] = new int[m + 1][n + 1];
        int i, j;
 
        // Following steps build L[m+1][n+1] in
        // bottom up fashion. Note that L[i][j]
        // contains length of LCS of str1[0..i-1]
        // and str2[0..j-1]
        for (i = 0; i <= m; i++) {
            for (j = 0; j <= n; j++) {
                if (i == 0 || j == 0)
                    L[i][j] = 0;
 
                else if (str1.charAt(i - 1)
                         == str2.charAt(j - 1))
                    L[i][j] = L[i - 1][j - 1] + 1;
 
                else
                    L[i][j] = Math.max(L[i - 1][j],
                                       L[i][j - 1]);
            }
        }
 
        // L[m][n] contains length of LCS
        // for X[0..n-1] and Y[0..m-1]
        return L[m][n];
    }
 
    // function to find minimum number
    // of deletions and insertions
    static void printMinDelAndInsert(String str1,
                                     String str2)
    {
        int m = str1.length();
        int n = str2.length();
 
        int len = lcs(str1, str2, m, n);
 
        System.out.println("Minimum number of "
                           + "deletions = ");
        System.out.println(m - len);
 
        System.out.println("Minimum number of "
                           + "insertions = ");
        System.out.println(n - len);
    }
 
    // Driver code
    public static void main(String[] args)
    {
        String str1 = new String("heap");
        String str2 = new String("pea");
       
          // Function Call
        printMinDelAndInsert(str1, str2);
    }
}
// This code is contributed by Prerna Saini




# Dynamic Programming Python3
# implementation to find minimum
# number of deletions and insertions
 
# Returns length of length
# common subsequence for
# str1[0..m-1], str2[0..n-1]
 
 
def lcs(str1, str2, m, n):
 
    L = [[0 for i in range(n + 1)]
         for i in range(m + 1)]
 
    # Following steps build L[m+1][n+1]
    # in bottom up fashion. Note that
    # L[i][j] contains length of LCS
    # of str1[0..i-1] and str2[0..j-1]
    for i in range(m + 1):
        for j in range(n + 1):
            if (i == 0 or j == 0):
                L[i][j] = 0
            elif(str1[i - 1] == str2[j - 1]):
                L[i][j] = L[i - 1][j - 1] + 1
            else:
                L[i][j] = max(L[i - 1][j],
                              L[i][j - 1])
 
    # L[m][n] contains length of LCS
    # for X[0..n-1] and Y[0..m-1]
    return L[m][n]
 
# function to find minimum number
# of deletions and insertions
 
 
def printMinDelAndInsert(str1, str2):
    m = len(str1)
    n = len(str2)
    leng = lcs(str1, str2, m, n)
    print("Minimum number of deletions = ",
          m - leng, sep=' ')
    print("Minimum number of insertions = ",
          n - leng, sep=' ')
 
 
# Driver Code
str1 = "heap"
str2 = "pea"
 
# Function Call
printMinDelAndInsert(str1, str2)
 
# This code is contributed
# by sahilshelangia




// Dynamic Programming C# implementation
// to find minimum number of deletions and
// insertions
using System;
 
class GFG {
 
    // Returns length of length common
    // subsequence for str1[0..m-1],
    // str2[0..n-1]
    static int lcs(string str1, string str2, int m, int n)
    {
        int[, ] L = new int[m + 1, n + 1];
        int i, j;
 
        // Following steps build L[m+1][n+1] in
        // bottom up fashion. Note that L[i][j]
        // contains length of LCS of str1[0..i-1]
        // and str2[0..j-1]
        for (i = 0; i <= m; i++) {
            for (j = 0; j <= n; j++) {
                if (i == 0 || j == 0)
                    L[i, j] = 0;
 
                else if (str1[i - 1] == str2[j - 1])
                    L[i, j] = L[i - 1, j - 1] + 1;
 
                else
                    L[i, j] = Math.Max(L[i - 1, j],
                                       L[i, j - 1]);
            }
        }
 
        // L[m][n] contains length of LCS
        // for X[0..n-1] and Y[0..m-1]
        return L[m, n];
    }
 
    // function to find minimum number
    // of deletions and insertions
    static void printMinDelAndInsert(string str1,
                                     string str2)
    {
        int m = str1.Length;
        int n = str2.Length;
 
        int len = lcs(str1, str2, m, n);
 
        Console.Write("Minimum number of "
                      + "deletions = ");
        Console.WriteLine(m - len);
 
        Console.Write("Minimum number of "
                      + "insertions = ");
        Console.Write(n - len);
    }
 
    // Driver code
    public static void Main()
    {
        string str1 = new string("heap");
        string str2 = new string("pea");
       
          // Function Call
        printMinDelAndInsert(str1, str2);
    }
}
 
// This code is contributed by nitin mittal.




<script>
    // Dynamic Programming Javascript implementation
    // to find minimum number of deletions and
    // insertions
 
    // Returns length of length common
    // subsequence for str1[0..m-1],
    // str2[0..n-1]
    function lcs(str1, str2, m, n)
    {
        let L = new Array(m + 1);
             
        let i, j;
         
        for (i = 0; i <= m; i++)
        {
            L[i] = new Array(n + 1);
            for (j = 0; j <= n; j++)
            {
                L[i][j] = 0;
            }
        }
  
        // Following steps build L[m+1][n+1] in
        // bottom up fashion. Note that L[i][j]
        // contains length of LCS of str1[0..i-1]
        // and str2[0..j-1]
        for (i = 0; i <= m; i++) {
            for (j = 0; j <= n; j++) {
                if (i == 0 || j == 0)
                    L[i][j] = 0;
  
                else if (str1[i - 1]
                         == str2[j - 1])
                    L[i][j] = L[i - 1][j - 1] + 1;
  
                else
                    L[i][j] = Math.max(L[i - 1][j],
                                       L[i][j - 1]);
            }
        }
  
        // L[m][n] contains length of LCS
        // for X[0..n-1] and Y[0..m-1]
        return L[m][n];
    }
  
    // function to find minimum number
    // of deletions and insertions
    function printMinDelAndInsert(str1, str2)
    {
        let m = str1.length;
        let n = str2.length;
  
        let len = lcs(str1, str2, m, n);
  
        document.write("Minimum number of "
                           + "deletions = ");
        document.write((m - len) + "</br>");
  
        document.write("Minimum number of "
                           + "insertions = ");
        document.write((n - len) + "</br>");
    }
     
    let str1 = "heap";
    let str2 = "pea";
 
    // Function Call
    printMinDelAndInsert(str1, str2);
     
    // This code is contributed by rameshtravel07.
</script>

Output
Minimum number of deletions = 2
Minimum number of insertions = 1





Time Complexity: O(m * n)
Auxiliary Space: O(m * n)

Method 2: Top-down approach using memoization

This method also uses the idea of finding the length of the longest common subsequence of the given two sequences. But this approach is using a top-down approach using memoization.




// Dynamic Programming C++ implementation to find
// minimum number of deletions and insertions
// using memoization technique
#include <bits/stdc++.h>
 
using namespace std;
 
int dp[1001][1001];
 
// Returns length of longest common subsequence
int lcs(string& s1, string& s2, int i, int j)
{
    if (i == 0 || j == 0) {
        return 0;
    }
    if (dp[i][j] != -1) {
        return dp[i][j];
    }
    if (s1[i - 1] == s2[j - 1]) {
        return dp[i][j] = 1 + lcs(s1, s2, i - 1, j - 1);
    }
    else {
        return dp[i][j] = max(lcs(s1, s2, i, j - 1),
                              lcs(s1, s2, i - 1, j));
    }
}
 
// function to find minimum number
// of deletions and insertions
void printMinDelAndInsert(string str1, string str2)
{
    int m = str1.size();
    int n = str2.size();
 
    dp[m][n];
 
    //    initialising dp array as -1
    memset(dp, -1, sizeof(dp));
 
    int len = lcs(str1, str2, m, n);
 
    cout << "Minimum number of deletions = " << (m - len)
         << endl;
 
    cout << "Minimum number of insertions = " << (n - len)
         << endl;
}
 
// driver code
int main()
{
    string str1 = "heap";
    string str2 = "pea";
 
    // Function Call
    printMinDelAndInsert(str1, str2);
 
    return 0;
}
 
// This code is contributed by Arun Bang.




// Dynamic Programming Java implementation
// to find minimum number of deletions and
// insertions
import java.io.*;
import java.util.Arrays;
class GFG {
    public static int dp[][]=new int[1001][1001];
   
    static int lcs(String str1, String str2, int i, int j)
    {
         if (i == 0 || j == 0) {
                return 0;
        }
        if (dp[i][j] != -1) {
            return dp[i][j];
        }
        if (str1.charAt(i - 1) == str2.charAt(j - 1)) {
            return dp[i][j] = 1 + lcs(str1, str2, i - 1, j - 1);
            }
            else {
            return dp[i][j] = Math.max(lcs(str1, str2, i, j - 1),
                              lcs(str1, str2, i - 1, j));
        }
         
    }
 
    // function to find minimum number
    // of deletions and insertions
    static void printMinDelAndInsert(String str1,
                                    String str2)
    {
        int m = str1.length();
        int n = str2.length();
 
        int len = lcs(str1, str2, m, n);
 
        System.out.println("Minimum number of "
                        + "deletions = "+(m - len));
         
 
        System.out.println("Minimum number of "
                        + "insertions = "+(n - len));
     
    }
 
    // Driver code
    public static void main(String[] args)
    {
        String str1 = new String("heap");
        String str2 = new String("pea");
        for(int i=0;i<1001;i++){
         Arrays.fill(dp[i],-1);
        }
       
        // Function Call
        printMinDelAndInsert(str1, str2);
    }
}
 
// This code is contributed by nikitamehrotra99.




#  Dynamic Programming C++ implementation to find
#  minimum number of deletions and insertions
#  using memoization technique
dp = [[-1 for i in range(1001)]for j in range(1001)]
 
#  Returns length of longest common subsequence
def lcs(s1, s2, i, j):
    if i == 0 or j == 0:
        return 0
    if dp[i][j] != -1:
        return dp[i][j]
 
    if s1[i - 1] == s2[j - 1]:
        dp[i][j] = 1 + lcs(s1, s2, i-1, j-1)
        return dp[i][j]
    else:
        dp[i][j] = max(lcs(s1, s2, i, j-1), lcs(s1, s2, i-1, j))
        return dp[i][j]
 
#  function to find minimum number
#  of deletions and insertions
def printMinDelAndInsert(str1, str2):
    m = len(str1)
    n = len(str2)
 
    #     initialising dp array as -1
    dp = [[-1 for i in range(n)]for j in range(m)]
    length = lcs(str1, str2, m, n)
    print("Minimum number of deletions = ", (m - length))
    print("Minimum number of insertions = ", (n - length))
 
#  driver code
if __name__ == "__main__":
    str1 = "heap"
    str2 = "pea"
 
    # Function Call
    printMinDelAndInsert(str1, str2)
 
#  This code is contributed by Vivek Maddeshiya




using System;
 
namespace MinDelAndInsert {
  class Program {
    static int[, ] dp = new int[1001, 1001];
 
    // Returns length of longest common subsequence
    static int lcs(string s1, string s2, int i, int j)
    {
      if (i == 0 || j == 0) {
        return 0;
      }
      if (dp[i, j] != -1) {
        return dp[i, j];
      }
      if (s1[i - 1] == s2[j - 1]) {
        return dp[i, j] = 1 + lcs(s1, s2, i - 1, j - 1);
      }
      else {
        return dp[i, j]
          = Math.Max(lcs(s1, s2, i, j - 1),
                     lcs(s1, s2, i - 1, j));
      }
    }
 
    // function to find minimum number
    // of deletions and insertions
    static void PrintMinDelAndInsert(string str1,
                                     string str2)
    {
      int m = str1.Length;
      int n = str2.Length;
 
      // Initializing dp array as -1
      for (int i = 0; i <= m; i++) {
        for (int j = 0; j <= n; j++) {
          dp[i, j] = -1;
        }
      }
 
      int len = lcs(str1, str2, m, n);
 
      Console.WriteLine("Minimum number of deletions = "
                        + (m - len));
      Console.WriteLine("Minimum number of insertions = "
                        + (n - len));
    }
 
    static void Main(string[] args)
    {
      string str1 = "heap";
      string str2 = "pea";
 
      // Function Call
      PrintMinDelAndInsert(str1, str2);
    }
  }
}
 
// This code is contributed by divyansh2212




// Dynamic Programming JavaScript implementation to find
// minimum number of deletions and insertions
// using memoization technique
const dp = new Array(1001).fill(-1).map(() => new Array(1001).fill(-1));
 
// Returns length of longest common subsequence
const lcs = (s1, s2, i, j) => {
if (i === 0 || j === 0) {
return 0;
}
if (dp[i][j] !== -1) {
return dp[i][j];
}
if (s1[i - 1] === s2[j - 1]) {
dp[i][j] = 1 + lcs(s1, s2, i - 1, j - 1);
return dp[i][j];
}
else {
dp[i][j] = Math.max(lcs(s1, s2, i, j - 1), lcs(s1, s2, i - 1, j));
return dp[i][j];
}
};
 
// function to find minimum number
// of deletions and insertions
const printMinDelAndInsert = (str1, str2) => {
let m = str1.length;
let n = str2.length;
 
//    initialising dp array as -1
 
let len = lcs(str1, str2, m, n);
 
console.log(`Minimum number of deletions = ${m - len}`);
 
console.log(`Minimum number of insertions = ${n - len}`);
}
 
// driver code
 
let str1 = "heap";
let str2 = "pea";
 
// Function Call
printMinDelAndInsert(str1, str2);
 
// This code is contributed by lokeshpotta20.

Output
Minimum number of deletions = 2
Minimum number of insertions = 1





Time Complexity: O(m * n)
Auxiliary Space: O(m * n)
 

Efficient approach: Space optimization

In previous approach the current value dp[i][j] is only depend upon the current and previous row values of DP. So to optimize the space complexity we use a single 1D array to store the computations.

Implementation steps:

Implementation: 




#include <algorithm>
#include <iostream>
#include <vector>
 
using namespace std;
 
int lcs(string str1, string str2)
{
    int m = str1.size();
    int n = str2.size();
 
    // Single vector to store the previous row
    vector<int> dp(n + 1, 0);
 
    for (int i = 1; i <= m; i++) {
 
        // Store the value of dp[j-1] from the previous
        // iteration
        int prev = 0;
 
        for (int j = 1; j <= n; j++) {
            // Store the current value of dp[j] before
            // modifying it
            int temp = dp[j];
 
            if (str1[i - 1] == str2[j - 1])
                dp[j] = prev + 1;
            else
                dp[j] = max(dp[j], dp[j - 1]);
 
            // Update prev for the next iteration
            prev = temp;
        }
    }
 
    return dp[n];
    // Return the length of the LCS
}
 
void printMinDelAndInsert(string str1, string str2)
{
    int len = lcs(str1, str2);
 
    int m = str1.size();
    int n = str2.size();
 
    cout << "Minimum number of deletions = " << (m - len)
         << endl;
    cout << "Minimum number of insertions = " << (n - len)
         << endl;
}
 
// Driver Code
int main()
{
    string str1 = "heap";
    string str2 = "pea";
 
    printMinDelAndInsert(str1, str2);
 
    return 0;
}
 
// -- by bhardwajji




import java.util.*;
import java.io.*;
 
public class GFG {
 
    public static int lcs(String str1, String str2) {
        int m = str1.length();
        int n = str2.length();
 
        // Single array to store the previous row
        int[] dp = new int[n + 1];
 
        for (int i = 1; i <= m; i++) {
 
            // Store the value of dp[j-1] from the previous iteration
            int prev = 0;
 
            for (int j = 1; j <= n; j++) {
                // Store the current value of dp[j] before modifying it
                int temp = dp[j];
 
                if (str1.charAt(i - 1) == str2.charAt(j - 1))
                    dp[j] = prev + 1;
                else
                    dp[j] = Math.max(dp[j], dp[j - 1]);
 
                // Update prev for the next iteration
                prev = temp;
            }
        }
 
        return dp[n];
        // Return the length of the LCS
    }
 
    public static void printMinDelAndInsert(String str1, String str2) {
        int len = lcs(str1, str2);
 
        int m = str1.length();
        int n = str2.length();
 
        System.out.println("Minimum number of deletions = " + (m - len));
        System.out.println("Minimum number of insertions = " + (n - len));
    }
    // Driver Code
    public static void main(String[] args) {
        String str1 = "heap";
        String str2 = "pea";
 
        printMinDelAndInsert(str1, str2);
    }
}
// by srihari2222




def lcs(str1, str2):
    m = len(str1)
    n = len(str2)
 
    # Create a matrix to store the LCS lengths
    dp = [[0] * (n + 1) for _ in range(m + 1)]
 
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if str1[i - 1] == str2[j - 1]:
                # If characters match, extend LCS by 1
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                # Otherwise, take the maximum LCS length from the previous row or column
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
 
    return dp[m][n]
 
def print_min_del_and_insert(str1, str2):
    len_lcs = lcs(str1, str2)  # Calculate the length of the Longest Common Subsequence
    m = len(str1)
    n = len(str2)
 
    # Calculate the minimum number of deletions and insertions
    min_deletions = m - len_lcs
    min_insertions = n - len_lcs
 
    print("Minimum number of deletions =", min_deletions)
    print("Minimum number of insertions =", min_insertions)
 
# Driver Code
if __name__ == "__main__":
    str1 = "heap"
    str2 = "pea"
 
    print_min_del_and_insert(str1, str2)




using System;
 
class Program
{
    static int LCS(string str1, string str2)
    {
        int m = str1.Length;
        int n = str2.Length;
 
        // Single array to store the previous row
        int[] dp = new int[n + 1];
 
        for (int i = 1; i <= m; i++)
        {
            // Store the value of dp[j-1] from the previous iteration
            int prev = 0;
 
            for (int j = 1; j <= n; j++)
            {
                // Store the current value of dp[j] before modifying it
                int temp = dp[j];
 
                if (str1[i - 1] == str2[j - 1])
                {
                    dp[j] = prev + 1;
                }
                else
                {
                    dp[j] = Math.Max(dp[j], dp[j - 1]);
                }
 
                // Update prev for the next iteration
                prev = temp;
            }
        }
 
        return dp[n];
        // Return the length of the LCS
    }
 
    static void PrintMinDelAndInsert(string str1, string str2)
    {
        int len = LCS(str1, str2);
 
        int m = str1.Length;
        int n = str2.Length;
 
        Console.WriteLine("Minimum number of deletions = " + (m - len));
        Console.WriteLine("Minimum number of insertions = " + (n - len));
    }
 
    static void Main()
    {
        string str1 = "heap";
        string str2 = "pea";
 
        PrintMinDelAndInsert(str1, str2);
    }
}




function lcs(str1, str2) {
    const m = str1.length;
    const n = str2.length;
 
    // Single array to store the previous row
    const dp = new Array(n + 1).fill(0);
 
    for (let i = 1; i <= m; i++) {
 
        // Store the value of dp[j-1] from the previous iteration
        let prev = 0;
 
        for (let j = 1; j <= n; j++) {
            // Store the current value of dp[j] before modifying it
            let temp = dp[j];
 
            if (str1[i - 1] === str2[j - 1]) {
                dp[j] = prev + 1;
            } else {
                dp[j] = Math.max(dp[j], dp[j - 1]);
            }
 
            // Update prev for the next iteration
            prev = temp;
        }
    }
 
    return dp[n];
    // Return the length of the LCS
}
 
function printMinDelAndInsert(str1, str2) {
    const len = lcs(str1, str2);
 
    const m = str1.length;
    const n = str2.length;
 
    console.log("Minimum number of deletions = " + (m - len));
    console.log("Minimum number of insertions = " + (n - len));
}
 
// Driver Code
const str1 = "heap";
const str2 = "pea";
 
printMinDelAndInsert(str1, str2);
// --> by srihari2222

Output
Minimum number of deletions = 2
Minimum number of insertions = 1






Time Complexity: O(m * n)
Auxiliary Space: O(n)

This article is contributed by Ayush Jauhari.


Article Tags :