Open In App

Find the missing end tag in the given HTML Code

Last Updated : 10 Feb, 2024
Improve
Improve
Like Article
Like
Save
Share
Report

Given a string htmlCode which is HTML code of a webpage, the task is to find the missing end tag in the HTML code.
Examples: 

Input: htmlCode = "<!DOCTYPE html>
<html>
<head>
<title>
GeeksforGeeks
</title>
</head>
<body>
<button>
</body>
</html>"
Output: </button>
Input: htmlCode = "<!DOCTYPE html>
<html>
<body>
<p>Hello</p>

</html>"
Output: </body>

 

Approach: The idea is to use stack to keep track of the current start tags in the HTML Code and if at any moment there is an end tag which doesn’t match to the top of the stack which denotes a start tag. Then the tag which is at the top of the stack is to be closed first. Therefore, top of the stack will be the desired missing tag in the HTML Code.
Below is the implementation of the above approach:
 

C++




#include <iostream>
#include <vector>
#include <stack>
#include <string>
#include <algorithm>
 
// Function to Auto complete the missing tag in the HTML Code
std::string autoComplete(const std::string& s) {
    // Split the html Code into lines
    std::vector<std::string> linesOfCode;
    std::string line;
    for (char ch : s) {
        if (ch == '\n') {
            linesOfCode.push_back(line);
            line = "";
        } else {
            line += ch;
        }
    }
    if (!line.empty()) {
        linesOfCode.push_back(line);
    }
 
    // Tags which are self-closed and don't need closing
    std::vector<std::string> selfClosedTags = {"area", "base", "br", "col",
                                               "embed", "hr", "img", "input",
                                               "link", "meta", "param", "source",
                                               "track", "wbr"};
 
    std::stack<std::string> stack;
 
    // Loop to iterate over the lines of code
    for (const std::string& line : linesOfCode) {
        for (size_t j = 0; j < line.size();) {
            // Check for end tags
            if (j + 1 < line.size() && line[j] == '<' && line[j + 1] == '/') {
                std::string tag;
                j += 2;
                while (j < line.size() && line[j] >= 'a' && line[j] <= 'z') {
                    tag += line[j];
                    j++;
                }
                while (j < line.size() && line[j] != '>') {
                    j++;
                }
                if (!stack.empty() && stack.top() != tag) {
                    return "</" + stack.top() + ">";
                }
                stack.pop();
            }
            // Skip the HTML 5 condition
            else if (j + 1 < line.size() && line[j] == '<' && line[j + 1] == '!') {
                j += 2;
            }
            // Check for start tags
            else if (line[j] == '<') {
                std::string tag;
                j++;
                while (j < line.size() && line[j] >= 'a' && line[j] <= 'z') {
                    tag += line[j];
                    j++;
                }
                while (j < line.size() && line[j] != '>') {
                    j++;
                }
                if (std::find(selfClosedTags.begin(),
                              selfClosedTags.end(), tag) == selfClosedTags.end()) {
                    stack.push(tag);
                }
            }
            j++;
        }
    }
 
    // Check if any tag is unbalanced then return that tag
    if (!stack.empty()) {
        return "</" + stack.top() + ">";
    }
    return "-1";
}
 
// Driver Code
int main() {
    std::string s = "<! DOCTYPE html>\n<html>\n<head>\n    <title>\n        GeeksforGeeks\n    </title>\n</head>\n<body>\n    <button>\n</body>\n</html>";
    std::string tag = autoComplete(s);
    std::cout << tag << std::endl;
    return 0;
}
 
 
 
// This code is contributed by shivamgupta0987654321


Java




import java.util.*;
 
public class AutoCompleteHTML {
 
    // Function to Auto complete the missing tag in the HTML Code
    public static String autoComplete(String s) {
        // Split the HTML code into lines
        List<String> linesOfCode = new ArrayList<>();
        StringBuilder line = new StringBuilder();
        for (char ch : s.toCharArray()) {
            if (ch == '\n') {
                linesOfCode.add(line.toString());
                line.setLength(0);
            } else {
                line.append(ch);
            }
        }
        if (line.length() > 0) {
            linesOfCode.add(line.toString());
        }
 
        // Tags which are self-closed and don't need closing
        List<String> selfClosedTags = Arrays.asList("area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr");
 
        Stack<String> stack = new Stack<>();
 
        // Loop to iterate over the lines of code
        for (String lineCode : linesOfCode) {
            for (int j = 0; j < lineCode.length();) {
                // Check for end tags
                if (j + 1 < lineCode.length() && lineCode.charAt(j) == '<' && lineCode.charAt(j + 1) == '/') {
                    StringBuilder tag = new StringBuilder();
                    j += 2;
                    while (j < lineCode.length() && Character.isLowerCase(lineCode.charAt(j))) {
                        tag.append(lineCode.charAt(j));
                        j++;
                    }
                    while (j < lineCode.length() && lineCode.charAt(j) != '>') {
                        j++;
                    }
                    if (!stack.isEmpty() && !stack.peek().equals(tag.toString())) {
                        return "</" + stack.peek() + ">";
                    }
                    stack.pop();
                }
                // Skip the HTML 5 condition
                else if (j + 1 < lineCode.length() && lineCode.charAt(j) == '<' && lineCode.charAt(j + 1) == '!') {
                    j += 2;
                }
                // Check for start tags
                else if (lineCode.charAt(j) == '<') {
                    StringBuilder tag = new StringBuilder();
                    j++;
                    while (j < lineCode.length() && Character.isLowerCase(lineCode.charAt(j))) {
                        tag.append(lineCode.charAt(j));
                        j++;
                    }
                    while (j < lineCode.length() && lineCode.charAt(j) != '>') {
                        j++;
                    }
                    if (!selfClosedTags.contains(tag.toString())) {
                        stack.push(tag.toString());
                    }
                }
                j++;
            }
        }
 
        // Check if any tag is unbalanced then return that tag
        if (!stack.isEmpty()) {
            return "</" + stack.peek() + ">";
        }
        return "-1";
    }
 
    // Driver Code
    public static void main(String[] args) {
        String s = "<! DOCTYPE html>\n<html>\n<head>\n    <title>\n        GeeksforGeeks\n    </title>\n</head>\n<body>\n    <button>\n</body>\n</html>";
        String tag = autoComplete(s);
        System.out.println(tag);
    }
}
 
// This code is contributed by akshitaguprzj3


Python




# Python implementation to find the
# the missing end in the HTML code
 
# Function to Auto complete the
# missing tag in the html Code
def autoComplete(s):
     
    # Split the html Code in line
    linesOfCode = list(s.strip().split("\n"))
     
    # Tags which are self closed doesn't
    # needs to be closed
    selfClosedTags = ["area""base", "br", \
            "col",   "embed""hr",    "img", \
            "input", "link", "meta", "param", \
                    "source", "track", "wbr"]
    n = len(linesOfCode)
 
    stack = []
     
    # Loop to iterate over the
    # lines of code
    for i in range(n):
        j = 0
         
        # Current Line
        line = linesOfCode[i]
        while j < len(linesOfCode[i]):
             
            # Condition to check if the current
            # character is a end tag in the code
            if j + 1 < len(line) and line[j] == "<"\
                            and line[j + 1] == "/":
                tag = []
                j += 2
                 
                # Loop to get the tag
                while j < len(line) and\
                   "a" <= line[j] <= "z":
                    tag.append(line[j])
                    j += 1
                while j < len(line) and line[j] != ">":
                    j += 1
                if stack[-1] != tag:
                    tag = stack[-1]
                    return "</" + "".join(tag) + ">"
                stack.pop()
                 
            # Condition to check if the current
            # character denotes the code is
            # of the HTML 5
            elif j + 1 < len(line) and line[j] == "<"\
                                  and line[j] == "!":
                continue
                 
            # Condition to check if the current
            # tag is a start tag of the HTML Code
            elif line[j] == "<":
                tag = []
                j += 1
                 
                # Loop to get the tag of the HTML
                while j < len(line) and\
                      "a" <= line[j] <= "z":
                    tag.append(line[j])
                    j += 1
                while j < len(line) and line[j] != ">":
                    j += 1
                     
                # Condition to check if the
                # current tag is not a self closed tag
                if "".join(tag) not in selfClosedTags:
                    stack.append(tag)
            j += 1
             
    # Condition to check if any tag
    # is unbalanced then return that tag
    if stack:
        tag = stack.pop()
        return "</" + "".join(tag) + ">"
    return -1
 
# Driver Code
if __name__ == "__main__":
    s = """<! DOCTYPE html>
<html>
<head>
    <title>
        GeeksforGeeks
    </title>
</head>
<body>
    <button>
</body>
</html>"""
    tag = autoComplete(s)
    print(tag)


C#




using System;
using System.Collections.Generic;
 
public class AutoCompleteHTML
{
    // Function to Auto complete the missing tag in the HTML Code
    public static string autoComplete(string s)
    {
        // Split the HTML code into lines
        List<string> linesOfCode = new List<string>();
        System.Text.StringBuilder line = new System.Text.StringBuilder();
        foreach (char ch in s)
        {
            if (ch == '\n')
            {
                linesOfCode.Add(line.ToString());
                line.Clear();
            }
            else
            {
                line.Append(ch);
            }
        }
        if (line.Length > 0)
        {
            linesOfCode.Add(line.ToString());
        }
 
        // Tags which are self-closed and don't need closing
        List<string> selfClosedTags = new List<string> { "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr" };
 
        Stack<string> stack = new Stack<string>();
 
        // Loop to iterate over the lines of code
        foreach (string lineCode in linesOfCode)
        {
            for (int j = 0; j < lineCode.Length;)
            {
                // Check for end tags
                if (j + 1 < lineCode.Length && lineCode[j] == '<' && lineCode[j + 1] == '/')
                {
                    System.Text.StringBuilder tag = new System.Text.StringBuilder();
                    j += 2;
                    while (j < lineCode.Length && char.IsLower(lineCode[j]))
                    {
                        tag.Append(lineCode[j]);
                        j++;
                    }
                    while (j < lineCode.Length && lineCode[j] != '>')
                    {
                        j++;
                    }
                    if (stack.Count > 0 && !stack.Peek().Equals(tag.ToString()))
                    {
                        return "</" + stack.Peek() + ">";
                    }
                    stack.Pop();
                }
                // Skip the HTML 5 condition
                else if (j + 1 < lineCode.Length && lineCode[j] == '<' && lineCode[j + 1] == '!')
                {
                    j += 2;
                }
                // Check for start tags
                else if (lineCode[j] == '<')
                {
                    System.Text.StringBuilder tag = new System.Text.StringBuilder();
                    j++;
                    while (j < lineCode.Length && char.IsLower(lineCode[j]))
                    {
                        tag.Append(lineCode[j]);
                        j++;
                    }
                    while (j < lineCode.Length && lineCode[j] != '>')
                    {
                        j++;
                    }
                    if (!selfClosedTags.Contains(tag.ToString()))
                    {
                        stack.Push(tag.ToString());
                    }
                }
                j++;
            }
        }
 
        // Check if any tag is unbalanced then return that tag
        if (stack.Count > 0)
        {
            return "</" + stack.Peek() + ">";
        }
        return "-1";
    }
 
    // Driver Code
    public static void Main(string[] args)
    {
        string s = "<! DOCTYPE html>\n<html>\n<head>\n    <title>\n        GeeksforGeeks\n    </title>\n</head>\n<body>\n    <button>\n</body>\n</html>";
        string tag = autoComplete(s);
        Console.WriteLine(tag);
    }
}
 
// This code is contributed by Yash Agarwal(yashagarwal2852002)


Javascript




// Function to Auto complete the missing tag in the HTML Code
function autoComplete(s) {
    // Split the HTML code into lines
    const linesOfCode = s.split('\n');
 
    // Tags which are self-closed and don't need closing
    const selfClosedTags = ["area", "base", "br", "col",
        "embed", "hr", "img", "input",
        "link", "meta", "param", "source",
        "track", "wbr"
    ];
 
    const stack = [];
 
    // Loop to iterate over the lines of code
    for (const line of linesOfCode) {
        for (let j = 0; j < line.length;) {
            // Check for end tags
            if (j + 1 < line.length && line[j] === '<' && line[j + 1] === '/') {
                let tag = "";
                j += 2;
                while (j < line.length && line[j] >= 'a' && line[j] <= 'z') {
                    tag += line[j];
                    j++;
                }
                while (j < line.length && line[j] !== '>') {
                    j++;
                }
                if (stack.length > 0 && stack[stack.length - 1] !== tag) {
                    return `</${stack[stack.length - 1]}>`;
                }
                stack.pop();
            }
            // Skip the HTML 5 condition
            else if (j + 1 < line.length && line[j] === '<' && line[j + 1] === '!') {
                j += 2;
            }
            // Check for start tags
            else if (line[j] === '<') {
                let tag = "";
                j++;
                while (j < line.length && line[j] >= 'a' && line[j] <= 'z') {
                    tag += line[j];
                    j++;
                }
                while (j < line.length && line[j] !== '>') {
                    j++;
                }
                if (!selfClosedTags.includes(tag)) {
                    stack.push(tag);
                }
            }
            j++;
        }
    }
 
    // Check if any tag is unbalanced, then return that tag
    if (stack.length > 0) {
        return `</${stack[stack.length - 1]}>`;
    }
    return "-1";
}
 
// Driver Code
const s = "<!DOCTYPE html>\n<html>\n<head>\n    <title>\n        GeeksforGeeks\n    </title>\n</head>\n<body>\n    <button>\n</body>\n</html>";
const tag = autoComplete(s);
console.log(tag);


Output

</button>





Like Article
Suggest improvement
Share your thoughts in the comments

Similar Reads