Open In App

Find last unique URL from long list of URLs in single traversal

Improve
Improve
Like Article
Like
Save
Share
Report

Given a very long list of URLs, find out last unique URL. Only one traversal of all URLs is allowed. Examples:

Input:
https://www.geeksforgeeks.org
https://www.geeksforgeeks.org/quiz-corner-gq/ 
http://qa.geeksforgeeks.org 
https://practice.geeksforgeeks.org 
https://ide.geeksforgeeks.org
http://write.geeksforgeeks.org  
https://www.geeksforgeeks.org/quiz-corner-gq/ 
https://practice.geeksforgeeks.org 
https://ide.geeksforgeeks.org 
https://www.geeksforgeeks.org/quiz-corner-gq/ 
http://qa.geeksforgeeks.org 
https://practice.geeksforgeeks.org

Output:
http://write.geeksforgeeks.org

We can solve this problem in one traversal by using Trie with a Doubly Linked List (We can insert and delete in O(1) time). The idea is to insert all URLs into the Trie one by one and check if it is duplicate or not. To know if we have previously encountered the URL, we need to mark the last node of every URL as leaf node. If we encounter a URL for the first time, we insert it into the doubly Linked list and maintain a pointer to that node in linked list in leaf node of the trie. If we encounter a URL that is already in the trie and has pointer to the url in the linked list, we delete the node from the linked list and set its pointer in the trie to null. After all URLs are processed, linked list will only contain the URLs that are distinct and the node at the beginning of the linked list will be last unique URL. 

CPP




// C++ program to print distinct URLs using Trie
// and Doubly Linked List
#include <bits/stdc++.h>
using namespace std;
 
// Alphabet size (# of symbols)
const int ALPHABET_SIZE = 256;
 
// A linked list node
struct DLLNode {
    string data;
    DLLNode *next, *prev;
};
 
// trie node
struct TrieNode {
    TrieNode* children[ALPHABET_SIZE];
 
    // isLeaf is true if the node represents
    // end of a word
    bool isLeaf;
 
    DLLNode* LLptr;
};
 
/* Given a reference (pointer to pointer) to the
head of a list and an int, inserts a new node
on the front of the list. */
void push(DLLNode*& head_ref, string new_data)
{
    DLLNode* new_node = new DLLNode;
 
    // put in the data
    new_node->data = new_data;
 
    // Make next of new node as head and previous
    // as NULL
    new_node->next = (head_ref);
    new_node->prev = NULL;
 
    // change prev of head node to new node
    if (head_ref != NULL)
        head_ref->prev = new_node;
 
    // move the head to point to the new node
    head_ref = new_node;
}
 
/* Function to delete a node in a Doubly Linked List.
head_ref --> pointer to head node pointer.
del --> pointer to node to be deleted. */
void deleteNode(DLLNode*& head_ref, DLLNode* del)
{
    // base case
    if (head_ref == NULL || del == NULL)
        return;
 
    // If node to be deleted is head node
    if (head_ref == del)
        head_ref = del->next;
 
    // Change next only if node to be deleted is
    // NOT the last node
    if (del->next != NULL)
        del->next->prev = del->prev;
 
    // Change prev only if node to be deleted is
    // NOT the first node
    if (del->prev != NULL)
        del->prev->next = del->next;
 
    // Finally, free the memory occupied by del
    delete (del);
    return;
}
 
// Returns new trie node (initialized to NULLs)
TrieNode* getNewTrieNode(void)
{
    TrieNode* pNode = new TrieNode;
 
    if (pNode) {
        pNode->isLeaf = false;
 
        for (int i = 0; i < ALPHABET_SIZE; i++)
            pNode->children[i] = NULL;
 
        pNode->LLptr = NULL;
    }
 
    return pNode;
}
 
// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf node
void insert(TrieNode* root, string key, DLLNode*& head)
{
    int index;
    TrieNode* pCrawl = root;
 
    for (int level = 0; level < key.length(); level++) {
        index = int(key[level]);
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNewTrieNode();
 
        pCrawl = pCrawl->children[index];
    }
 
    if (pCrawl->isLeaf) {
        // cout << "Duplicate Found " << key << endl;
        // delete from linked list
        if (pCrawl->LLptr)
            deleteNode(head, pCrawl->LLptr);
        pCrawl->LLptr = NULL;
    }
    else {
        // mark last node as leaf
        pCrawl->isLeaf = true;
 
        // insert to linked list
        push(head, key);
        pCrawl->LLptr = head;
    }
}
 
// Driver function
int main()
{
    string urls[]
        = { "https://www.geeksforgeeks.org",
            "http://write.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "http://qa.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org",
            "https://ide.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org",
            "https://ide.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "http://qa.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org" };
 
    TrieNode* root = getNewTrieNode();
 
    // Start with the empty list
    DLLNode* head = NULL;
    int n = sizeof(urls) / sizeof(urls[0]);
 
    // Construct Trie from given URLs
    for (int i = 0; i < n; i++)
        insert(root, urls[i], head);
 
    // head of linked list will point to last
    // distinct URL
    cout << head->data << endl;
 
    return 0;
}


Python3




# Python program to print distinct URLs using Trie
# and Doubly Linked List
 
# Alphabet size (# of symbols)
ALPHABET_SIZE = 256
 
# A linked list node
 
 
class DLLNode:
    def __init__(self, data):
        self.data = data
        self.next = None
        self.prev = None
 
# trie node
 
 
class TrieNode:
    def __init__(self):
        self.children = [None] * ALPHABET_SIZE
        # isLeaf is true if the node represents
        # end of a word
        self.isLeaf = False
        self.LLptr = None
 
# Given a reference (pointer to pointer) to the
# head of a list and an int, inserts a new node
# on the front of the list.
 
 
def push(head_ref, new_data):
    # put in the data
    new_node = DLLNode(new_data)
 
    # Make next of new node as head and previous
    # as NULL
    new_node.next = head_ref
 
    # change prev of head node to new node
    if head_ref is not None:
        head_ref.prev = new_node
 
    # move the head to point to the new node
    head_ref = new_node
    return head_ref
 
# Function to delete a node in a Doubly Linked List.
# head_ref --> pointer to head node pointer.
# del  -->  pointer to node to be deleted.
 
 
def deleteNode(head_ref, del_node):
    # base case
    if head_ref is None or del_node is None:
        return
 
    # If node to be deleted is head node
    if head_ref == del_node:
        head_ref = del_node.next
 
    # Change next only if node to be deleted is
    # NOT the last node
    if del_node.next is not None:
        del_node.next.prev = del_node.prev
 
    # Change prev only if node to be deleted is
    # NOT the first node
    if del_node.prev is not None:
        del_node.prev.next = del_node.next
 
    # Finally, free the memory occupied by del
    del del_node
    return head_ref
 
# Returns new trie node (initialized to NULLs)
 
 
def getNewTrieNode():
    pNode = TrieNode()
 
    pNode.isLeaf = False
    pNode.LLptr = None
 
    return pNode
 
# If not present, inserts key into trie
# If the key is prefix of trie node, just marks leaf node
 
 
def insert(root, key, head):
    pCrawl = root
 
    for level in range(len(key)):
        index = ord(key[level])
        if pCrawl.children[index] is None:
            pCrawl.children[index] = getNewTrieNode()
 
        pCrawl = pCrawl.children[index]
 
    if pCrawl.isLeaf:
        # cout << "Duplicate Found " << key << endl;
        # delete from linked list
        if pCrawl.LLptr is not None:
            head = deleteNode(head, pCrawl.LLptr)
        pCrawl.LLptr = None
    else:
        # mark last node as leaf
        pCrawl.isLeaf = True
 
        # insert to linked list
        head = push(head, key)
        pCrawl.LLptr = head
 
    return head
 
 
# Driver function
urls = [
]
 
root = getNewTrieNode()
 
# Start with the empty list
head = None
 
# Construct Trie from given URLs
for url in urls:
    head = insert(root, url, head)
 
# head of linked list will point to last
# distinct URL
print(head.data)
 
# This code is contributed by Aman Kumar


Javascript




const ALPHABET_SIZE = 256; // Alphabet size (# of symbols)
 
//DLLNode class is a linked list node, which stores data and the next and previous node
class DLLNode {
    constructor(data) {  //constructor for DLLNode, which takes data as an argument
        this.data = data;
        this.next = null;
        this.prev = null;
    }
}
 
//TrieNode class is a trie node, which stores an array of children for each node, a boolean for isLeaf, and a pointer to a doubly linked list node
class TrieNode {
    constructor() {
        this.children = new Array(ALPHABET_SIZE).fill(null); //array of size ALPHABET_SIZE filled with null
        this.isLeaf = false;
        this.LLptr = null//pointer to doubly linked list node
    }
}
 
//push function which takes a reference to the head of a list and an int, inserts a new node on the front of the list
function push(head_ref, new_data) {
    const new_node = new DLLNode(new_data); //creates a new node of type DLLNode with data as the new_data argument
 
    new_node.next = head_ref; //sets the next node of the new node to the head node passed in
 
    //change the prev of the head node to the new node if the head node is not null
    if (head_ref !== null) {
        head_ref.prev = new_node;
    }
 
    head_ref = new_node; //move the head of the list to point to the new node
    return head_ref;
}
 
//deleteNode function which takes a reference to the head of a list and a node to be deleted, deletes the node from the list
function deleteNode(head_ref, del_node) {
    //base case if head or node to be deleted is null
    if (head_ref === null || del_node === null) {
        return;
    }
 
    //if node to be deleted is the head node
    if (head_ref === del_node) {
        head_ref = del_node.next;
    }
 
    //change next if node to be deleted is not the last node
    if (del_node.next !== null) {
        del_node.next.prev = del_node.prev;
    }
 
    //change prev if node to be deleted is not the first node
    if (del_node.prev !== null) {
        del_node.prev.next = del_node.next;
    }
 
    return head_ref;
}
 
//getNewTrieNode function which returns a new trie node initialized to nulls
function getNewTrieNode() {
    const pNode = new TrieNode();
 
    pNode.isLeaf = false;
    pNode.LLptr = null;
 
    return pNode;
}
 
//insert function which takes a root node, a key, and a head node, and inserts the key into the trie
function insert(root, key, head) {
    let pCrawl = root; //creates a pointer to the root node
 
    //loops through the length of the key
    for (let level = 0; level < key.length; level++) {
        const index = key.charCodeAt(level); //gets the ascii value of the character at the current level
        //if the node at the current level is null, create a new trie node
        if (pCrawl.children[index] === null) {
            pCrawl.children[index] = getNewTrieNode();
        }
 
        pCrawl = pCrawl.children[index]; //moves the pointer to the next node in the trie
    }
 
    //if the node is already a leaf node
    if (pCrawl.isLeaf) {
        //if the LLptr is not null, delete the node from the linked list
        if (pCrawl.LLptr !== null) {
            head = deleteNode(head, pCrawl.LLptr);
        }
        pCrawl.LLptr = null;
    } else {
        //mark last node as leaf
        pCrawl.isLeaf = true;
 
        //insert to linked list
        head = push(head, key);
        pCrawl.LLptr = head;
    }
 
    return head;
}
 
//list of URLs
const urls = [
];
 
const root = getNewTrieNode(); //create a root node
let head = null; //head of linked list
 
//loop through the URLs and insert them into the trie
for (const url of urls) {
    head = insert(root, url, head);
}
 
//head of linked list will point to the last distinct URL
console.log(head.data);


C#




// C# program to print distinct URLs using Trie
// and Doubly Linked List
 
using System;
using System.Collections.Generic;
 
// trie node
class TrieNode {
    // Alphabet size (# of symbols)
    const int ALPHABET_SIZE = 256;
    public TrieNode[] children
        = new TrieNode[ALPHABET_SIZE];
 
    // isLeaf is true if the node represents
    // end of a word
    public bool isLeaf;
    public DLLNode LLptr;
 
    public TrieNode()
    {
        isLeaf = false;
        LLptr = null;
        for (int i = 0; i < ALPHABET_SIZE; i++)
            children[i] = null;
    }
}
// A linked list node
class DLLNode {
    public string data;
    public DLLNode next, prev;
}
 
class Program {
 
    // If not present, inserts key into trie
    // If the key is prefix of trie node, just marks leaf
    // node
    static void Insert(TrieNode root, string key,
                       ref DLLNode head)
    {
        int index;
        TrieNode pCrawl = root;
 
        for (int level = 0; level < key.Length; level++) {
            index = (int)key[level];
            if (pCrawl.children[index] == null)
                pCrawl.children[index] = new TrieNode();
 
            pCrawl = pCrawl.children[index];
        }
 
        if (pCrawl.isLeaf) { // delete from linked list
 
            if (pCrawl.LLptr != null)
                DeleteNode(ref head, pCrawl.LLptr);
            pCrawl.LLptr = null;
        }
        else
 
        {
            // mark last node as leaf
            pCrawl.isLeaf = true;
            // insert to linked list
            Push(ref head, key);
            pCrawl.LLptr = head;
        }
    }
    /* Given a reference (pointer to pointer) to the
    head of a list and an int, inserts a new node
    on the front of the list. */
    static void Push(ref DLLNode head_ref, string new_data)
    {
        DLLNode new_node = new DLLNode();
        // put in the data
        new_node.data = new_data;
        // Make next of new node as head and previous
        // as NULL
        new_node.next = head_ref;
        new_node.prev = null;
        // change prev of head node to new node
        if (head_ref != null)
            head_ref.prev = new_node;
        // move the head to point to the new node
        head_ref = new_node;
    }
    /* Function to delete a node in a Doubly Linked List.
    head_ref --> pointer to head node pointer.
    del --> pointer to node to be deleted. */
    static void DeleteNode(ref DLLNode head_ref,
                           DLLNode del)
    {
        // base case
        if (head_ref == null || del == null)
            return;
        // If node to be deleted is head node
        if (head_ref == del)
            head_ref = del.next;
        // Change next only if node to be deleted is
        // NOT the last node
        if (del.next != null)
            del.next.prev = del.prev;
 
        // Change prev only if node to be deleted is
        // NOT the first node
        if (del.prev != null)
            del.prev.next = del.next;
        // Finally, free the memory occupied by del
        del = null;
    }
    // Driver function
    static void Main()
    {
        string[] urls
            = { "https://www.geeksforgeeks.org",
                "http://write.geeksforgeeks.org",
                "http://www.geeksforgeeks.org",
                "http://qa.geeksforgeeks.org",
                "https://practice.geeksforgeeks.org",
                "https://ide.geeksforgeeks.org",
                "http://www.geeksforgeeks.org",
                "https://practice.geeksforgeeks.org",
                "https://ide.geeksforgeeks.org",
                "http://www.geeksforgeeks.org",
                "http://qa.geeksforgeeks.org",
                "https://practice.geeksforgeeks.org" };
 
        TrieNode root = new TrieNode();
        DLLNode head = null;
        int n = urls.Length;
 
        for (int i = 0; i < n; i++)
            Insert(root, urls[i], ref head);
 
        Console.WriteLine(head.data);
    }
}


Java




import java.util.*;
 
class Trie {
    private static final int ALPHABET_SIZE
        = 256; // Alphabet size (# of symbols)
 
    // DLLNode class is a linked list node, which stores
    // data and the next and previous node
    private static class DLLNode {
        String data; // data stored in the node
        DLLNode prev; // pointer to the previous node
        DLLNode next; // pointer to the next node
 
        DLLNode(String data)
        { // constructor for DLLNode, which takes data as an
          // argument
            this.data = data;
            this.prev = null;
            this.next = null;
        }
    }
 
    // TrieNode class is a trie node, which stores an array
    // of children for each node, a boolean for isLeaf, and
    // a pointer to a doubly linked list node
    private static class TrieNode {
        TrieNode[] children; // array of size ALPHABET_SIZE
                             // filled with null
        boolean isLeaf; // boolean to mark if the node is a
                        // leaf node
        DLLNode LLptr; // pointer to doubly linked list node
 
        TrieNode()
        {
            children = new TrieNode[ALPHABET_SIZE];
            isLeaf = false;
            LLptr = null;
        }
    }
 
    // push function which takes a reference to the head of
    // a list and a string, inserts a new node at the front
    // of the list
    private static DLLNode push(DLLNode headRef,
                                String newData)
    {
        DLLNode newNode = new DLLNode(
            newData); // creates a new node of type DLLNode
                      // with data as the new_data argument
 
        newNode.next
            = headRef; // sets the next node of the new node
                       // to the head node passed in
 
        // change the prev of the head node to the new node
        // if the head node is not null
        if (headRef != null) {
            headRef.prev = newNode;
        }
 
        headRef = newNode; // move the head of the list to
                           // point to the new node
        return headRef;
    }
 
    // deleteNode function which takes a reference to the
    // head of a list and a node to be deleted, deletes the
    // node from the list
    private static DLLNode deleteNode(DLLNode headRef,
                                      DLLNode delNode)
    {
        // base case if head or node to be deleted is null
        if (headRef == null || delNode == null) {
            return headRef;
        }
 
        // if node to be deleted is the head node
        if (headRef == delNode) {
            headRef = delNode.next;
        }
 
        // change next if node to be deleted is not the last
        // node
        if (delNode.next != null) {
            delNode.next.prev = delNode.prev;
        }
 
        // change prev if node to be deleted is not the
        // first node
        if (delNode.prev != null) {
            delNode.prev.next = delNode.next;
        }
 
        return headRef;
    }
 
    // getNewTrieNode function which returns a new trie node
    // initialized to nulls
    private static TrieNode getNewTrieNode()
    {
        TrieNode pNode = new TrieNode();
 
        pNode.isLeaf = false;
        pNode.LLptr = null;
 
        return pNode;
    }
 
    // insert function which takes a root node, a key, and a
    // head node, and inserts the key into the trie
    static DLLNode insert(TrieNode root, String key,
                          DLLNode head)
    {
        TrieNode pCrawl
            = root; // create pointer to root node
 
        // loop through the levels
        for (int level = 0; level < key.length(); level++) {
            int index = (int)key.charAt(level);
 
            if (pCrawl.children[index] == null) {
                pCrawl.children[index] = getNewTrieNode();
            }
 
            // move pointer to next node
            pCrawl = pCrawl.children[index];
        }
 
        // if the node is already a leaf node
        if (pCrawl.isLeaf) {
            if (pCrawl.LLptr != null) {
 
                // if the LLptr is not null, delete the node
                // from the linked list
                head = deleteNode(head, pCrawl.LLptr);
            }
            pCrawl.LLptr = null;
        }
        else {
 
            // mark last node as leaf
            pCrawl.isLeaf = true;
            head = push(head, key);
            pCrawl.LLptr = head;
        }
 
        return head;
    }
 
    // Driver code
    public static void main(String[] args)
    {
        String[] urls = {
            "https://www.geeksforgeeks.org",
            "http://write.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "http://qa.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org",
            "https://ide.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org",
            "https://ide.geeksforgeeks.org",
            "http://www.geeksforgeeks.org",
            "http://qa.geeksforgeeks.org",
            "https://practice.geeksforgeeks.org"
        };
 
        TrieNode root = getNewTrieNode();
        DLLNode head = null;
 
        // Function calls
        for (String url : urls) {
            head = insert(root, url, head);
        }
 
        System.out.println(head.data);
    }
}


Output

http://write.geeksforgeeks.org

Time Complexity: O(n*m) where m is the maximum length of a URL, and n is the number of URLs.
Auxiliary Space: O(ALPHABET_SIZE * m * n)



Last Updated : 04 Apr, 2023
Like Article
Save Article
Previous
Next
Share your thoughts in the comments
Similar Reads