Given a very long list of URLs, find out last unique URL. Only one traversal of all URLs is allowed. Examples:
Input: https://www.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ http://qa.geeksforgeeks.org https://practice.geeksforgeeks.org https://ide.geeksforgeeks.org http://write.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ https://practice.geeksforgeeks.org https://ide.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ http://qa.geeksforgeeks.org https://practice.geeksforgeeks.org Output: http://write.geeksforgeeks.org
We can solve this problem in one traversal by using Trie with a Doubly Linked List (We can insert and delete in O(1) time). The idea is to insert all URLs into the Trie one by one and check if it is duplicate or not. To know if we have previously encountered the URL, we need to mark the last node of every URL as leaf node. If we encounter a URL for the first time, we insert it into the doubly Linked list and maintain a pointer to that node in linked list in leaf node of the trie. If we encounter a URL that is already in the trie and has pointer to the url in the linked list, we delete the node from the linked list and set its pointer in the trie to null. After all URLs are processed, linked list will only contain the URLs that are distinct and the node at the beginning of the linked list will be last unique URL.
// C++ program to print distinct URLs using Trie // and Doubly Linked List #include <bits/stdc++.h> using namespace std;
// Alphabet size (# of symbols) const int ALPHABET_SIZE = 256;
// A linked list node struct DLLNode {
string data;
DLLNode *next, *prev;
}; // trie node struct TrieNode {
TrieNode* children[ALPHABET_SIZE];
// isLeaf is true if the node represents
// end of a word
bool isLeaf;
DLLNode* LLptr;
}; /* Given a reference (pointer to pointer) to the head of a list and an int, inserts a new node on the front of the list. */ void push(DLLNode*& head_ref, string new_data)
{ DLLNode* new_node = new DLLNode;
// put in the data
new_node->data = new_data;
// Make next of new node as head and previous
// as NULL
new_node->next = (head_ref);
new_node->prev = NULL;
// change prev of head node to new node
if (head_ref != NULL)
head_ref->prev = new_node;
// move the head to point to the new node
head_ref = new_node;
} /* Function to delete a node in a Doubly Linked List. head_ref --> pointer to head node pointer. del --> pointer to node to be deleted. */ void deleteNode(DLLNode*& head_ref, DLLNode* del)
{ // base case
if (head_ref == NULL || del == NULL)
return ;
// If node to be deleted is head node
if (head_ref == del)
head_ref = del->next;
// Change next only if node to be deleted is
// NOT the last node
if (del->next != NULL)
del->next->prev = del->prev;
// Change prev only if node to be deleted is
// NOT the first node
if (del->prev != NULL)
del->prev->next = del->next;
// Finally, free the memory occupied by del
delete (del);
return ;
} // Returns new trie node (initialized to NULLs) TrieNode* getNewTrieNode( void )
{ TrieNode* pNode = new TrieNode;
if (pNode) {
pNode->isLeaf = false ;
for ( int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
pNode->LLptr = NULL;
}
return pNode;
} // If not present, inserts key into trie // If the key is prefix of trie node, just marks leaf node void insert(TrieNode* root, string key, DLLNode*& head)
{ int index;
TrieNode* pCrawl = root;
for ( int level = 0; level < key.length(); level++) {
index = int (key[level]);
if (!pCrawl->children[index])
pCrawl->children[index] = getNewTrieNode();
pCrawl = pCrawl->children[index];
}
if (pCrawl->isLeaf) {
// cout << "Duplicate Found " << key << endl;
// delete from linked list
if (pCrawl->LLptr)
deleteNode(head, pCrawl->LLptr);
pCrawl->LLptr = NULL;
}
else {
// mark last node as leaf
pCrawl->isLeaf = true ;
// insert to linked list
push(head, key);
pCrawl->LLptr = head;
}
} // Driver function int main()
{ string urls[]
TrieNode* root = getNewTrieNode();
// Start with the empty list
DLLNode* head = NULL;
int n = sizeof (urls) / sizeof (urls[0]);
// Construct Trie from given URLs
for ( int i = 0; i < n; i++)
insert(root, urls[i], head);
// head of linked list will point to last
// distinct URL
cout << head->data << endl;
return 0;
} |
# Python program to print distinct URLs using Trie # and Doubly Linked List # Alphabet size (# of symbols) ALPHABET_SIZE = 256
# A linked list node class DLLNode:
def __init__( self , data):
self .data = data
self . next = None
self .prev = None
# trie node class TrieNode:
def __init__( self ):
self .children = [ None ] * ALPHABET_SIZE
# isLeaf is true if the node represents
# end of a word
self .isLeaf = False
self .LLptr = None
# Given a reference (pointer to pointer) to the # head of a list and an int, inserts a new node # on the front of the list. def push(head_ref, new_data):
# put in the data
new_node = DLLNode(new_data)
# Make next of new node as head and previous
# as NULL
new_node. next = head_ref
# change prev of head node to new node
if head_ref is not None :
head_ref.prev = new_node
# move the head to point to the new node
head_ref = new_node
return head_ref
# Function to delete a node in a Doubly Linked List. # head_ref --> pointer to head node pointer. # del --> pointer to node to be deleted. def deleteNode(head_ref, del_node):
# base case
if head_ref is None or del_node is None :
return
# If node to be deleted is head node
if head_ref = = del_node:
head_ref = del_node. next
# Change next only if node to be deleted is
# NOT the last node
if del_node. next is not None :
del_node. next .prev = del_node.prev
# Change prev only if node to be deleted is
# NOT the first node
if del_node.prev is not None :
del_node.prev. next = del_node. next
# Finally, free the memory occupied by del
del del_node
return head_ref
# Returns new trie node (initialized to NULLs) def getNewTrieNode():
pNode = TrieNode()
pNode.isLeaf = False
pNode.LLptr = None
return pNode
# If not present, inserts key into trie # If the key is prefix of trie node, just marks leaf node def insert(root, key, head):
pCrawl = root
for level in range ( len (key)):
index = ord (key[level])
if pCrawl.children[index] is None :
pCrawl.children[index] = getNewTrieNode()
pCrawl = pCrawl.children[index]
if pCrawl.isLeaf:
# cout << "Duplicate Found " << key << endl;
# delete from linked list
if pCrawl.LLptr is not None :
head = deleteNode(head, pCrawl.LLptr)
pCrawl.LLptr = None
else :
# mark last node as leaf
pCrawl.isLeaf = True
# insert to linked list
head = push(head, key)
pCrawl.LLptr = head
return head
# Driver function urls = [
] root = getNewTrieNode()
# Start with the empty list head = None
# Construct Trie from given URLs for url in urls:
head = insert(root, url, head)
# head of linked list will point to last # distinct URL print (head.data)
# This code is contributed by Aman Kumar |
const ALPHABET_SIZE = 256; // Alphabet size (# of symbols)
//DLLNode class is a linked list node, which stores data and the next and previous node class DLLNode { constructor(data) { //constructor for DLLNode, which takes data as an argument
this .data = data;
this .next = null ;
this .prev = null ;
}
} //TrieNode class is a trie node, which stores an array of children for each node, a boolean for isLeaf, and a pointer to a doubly linked list node class TrieNode { constructor() {
this .children = new Array(ALPHABET_SIZE).fill( null ); //array of size ALPHABET_SIZE filled with null
this .isLeaf = false ;
this .LLptr = null ; //pointer to doubly linked list node
}
} //push function which takes a reference to the head of a list and an int, inserts a new node on the front of the list function push(head_ref, new_data) {
const new_node = new DLLNode(new_data); //creates a new node of type DLLNode with data as the new_data argument
new_node.next = head_ref; //sets the next node of the new node to the head node passed in
//change the prev of the head node to the new node if the head node is not null
if (head_ref !== null ) {
head_ref.prev = new_node;
}
head_ref = new_node; //move the head of the list to point to the new node
return head_ref;
} //deleteNode function which takes a reference to the head of a list and a node to be deleted, deletes the node from the list function deleteNode(head_ref, del_node) {
//base case if head or node to be deleted is null
if (head_ref === null || del_node === null ) {
return ;
}
//if node to be deleted is the head node
if (head_ref === del_node) {
head_ref = del_node.next;
}
//change next if node to be deleted is not the last node
if (del_node.next !== null ) {
del_node.next.prev = del_node.prev;
}
//change prev if node to be deleted is not the first node
if (del_node.prev !== null ) {
del_node.prev.next = del_node.next;
}
return head_ref;
} //getNewTrieNode function which returns a new trie node initialized to nulls function getNewTrieNode() {
const pNode = new TrieNode();
pNode.isLeaf = false ;
pNode.LLptr = null ;
return pNode;
} //insert function which takes a root node, a key, and a head node, and inserts the key into the trie function insert(root, key, head) {
let pCrawl = root; //creates a pointer to the root node
//loops through the length of the key
for (let level = 0; level < key.length; level++) {
const index = key.charCodeAt(level); //gets the ascii value of the character at the current level
//if the node at the current level is null, create a new trie node
if (pCrawl.children[index] === null ) {
pCrawl.children[index] = getNewTrieNode();
}
pCrawl = pCrawl.children[index]; //moves the pointer to the next node in the trie
}
//if the node is already a leaf node
if (pCrawl.isLeaf) {
//if the LLptr is not null, delete the node from the linked list
if (pCrawl.LLptr !== null ) {
head = deleteNode(head, pCrawl.LLptr);
}
pCrawl.LLptr = null ;
} else {
//mark last node as leaf
pCrawl.isLeaf = true ;
//insert to linked list
head = push(head, key);
pCrawl.LLptr = head;
}
return head;
} //list of URLs const urls = [ ]; const root = getNewTrieNode(); //create a root node
let head = null ; //head of linked list
//loop through the URLs and insert them into the trie for (const url of urls) {
head = insert(root, url, head);
} //head of linked list will point to the last distinct URL console.log(head.data); |
// C# program to print distinct URLs using Trie // and Doubly Linked List using System;
using System.Collections.Generic;
// trie node class TrieNode {
// Alphabet size (# of symbols)
const int ALPHABET_SIZE = 256;
public TrieNode[] children
= new TrieNode[ALPHABET_SIZE];
// isLeaf is true if the node represents
// end of a word
public bool isLeaf;
public DLLNode LLptr;
public TrieNode()
{
isLeaf = false ;
LLptr = null ;
for ( int i = 0; i < ALPHABET_SIZE; i++)
children[i] = null ;
}
} // A linked list node class DLLNode {
public string data;
public DLLNode next, prev;
} class Program {
// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf
// node
static void Insert(TrieNode root, string key,
ref DLLNode head)
{
int index;
TrieNode pCrawl = root;
for ( int level = 0; level < key.Length; level++) {
index = ( int )key[level];
if (pCrawl.children[index] == null )
pCrawl.children[index] = new TrieNode();
pCrawl = pCrawl.children[index];
}
if (pCrawl.isLeaf) { // delete from linked list
if (pCrawl.LLptr != null )
DeleteNode( ref head, pCrawl.LLptr);
pCrawl.LLptr = null ;
}
else
{
// mark last node as leaf
pCrawl.isLeaf = true ;
// insert to linked list
Push( ref head, key);
pCrawl.LLptr = head;
}
}
/* Given a reference (pointer to pointer) to the
head of a list and an int, inserts a new node
on the front of the list. */
static void Push( ref DLLNode head_ref, string new_data)
{
DLLNode new_node = new DLLNode();
// put in the data
new_node.data = new_data;
// Make next of new node as head and previous
// as NULL
new_node.next = head_ref;
new_node.prev = null ;
// change prev of head node to new node
if (head_ref != null )
head_ref.prev = new_node;
// move the head to point to the new node
head_ref = new_node;
}
/* Function to delete a node in a Doubly Linked List.
head_ref --> pointer to head node pointer.
del --> pointer to node to be deleted. */
static void DeleteNode( ref DLLNode head_ref,
DLLNode del)
{
// base case
if (head_ref == null || del == null )
return ;
// If node to be deleted is head node
if (head_ref == del)
head_ref = del.next;
// Change next only if node to be deleted is
// NOT the last node
if (del.next != null )
del.next.prev = del.prev;
// Change prev only if node to be deleted is
// NOT the first node
if (del.prev != null )
del.prev.next = del.next;
// Finally, free the memory occupied by del
del = null ;
}
// Driver function
static void Main()
{
string [] urls
TrieNode root = new TrieNode();
DLLNode head = null ;
int n = urls.Length;
for ( int i = 0; i < n; i++)
Insert(root, urls[i], ref head);
Console.WriteLine(head.data);
}
} |
import java.util.*;
class Trie {
private static final int ALPHABET_SIZE
= 256 ; // Alphabet size (# of symbols)
// DLLNode class is a linked list node, which stores
// data and the next and previous node
private static class DLLNode {
String data; // data stored in the node
DLLNode prev; // pointer to the previous node
DLLNode next; // pointer to the next node
DLLNode(String data)
{ // constructor for DLLNode, which takes data as an
// argument
this .data = data;
this .prev = null ;
this .next = null ;
}
}
// TrieNode class is a trie node, which stores an array
// of children for each node, a boolean for isLeaf, and
// a pointer to a doubly linked list node
private static class TrieNode {
TrieNode[] children; // array of size ALPHABET_SIZE
// filled with null
boolean isLeaf; // boolean to mark if the node is a
// leaf node
DLLNode LLptr; // pointer to doubly linked list node
TrieNode()
{
children = new TrieNode[ALPHABET_SIZE];
isLeaf = false ;
LLptr = null ;
}
}
// push function which takes a reference to the head of
// a list and a string, inserts a new node at the front
// of the list
private static DLLNode push(DLLNode headRef,
String newData)
{
DLLNode newNode = new DLLNode(
newData); // creates a new node of type DLLNode
// with data as the new_data argument
newNode.next
= headRef; // sets the next node of the new node
// to the head node passed in
// change the prev of the head node to the new node
// if the head node is not null
if (headRef != null ) {
headRef.prev = newNode;
}
headRef = newNode; // move the head of the list to
// point to the new node
return headRef;
}
// deleteNode function which takes a reference to the
// head of a list and a node to be deleted, deletes the
// node from the list
private static DLLNode deleteNode(DLLNode headRef,
DLLNode delNode)
{
// base case if head or node to be deleted is null
if (headRef == null || delNode == null ) {
return headRef;
}
// if node to be deleted is the head node
if (headRef == delNode) {
headRef = delNode.next;
}
// change next if node to be deleted is not the last
// node
if (delNode.next != null ) {
delNode.next.prev = delNode.prev;
}
// change prev if node to be deleted is not the
// first node
if (delNode.prev != null ) {
delNode.prev.next = delNode.next;
}
return headRef;
}
// getNewTrieNode function which returns a new trie node
// initialized to nulls
private static TrieNode getNewTrieNode()
{
TrieNode pNode = new TrieNode();
pNode.isLeaf = false ;
pNode.LLptr = null ;
return pNode;
}
// insert function which takes a root node, a key, and a
// head node, and inserts the key into the trie
static DLLNode insert(TrieNode root, String key,
DLLNode head)
{
TrieNode pCrawl
= root; // create pointer to root node
// loop through the levels
for ( int level = 0 ; level < key.length(); level++) {
int index = ( int )key.charAt(level);
if (pCrawl.children[index] == null ) {
pCrawl.children[index] = getNewTrieNode();
}
// move pointer to next node
pCrawl = pCrawl.children[index];
}
// if the node is already a leaf node
if (pCrawl.isLeaf) {
if (pCrawl.LLptr != null ) {
// if the LLptr is not null, delete the node
// from the linked list
head = deleteNode(head, pCrawl.LLptr);
}
pCrawl.LLptr = null ;
}
else {
// mark last node as leaf
pCrawl.isLeaf = true ;
head = push(head, key);
pCrawl.LLptr = head;
}
return head;
}
// Driver code
public static void main(String[] args)
{
String[] urls = {
};
TrieNode root = getNewTrieNode();
DLLNode head = null ;
// Function calls
for (String url : urls) {
head = insert(root, url, head);
}
System.out.println(head.data);
}
} |
http://write.geeksforgeeks.org
Time Complexity: O(n*m) where m is the maximum length of a URL, and n is the number of URLs.
Auxiliary Space: O(ALPHABET_SIZE * m * n)