Open In App

CSES Solution – Finding Patterns

Last Updated : 09 Apr, 2024
Improve
Improve
Like Article
Like
Save
Share
Report

Prerequisite: Aho-Corasick algorithm

Given a string and patterns, check for each pattern if it appears in the string.

Example:

Input: s = “aybabtu”, patterns = {“bab”, “abc”, “ayba”}
Output: YES
NO
YES

Input: s = “bybabtu”, patterns = {“bat”, “tu”}
Output:
NO
YES

Approach:

The solution uses a method called the Aho-Corasick algorithm. Let’s see how it works:

  • Building a Trie: We first create a special kind of tree called a ‘trie’ from the patterns we’re looking for. Each pattern is a path in the trie from the root to a node.
  • Failure Links: We then add “failure links” to the trie. These links help us jump back efficiently in the trie when a character doesn’t match.
  • Searching: We go through the given string, character by character, moving along the paths in the trie. If we reach the end of a path (a pattern), we know that pattern exists in the string.
  • Counting: While searching, we keep track of how many times we reach each node (pattern). This tells us how many times each pattern appears in the string.

This method is fast because it checks all patterns at the same time, instead of one by one.

Steps-by-step approach:

  • Implement a insertPattern() function to insert each pattern into the trie structure.
  • Build failure links in the trie using the buildFailureLinks() function to prepare for pattern matching.
  • Run the Aho-Corasick algorithm on the main string s using the runAhoCorasick() function to identify occurrences of patterns.
  • Perform a Depth-First Search (DFS) on the trie structure to propagate occurrences down and update pattern matches using the dfs function.
  • In the main function, input and insert patterns into the trie, build failure links, run the Aho-Corasick algorithm, and propagate occurrences using DFS.
  • Output the pattern matches by checking the patternFound array for each pattern index and print “YES” if a match is found, otherwise print “NO“.

Below is the implementation of the above approach:

C++
#include <algorithm>
#include <iostream>
#include <queue>
using namespace std;

#define pb push_back

// The main string and the number of patterns
string s;
int n, nodeIndex = 1, patternFound[500005];

// Adjacency list for the trie nodes
vector<int> trieAdjList[500005];

// Structure for each node in the trie
struct TrieNode {
    int failureLink, child[26] = {}, occurrences = 0;
    vector<int> patternIndices;
} trie[500005];

// Function to insert a pattern into the trie
void insertPattern(string pattern, int index)
{
    // Start at the root node
    int node = 1;

    // Traverse the pattern character by character
    for (int i = 0; i < pattern.size(); i++) {

        // If the current character's child node doesn't
        // exist, create it
        if (trie[node].child[pattern[i] - 'a'] == 0)
            trie[node].child[pattern[i] - 'a']
                = ++nodeIndex;

        // Move to the child node
        node = trie[node].child[pattern[i] - 'a'];
    }

    // After traversing the pattern, add the pattern index
    // to the current node
    trie[node].patternIndices.pb(index);
}

// Function to build the failure links for the Aho-Corasick
// algorithm
void buildFailureLinks()
{
    // Initialize a queue for BFS traversal
    queue<int> nodeQueue;

    // Start from the root node
    int node = 1;

    // Set failure link of root node to itself
    trie[1].failureLink = 1;

    // Push all child nodes of the root to the queue
    for (int i = 0; i < 26; i++) {
        if (trie[node].child[i])
            trie[trie[node].child[i]].failureLink = node,
            nodeQueue.push(trie[node].child[i]);
        else
            trie[node].child[i]
                = 1; // If child node doesn't exist, point
                     // to root
    }

    // Perform BFS traversal to set failure links
    while (!nodeQueue.empty()) {
        node = nodeQueue.front();
        nodeQueue.pop();
        for (int i = 0; i < 26; i++) {
            if (trie[node].child[i])
                trie[trie[node].child[i]].failureLink
                    = trie[trie[node].failureLink].child[i],
                    nodeQueue.push(trie[node].child[i]);
            else
                trie[node].child[i]
                    = trie[trie[node].failureLink].child[i];
        }
    }
    // Create adjacency list based on failure links
    for (int i = 2; i <= nodeIndex; i++)
        trieAdjList[trie[i].failureLink].pb(i);
}

// Function to run the Aho-Corasick algorithm on the main
// string
void runAhoCorasick(string s)
{
    // Start from the root node
    for (int i = 0, node = 1; i < s.size(); i++) {
        // Traverse the trie based on characters in the main
        // string
        node = trie[node].child[s[i] - 'a'];
        // Increment occurrences at each node
        trie[node].occurrences++;
    }
}

// DFS function to propagate the occurrences down the trie
int dfs(int u)
{
    // Initialize total occurrences at current node
    int totalOccurrences = trie[u].occurrences;

    // Traverse child nodes recursively
    for (int v : trieAdjList[u])
        totalOccurrences += dfs(v);

    // Propagate occurrences to pattern indices
    for (int patternIndex : trie[u].patternIndices)
        patternFound[patternIndex] = totalOccurrences;

    // Return total occurrences
    return totalOccurrences;
}

// Main function
int main()
{
    string s = "aybabtu";
    int n = 3;
    vector<string> patterns = { "bab", "abc", "ayba" };

    // Input and insert patterns into trie
    for (int i = 0; i < n; i++) {
        insertPattern(patterns[i], i);
    }

    // Build failure links for Aho-Corasick algorithm
    buildFailureLinks();

    // Run Aho-Corasick algorithm on main string
    runAhoCorasick(s);

    // Propagate occurrences down the trie using DFS
    dfs(1);

    // Output pattern matches
    for (int i = 0; i < n; i++)
        cout << (patternFound[i] ? "YES" : "NO") << '\n';
}
Java
/*code by flutterfly */
import java.util.*;

public class AhoCorasick {

    static String s;
    static int n, nodeIndex = 1;
    static int[] patternFound = new int[500005];
    static List<Integer>[] trieAdjList = new List[500005];

    // Structure for each node in the trie
    static class TrieNode {
        int failureLink;
        int[] child = new int[26];
        int occurrences = 0;
        List<Integer> patternIndices = new ArrayList<>();
    }

    static TrieNode[] trie = new TrieNode[500005];

    // Function to insert a pattern into the trie
    static void insertPattern(String pattern, int index) {
        // Start at the root node
        int node = 1;

        // Traverse the pattern character by character
        for (int i = 0; i < pattern.length(); i++) {

            // If the current character's child node doesn't exist, create it
            if (trie[node].child[pattern.charAt(i) - 'a'] == 0)
                trie[node].child[pattern.charAt(i) - 'a'] = ++nodeIndex;

            // Move to the child node
            node = trie[node].child[pattern.charAt(i) - 'a'];
        }

        // After traversing the pattern, add the pattern index to the current node
        trie[node].patternIndices.add(index);
    }

    // Function to build the failure links for the Aho-Corasick algorithm
    static void buildFailureLinks() {
        // Initialize a queue for BFS traversal
        Queue<Integer> nodeQueue = new LinkedList<>();

        // Start from the root node
        int node = 1;

        // Set failure link of root node to itself
        trie[1].failureLink = 1;

        // Push all child nodes of the root to the queue
        for (int i = 0; i < 26; i++) {
            if (trie[node].child[i] != 0) {
                trie[trie[node].child[i]].failureLink = node;
                nodeQueue.add(trie[node].child[i]);
            } else {
                trie[node].child[i] = 1; // If child node doesn't exist, point to root
            }
        }

        // Perform BFS traversal to set failure links
        while (!nodeQueue.isEmpty()) {
            node = nodeQueue.poll();
            for (int i = 0; i < 26; i++) {
                if (trie[node].child[i] != 0) {
                    trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];
                    nodeQueue.add(trie[node].child[i]);
                } else {
                    trie[node].child[i] = trie[trie[node].failureLink].child[i];
                }
            }
        }

        // Create adjacency list based on failure links
        for (int i = 2; i <= nodeIndex; i++)
            trieAdjList[trie[i].failureLink].add(i);
    }

    // Function to run the Aho-Corasick algorithm on the main string
    static void runAhoCorasick(String s) {
        // Start from the root node
        for (int i = 0, node = 1; i < s.length(); i++) {
            // Traverse the trie based on characters in the main string
            node = trie[node].child[s.charAt(i) - 'a'];
            // Increment occurrences at each node
            trie[node].occurrences++;
        }
    }

    // DFS function to propagate the occurrences down the trie
    static int dfs(int u) {
        // Initialize total occurrences at current node
        int totalOccurrences = trie[u].occurrences;

        // Traverse child nodes recursively
        for (int v : trieAdjList[u])
            totalOccurrences += dfs(v);

        // Propagate occurrences to pattern indices
        for (int patternIndex : trie[u].patternIndices)
            patternFound[patternIndex] = totalOccurrences;

        // Return total occurrences
        return totalOccurrences;
    }

    // Main function
    public static void main(String[] args) {
        s = "aybabtu";
        n = 3;
        String[] patterns = { "bab", "abc", "ayba" };

        // Initialize trie nodes
        for (int i = 0; i <= 500004; i++) {
            trie[i] = new TrieNode();
            trieAdjList[i] = new ArrayList<>();
        }

        // Input and insert patterns into trie
        for (int i = 0; i < n; i++) {
            insertPattern(patterns[i], i);
        }

        // Build failure links for Aho-Corasick algorithm
        buildFailureLinks();

        // Run Aho-Corasick algorithm on main string
        runAhoCorasick(s);

        // Propagate occurrences down the trie using DFS
        dfs(1);

        // Output pattern matches
        for (int i = 0; i < n; i++)
            System.out.println((patternFound[i] != 0 ? "YES" : "NO"));
    }
}
Python3
from collections import deque

class TrieNode:
    def __init__(self):
        self.failureLink = 0
        self.child = [0] * 26
        self.occurrences = 0
        self.patternIndices = []

def insert_pattern(pattern, index):
    """Inserts a pattern into the trie."""
    global nodeIndex
    node = 1

    # Traverse the trie to insert the pattern character by character
    for char in pattern:
        char_index = ord(char) - ord('a')
        if trie[node].child[char_index] == 0:
            nodeIndex += 1
            trie[node].child[char_index] = nodeIndex
        node = trie[node].child[char_index]

    # Add the pattern index to the current node
    trie[node].patternIndices.append(index)

def build_failure_links():
    """Builds failure links for the Aho-Corasick algorithm."""
    nodeQueue = deque()
    node = 1
    trie[1].failureLink = 1

    # Initialize failure links for child nodes of the root
    for i in range(26):
        if trie[node].child[i] != 0:
            trie[trie[node].child[i]].failureLink = node
            nodeQueue.append(trie[node].child[i])
        else:
            trie[node].child[i] = 1

    # Perform BFS traversal to set failure links
    while nodeQueue:
        node = nodeQueue.popleft()
        for i in range(26):
            if trie[node].child[i] != 0:
                trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i]
                nodeQueue.append(trie[node].child[i])
            else:
                trie[node].child[i] = trie[trie[node].failureLink].child[i]

    # Create adjacency list based on failure links
    for i in range(2, nodeIndex + 1):
        trie_adj_list[trie[i].failureLink].append(i)

def run_aho_corasick(s):
    """Runs the Aho-Corasick algorithm on the main string."""
    node = 1
    for char in s:
        # Traverse the trie based on characters in the main string
        node = trie[node].child[ord(char) - ord('a')]
        # Increment occurrences at each node
        trie[node].occurrences += 1

def dfs(u):
    """DFS function to propagate the occurrences down the trie."""
    total_occurrences = trie[u].occurrences

    # Traverse child nodes recursively
    for v in trie_adj_list[u]:
        total_occurrences += dfs(v)

    # Propagate occurrences to pattern indices
    for pattern_index in trie[u].patternIndices:
        pattern_found[pattern_index] = total_occurrences

    # Return total occurrences
    return total_occurrences

# Sample Input
s = "aybabtu"
n = 3
patterns = ["bab", "abc", "ayba"]

# Initialize variables
nodeIndex = 1
trie = [TrieNode() for _ in range(500005)]
trie_adj_list = [[] for _ in range(500005)]
pattern_found = [0] * 500005

# Insert patterns into the trie
for i in range(n):
    insert_pattern(patterns[i], i)

# Build failure links for Aho-Corasick algorithm
build_failure_links()

# Run Aho-Corasick algorithm on the main string
run_aho_corasick(s)

# Propagate occurrences down the trie using DFS
dfs(1)

# Output pattern matches
for i in range(n):
    print("YES" if pattern_found[i] != 0 else "NO")
C#
//code by flutterfly
using System;
using System.Collections.Generic;

public class Program
{
    // Structure for each node in the trie
    public class TrieNode
    {
        public int failureLink;
        public int[] child = new int[26];
        public int occurrences = 0;
        public List<int> patternIndices = new List<int>();
    }

    static string s;
    static int n, nodeIndex = 1;
    static int[] patternFound = new int[500005];
    static List<int>[] trieAdjList = new List<int>[500005];
    static TrieNode[] trie = new TrieNode[500005];

    // Function to insert a pattern into the trie
    static void InsertPattern(string pattern, int index)
    {
        // Start at the root node
        int node = 1;

        // Traverse the pattern character by character
        foreach (char c in pattern)
        {
            int idx = c - 'a';

            // If the current character's child node doesn't exist, create it
            if (trie[node].child[idx] == 0)
                trie[node].child[idx] = ++nodeIndex;

            // Move to the child node
            node = trie[node].child[idx];
        }

        // After traversing the pattern, add the pattern index to the current node
        trie[node].patternIndices.Add(index);
    }

    // Function to build the failure links for the Aho-Corasick algorithm
    static void BuildFailureLinks()
    {
        // Initialize a queue for BFS traversal
        Queue<int> nodeQueue = new Queue<int>();

        // Start from the root node
        int node = 1;

        // Set failure link of root node to itself
        trie[1].failureLink = 1;

        // Push all child nodes of the root to the queue
        for (int i = 0; i < 26; i++)
        {
            if (trie[node].child[i] != 0)
            {
                trie[trie[node].child[i]].failureLink = node;
                nodeQueue.Enqueue(trie[node].child[i]);
            }
            else
            {
                trie[node].child[i] = 1; // If child node doesn't exist, point to root
            }
        }

        // Perform BFS traversal to set failure links
        while (nodeQueue.Count > 0)
        {
            node = nodeQueue.Dequeue();
            for (int i = 0; i < 26; i++)
            {
                if (trie[node].child[i] != 0)
                {
                    trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];
                    nodeQueue.Enqueue(trie[node].child[i]);
                }
                else
                {
                    trie[node].child[i] = trie[trie[node].failureLink].child[i];
                }
            }
        }

        // Create adjacency list based on failure links
        for (int i = 0; i <= nodeIndex; i++)
            trieAdjList[i] = new List<int>();

        for (int i = 2; i <= nodeIndex; i++)
            trieAdjList[trie[i].failureLink].Add(i);
    }

    // Function to run the Aho-Corasick algorithm on the main string
    static void RunAhoCorasick(string s)
    {
        // Start from the root node
        int node = 1;

        // Traverse the main string character by character
        foreach (char c in s)
        {
            int idx = c - 'a';

            // Traverse the trie based on characters in the main string
            node = trie[node].child[idx];

            // Increment occurrences at each node
            trie[node].occurrences++;
        }
    }

    // DFS function to propagate the occurrences down the trie
    static int DFS(int u)
    {
        // Initialize total occurrences at current node
        int totalOccurrences = trie[u].occurrences;

        // Traverse child nodes recursively
        foreach (int v in trieAdjList[u])
            totalOccurrences += DFS(v);

        // Propagate occurrences to pattern indices
        foreach (int patternIndex in trie[u].patternIndices)
            patternFound[patternIndex] = totalOccurrences;

        // Return total occurrences
        return totalOccurrences;
    }

    // Main function
    public static void Main(string[] args)
    {
        s = "aybabtu";
        n = 3;
        string[] patterns = { "bab", "abc", "ayba" };

        // Input and insert patterns into trie
        for (int i = 0; i < 500005; i++)
            trie[i] = new TrieNode();

        for (int i = 0; i < n; i++)
            InsertPattern(patterns[i], i);

        // Build failure links for Aho-Corasick algorithm
        BuildFailureLinks();

        // Run Aho-Corasick algorithm on main string
        RunAhoCorasick(s);

        // Propagate occurrences down the trie using DFS
        DFS(1);

        // Output pattern matches
        for (int i = 0; i < n; i++)
            Console.WriteLine((patternFound[i] != 0 ? "YES" : "NO"));
    }
}
Javascript
//code by flutterfly
// The main string and the number of patterns
let s;
let n;
let nodeIndex = 1;
let patternFound = new Array(500005).fill(0);

// Adjacency list for the trie nodes
let trieAdjList = new Array(500005).fill().map(() => []);

// Structure for each node in the trie
class TrieNode {
    constructor() {
        this.failureLink = 0;
        this.child = new Array(26).fill(0);
        this.occurrences = 0;
        this.patternIndices = [];
    }
}

let trie = new Array(500005).fill().map(() => new TrieNode());

// Function to insert a pattern into the trie
function insertPattern(pattern, index) {
    // Start at the root node
    let node = 1;

    // Traverse the pattern character by character
    for (let i = 0; i < pattern.length; i++) {
        // If the current character's child node doesn't exist, create it
        if (trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)] === 0) {
            trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)] = ++nodeIndex;
        }
        // Move to the child node
        node = trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)];
    }

    // After traversing the pattern, add the pattern index to the current node
    trie[node].patternIndices.push(index);
}

// Function to build the failure links for the Aho-Corasick algorithm
function buildFailureLinks() {
    // Initialize a queue for BFS traversal
    let nodeQueue = [];

    // Start from the root node
    let node = 1;

    // Set failure link of root node to itself
    trie[1].failureLink = 1;

    // Push all child nodes of the root to the queue
    for (let i = 0; i < 26; i++) {
        if (trie[node].child[i] !== 0) {
            trie[trie[node].child[i]].failureLink = node;
            nodeQueue.push(trie[node].child[i]);
        } else {
            trie[node].child[i] = 1; // If child node doesn't exist, point to root
        }
    }

    // Perform BFS traversal to set failure links
    while (nodeQueue.length > 0) {
        node = nodeQueue.shift();
        for (let i = 0; i < 26; i++) {
            if (trie[node].child[i] !== 0) {
                trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];
                nodeQueue.push(trie[node].child[i]);
            } else {
                trie[node].child[i] = trie[trie[node].failureLink].child[i];
            }
        }
    }

    // Create adjacency list based on failure links
    for (let i = 2; i <= nodeIndex; i++) {
        trieAdjList[trie[i].failureLink].push(i);
    }
}

// Function to run the Aho-Corasick algorithm on the main string
function runAhoCorasick(s) {
    // Start from the root node
    let node = 1;

    // Traverse the main string character by character
    for (let i = 0; i < s.length; i++) {
        // Traverse the trie based on characters in the main string
        node = trie[node].child[s.charCodeAt(i) - 'a'.charCodeAt(0)];
        // Increment occurrences at each node
        trie[node].occurrences++;
    }
}

// DFS function to propagate the occurrences down the trie
function dfs(u) {
    // Initialize total occurrences at current node
    let totalOccurrences = trie[u].occurrences;

    // Traverse child nodes recursively
    for (let v of trieAdjList[u]) {
        totalOccurrences += dfs(v);
    }

    // Propagate occurrences to pattern indices
    for (let patternIndex of trie[u].patternIndices) {
        patternFound[patternIndex] = totalOccurrences;
    }

    // Return total occurrences
    return totalOccurrences;
}

// Main function
function main() {
    s = "aybabtu";
    n = 3;
    let patterns = ["bab", "abc", "ayba"];

    // Input and insert patterns into trie
    for (let i = 0; i < n; i++) {
        insertPattern(patterns[i], i);
    }

    // Build failure links for Aho-Corasick algorithm
    buildFailureLinks();

    // Run Aho-Corasick algorithm on main string
    runAhoCorasick(s);

    // Propagate occurrences down the trie using DFS
    dfs(1);

    // Output pattern matches
    for (let i = 0; i < n; i++) {
        console.log(patternFound[i] ? "YES" : "NO");
    }
}

main();

Output
YES
NO
YES




Time complexity: O(numPatterns * m + n + s), where: numPatterns is the number of patterns, m is the average length of the patterns, n is the total number of nodes in the trie, s is the length of the main string.
Auxiliary Space: O(n + numPatterns)




Like Article
Suggest improvement
Share your thoughts in the comments

Similar Reads