Open In App

Rabin-Karp algorithm for Pattern Searching in Matrix

Given matrices txt[][] of dimensions m1 x m2 and pattern pat[][] of dimensions n1 x n2, the task is to check whether a pattern exists in the matrix or not, and if yes then print the top most indices of the pat[][] in txt[][]. It is assumed that m1, m2 ? n1, n2

Examples: 

Input:
txt[][] = {{G, H, I, P}
           {J, K, L, Q}
           {R, G, H, I}  
           {S, J, K, L}
          }
pat[][] = {{G, H, I},
           {J, K, L}
          }
Output:
Pattern found at ( 0, 0 )
Pattern found at ( 2, 1 )
Explanation:


Input:
txt[][] = { {A, B, C},
            {D, E, F},
            {G, H, I}
          }
pat[][] = { {E, F},
            {H, I}
          }
Output:
Pattern found at (1, 1)

Approach: In order to find a pattern in a 2-D array using Rabin-Karp algorithm, consider an input matrix txt[m1][m2] and a pattern pat[n1][n2]. The idea is to find the hash of each columns of mat[][] and pat[][] and compare the hash values. For any column if hash values are equals than check for the corresponding rows values. Below are the steps:

  1. Find the hash values of each column for the first N1 rows in both txt[][] and pat[][] matrix.
  2. Apply Rabin-Karp Algorithm by finding hash values for the column hashes found in step 1.
  3. If a match is found compare txt[][] and pat[][] matrices for the specific rows and columns.
  4. Else slide down the column hashes by 1 row in the txt matrix using a rolling hash.
  5. Repeat steps 2 to 4 for all the hash values and if we found any pat[][] match in txt[][] then print the indices of top most cell in the txt[][].

To find the hash value: In order to find the hash value of a substring of size N in a text using rolling hash follow below steps:  

  1. Remove the first character from the string: hash(txt[s:s+n-1])-(radix**(n-1)*txt[s])%prime_number
  2. Add the next character to the string: hash(txt[s:s+n-1])*radix + txt[n]

Below is the implementation of the above approach: 




#include <bits/stdc++.h>
using namespace std;
long long mod = 257;  //The modular value
long long r = 256;    //radix
long long dr = 1;     //Highest power for row hashing
long long dc = 1;     //Highest power for col hashing
 
//func that return a power n under mod m in LogN
long long power(int a,int n,long long m){
  if(n == 0){
    return 1;
  }
  if(n == 1){
    return a%m;
  }
  long long pow = power(a,n/2,m);
  if(n&1){
    return ((a%m)*(pow)%m * (pow)%m)%m;
  }
  else{
    return ((pow)%m * (pow)%m)%m;
  }
}
//Checks if all values of pattern matches with the text
bool check(vector<vector<char>> &txt,vector<vector<char>> &pat, long long r,long long c){
  for(long long i=0;i<pat.size();i++){
    for(long long j=0;j<pat[0].size();j++){
      if(pat[i][j] != txt[i+r][j+c]) return false;
    }
  }
  return true;
}
//Finds the first hash of first n rows where n is no. of rows in pattern
vector<long long> findHash(vector<vector<char>> &mat,long long row){
  vector<long long> hash;
  long long col = mat[0].size();
  for(long long i=0;i<col;i++){
    long long h = 0;
    for(long long j=0;j<row;j++){
      h = ((h*r)%mod + mat[j][i]%mod)%mod;
    }
    hash.push_back(h);
  }
  return hash;
}
//rolling hash function for columns
void colRollingHash(vector<vector<char>> &txt, vector<long long> &t_hash, long long row, long long p_row){
  for(long long i=0;i<t_hash.size();i++){
    t_hash[i] = (t_hash[i]%mod - ((txt[row][i])%mod*(dr)%mod)%mod)%mod;
    t_hash[i] = ((t_hash[i]%mod) * (r%mod))%mod;
    t_hash[i] = (t_hash[i]%mod + txt[row+p_row][i]%mod)%mod;
  }
}
void rabinKarp(vector<vector<char>> &txt, vector<vector<char>> &pat){
  long long t_row = txt.size();
  long long t_col = txt[0].size();
  long long p_row = pat.size();
  long long p_col = pat[0].size();
  dr = power(r,p_row-1,mod);
  dc = power(r,p_col-1,mod);
  vector<long long> t_hash = findHash(txt,p_row); //column hash of p_row rows
  vector<long long> p_hash = findHash(pat,p_row); //column hash of p_row rows
  long long p_val = 0;  //hash of entire pattern matrix
  for(long long i=0;i<p_col;i++){
    p_val = (p_val*r + p_hash[i])%mod;
  }
  for(long long i=0;i<=(t_row-p_row);i++){
    long long t_val = 0;
    for(long long i=0;i<p_col;i++){
      t_val = ((t_val*r) + t_hash[i])%mod;
    }
    for(long long j=0;j<=(t_col-p_col);j++){
      if(p_val == t_val){
        if(check(txt,pat,i,j)){
          cout<<i<<" "<<j<<endl;
        }
      }
      //calculating t_val for next set of columns
      t_val = (t_val%mod - ((t_hash[j]%mod)*(dc%mod))%mod + mod)%mod;
      t_val = (t_val%mod * r%mod)%mod;
      t_val = (t_val%mod + t_hash[j+p_col]%mod)%mod;
    }
    if(i < t_row-p_row){
      //call this function for hashing form next row
      colRollingHash(txt,t_hash,i,p_row);
    }
  }
}
int main(){
  vector<vector<char>> txt{{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'}};
  vector<vector<char>> pat{{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'}};
  //function prints the indices of row and col where its a match in txt
  rabinKarp(txt,pat);
  return 0;
}




# Python implementation for the
# pattern matching in 2-D matrix
 
# Function to find the hash-value
# of the given columns of text
def findHash(arr, col, row):
    hashCol = []
    add = 0
    radix = 256
 
    # For each column
    for i in range(0, col):
 
        for j in reversed(range(0, row)):
            add = add + (radix**(row-j-1) *
                         ord(arr[j][i]))% 101
        hashCol.append(add % 101);
        add = 0
    return hashCol
 
# Function to check equality of the
# two strings
def checkEquality(txt, row, col, flag):
    txt = [txt[i][col:patCol + col]
           for i in range(row, patRow + row)]
 
# If pattern found
    if txt == pat:
        flag = 1
        print("Pattern found at", \
              "(", row, ", ", col, ")")
    return flag
     
# Function to find the hash value of
# of the next column using rolling-hash
# of the Rabin-karp
def colRollingHash(txtHash, nxtRow):
 
    radix = 256
 
    # Find the hash of the matrix
    for j in range(len(txtHash)):
        txtHash[j] = (txtHash[j]*radix \
                      + ord(txt[nxtRow][j]))% 101
        txtHash[j] = txtHash[j] - (radix**(patRow) *
                     ord(txt[nxtRow-patRow][j]))% 101
        txtHash[j] = txtHash[j]% 101
    return txtHash
     
 
# Function to match a pattern in
# the given 2D Matrix
def search(txt, pat):
     
# List of the hashed value for
    # the text and pattern columns
    patHash = []
    txtHash = []
 
    # Hash value of the
    # pat_hash and txt_hash
    patVal = 0
    txtVal = 0
 
    # Radix value for the input characters
    radix = 256
     
    # Variable to determine if
    # pattern was found or not
    flag = 0
     
    # Function call to find the
    # hash value of columns
    txtHash = findHash(txt, txtCol, patRow) 
    patHash = findHash(pat, patCol, patRow)
     
    # Calculate hash value for patHash
    for i in range(len(patHash)):
        patVal = patVal \
                 + (radix**(len(patHash)-i-1) *
                 patHash[i]% 101)
    patVal = patVal % 101
     
     
    # Applying Rabin-Karp to compare
    # txtHash and patHash
    for i in range(patRow-1, txtRow):
        col = 0
        txtVal = 0
         
        # Find the hash value txtHash
        for j in range(len(patHash)):
            txtVal = txtVal\
                     + (radix**(len(patHash)-j-1) *
                     txtHash[j])% 101
        txtVal = txtVal % 101
         
        if txtVal == patVal:
            flag = checkEquality(\
                     txt, i + 1-patRow, col, flag)
             
        else:
 
            # Roll the txt window by one character
            for k in range(len(patHash), len(txtHash)):
 
                txtVal = txtVal \
                         * radix + (txtHash[k])% 101
                txtVal = txtVal \
                         - (radix**(len(patHash)) *
                         (txtHash[k-len(patHash)]))% 101
                txtVal = txtVal % 101
                col = col + 1
 
                # Check if txtVal and patVal are equal
                if patVal == txtVal:
                    flag = checkEquality(\
                           txt, i + 1-patRow, col, flag)  
                else:
                    continue
                 
        # To make sure i does not exceed txtRow
        if i + 1<txtRow:
            txtHash = colRollingHash(txtHash, i + 1)
             
    if flag == 0:
        print("Pattern not found")
     
 
# Driver Code
if __name__ == "__main__":
 
  # Given Text
  txt = [['A', 'B', 'C'], \
         ['D', 'E', 'F'], \
         ['G', 'H', 'I']]
 
  # Given Pattern
  pat = [['E', 'F'], ['H', 'I']]
 
  # Dimensions of the text
  txtRow = 3
  txtCol = 3
 
  # Dimensions for the pattern
  patRow = 2
  patCol = 2
 
  # Function Call
  search(txt, pat)




// JS program to implement the approach
 
let mod = 257; // The modular value
let r = 256; // radix
let dr = 1; // Highest power for row hashing
let dc = 1; // Highest power for col hashing
 
// func that return a power n under mod m in LogN
function power(a, n, m)
{
    if (n == 0) {
        return 1;
    }
    if (n == 1) {
        return a % m;
    }
    let pow = power(a, Math.floor(n / 2), m);
    if (n & 1) {
        return ((a % m) * (pow) % m * (pow) % m) % m;
    }
    else {
        return ((pow) % m * (pow) % m) % m;
    }
}
// Checks if all values of pattern matches with the text
function check(txt, pat, r, c)
{
    for (let i = 0; i < pat.length; i++) {
        for (let j = 0; j < pat[0].length; j++) {
            if (pat[i][j] != txt[i + r][j + c])
                return false;
        }
    }
    return true;
}
// Finds the first hash of first n rows where n is no. of
// rows in pattern
function findHash(mat, row)
{
    let hash = [];
    let col = (mat[0]).length;
    for (let i = 0; i < col; i++) {
        let h = 0;
        for (let j = 0; j < row; j++) {
            h = ((h * r) % mod + mat[j][i] % mod) % mod;
        }
        hash.push(h);
    }
    return hash;
}
// rolling hash function for columns
function colRollingHash(txt, t_hash, row, p_row)
{
    for (let i = 0; i < t_hash.length; i++) {
        t_hash[i] = (t_hash[i] % mod
               - ((txt[row][i]) % mod * (dr) % mod) % mod)
              % mod;
        t_hash[i] = ((t_hash[i] % mod) * (r % mod)) % mod;
        t_hash[i] = (t_hash[i] % mod + txt[row + p_row][i] % mod) % mod;
    }
    return t_hash
}
function rabinKarp(txt, pat)
{
    let t_row = txt.length;
    let t_col = (txt[0]).length;
    let p_row = pat.length;
    let p_col = (pat[0]).length;
    dr = power(r, p_row - 1, mod);
    dc = power(r, p_col - 1, mod);
    let t_hash  = findHash(txt, p_row); // column hash of p_row rows
    let p_hash  = findHash(pat, p_row); // column hash of p_row rows
    let p_val = 0; // hash of entire pattern matrix
    for (let i = 0; i < p_col; i++) {
        p_val = (p_val * r + p_hash[i]) % mod;
    }
    for (let i = 0; i <= (t_row - p_row); i++) {
        let t_val = 0;
        for (let i = 0; i < p_col; i++) {
            t_val = ((t_val * r) + t_hash[i]) % mod;
        }
        for (let j = 0; j <= (t_col - p_col); j++) {
            if (p_val == t_val) {
                if (check(txt, pat, i, j)) {
                    console.log(i + " " + j);
                }
            }
            // calculating t_val for next set of columns
            t_val = (t_val % mod
                   - ((t_hash[j] % mod) * (dc % mod)) % mod
                   + mod)
                  % mod;
            t_val = (t_val % mod * r % mod) % mod;
            t_val = (t_val % mod + t_hash[j + p_col] % mod)   % mod;
        }
        if (i < t_row - p_row) {
            // call this function for hashing form next row
            t_hash = colRollingHash(txt, t_hash, i, p_row);
        }
    }
}
 
let txt = [
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ]
];
let pat = [
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ],
    [ 'A', 'B', 'C', 'D', 'E' ]
];
 
// function prints the indices of row and col where its a
// match in txt
rabinKarp(txt, pat);
 
// This code is contributed by phasing17.




import java.io.*;
import java.util.*;
 
public class Main {
 
    static long mod = 257; // The modular value
    static long r = 256; // radix
    static long dr = 1; // Highest power for row hashing
    static long dc = 1; // Highest power for col hashing
 
    // func that return a power n under mod m in LogN
    static long power(int a, int n, long m)
    {
        if (n == 0) {
            return 1;
        }
        if (n == 1) {
            return a % m;
        }
        long pow = power(a, n / 2, m);
        if (n % 2 == 1) {
            return ((a % m) * (pow) % m * (pow) % m) % m;
        }
        else {
            return ((pow) % m * (pow) % m) % m;
        }
    }
 
    // Checks if all values of pattern matches with the text
    static boolean check(char[][] txt, char[][] pat, long r,
                         long c)
    {
        for (long i = 0; i < pat.length; i++) {
            for (long j = 0; j < pat[0].length; j++) {
                if (pat[(int)i][(int)j]
                    != txt[(int)(i + r)][(int)(j + c)])
                    return false;
            }
        }
        return true;
    }
 
    // Finds the first hash of first n rows where n is no.
    // of rows in pattern
    static List<Long> findHash(char[][] mat, long row)
    {
        List<Long> hash = new ArrayList<Long>();
        long col = mat[0].length;
        for (int i = 0; i < col; i++) {
            long h = 0;
            for (int j = 0; j < row; j++) {
                h = ((h * r) % mod + mat[j][i] % mod) % mod;
            }
            hash.add(h);
        }
        return hash;
    }
 
    // rolling hash function for columns
    static void colRollingHash(char[][] txt,
                               List<Long> t_hash, long row,
                               long p_row)
    {
        for (int i = 0; i < t_hash.size(); i++) {
            t_hash.set(i, (t_hash.get(i) % mod
                           - ((txt[(int)row][i]) % mod
                              * (dr) % mod)
                                 % mod)
                              % mod);
            t_hash.set(i,
                       ((t_hash.get(i) % mod) * (r % mod))
                           % mod);
            t_hash.set(i,
                       (t_hash.get(i) % mod
                        + txt[(int)(row + p_row)][i] % mod)
                           % mod);
        }
    }
    static void rabinKarp(char[][] txt, char[][] pat)
    {
        long t_row = txt.length;
        long t_col = txt[0].length;
        long p_row = pat.length;
        long p_col = pat[0].length;
        dr = power((int)r, (int)p_row - 1, mod);
        dc = power((int)r, (int)p_col - 1, mod);
        List<Long> t_hash = findHash(
            txt, p_row); // Column hash of p_row rows
        List<Long> p_hash = findHash(
            pat, p_row); // Column hash of p_row rows
        long p_val = 0; // Hash of entire pattern matrix
        for (long i = 0; i < p_col; i++) {
            p_val = (p_val * r + p_hash.get((int)i)) % mod;
        }
        for (long i = 0; i <= (t_row - p_row); i++) {
            long t_val = 0;
            for (long j = 0; j < p_col; j++) {
                t_val = ((t_val * r) + t_hash.get((int)j))
                        % mod;
            }
            for (long j = 0; j <= (t_col - p_col); j++) {
                if (p_val == t_val) {
                    if (check(txt, pat, i, j)) {
                        System.out.println(i + " " + j);
                    }
                }
                // Calculating t_val for next set of columns
                t_val = (t_val % mod
                         - ((t_hash.get((int)j) % mod)
                            * (dc % mod))
                               % mod
                         + mod)
                        % mod;
                t_val = (t_val % mod * r % mod) % mod;
                t_val = (t_val % mod
                         + t_hash.get((int)(j)) % mod)
                        % mod;
            }
            if (i < t_row - p_row) {
                // Call this function for hashing from next
                // row
                colRollingHash(txt, t_hash, i, p_row);
            }
        }
    }
 
    public static void main(String[] args)
    {
        char[][] txt = { { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' } };
        char[][] pat = { { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' },
                         { 'A', 'B', 'C', 'D', 'E' } };
        // function prints the indices of row and col where
        // its a match in txt
        rabinKarp(txt, pat);
    }
}




using System;
using System.Collections.Generic;
 
class Program {
    static long mod = 257; // The modular value
    static long r = 256; // radix
    static long dr = 1; // Highest power for row hashing
    static long dc = 1; // Highest power for col hashing
 
    // func that return a power n under mod m in LogN
    static long power(int a, int n, long m)
    {
        if (n == 0) {
            return 1;
        }
        if (n == 1) {
            return a % m;
        }
        long pow = power(a, n / 2, m);
        if ((n & 1) != 0) {
            return ((a % m) * (pow % m) * (pow % m)) % m;
        }
        else {
            return ((pow % m) * (pow % m)) % m;
        }
    }
 
    // Checks if all values of pattern matches with the text
    static bool check(List<List<char> > txt,
                      List<List<char> > pat, long r, long c)
    {
        for (long i = 0; i < pat.Count; i++) {
            for (long j = 0; j < pat[0].Count; j++) {
                if (pat[(int)i][(int)j]
                    != txt[(int)i + (int)r]
                          [(int)j + (int)c])
                    return false;
            }
        }
        return true;
    }
 
    // Finds the first hash of first n rows where n is no.
    // of rows in pattern
    static List<long> findHash(List<List<char> > mat,
                               long row)
    {
        List<long> hash = new List<long>();
        long col = mat[0].Count;
        for (long i = 0; i < col; i++) {
            long h = 0;
            for (long j = 0; j < row; j++) {
                h = ((h * r) % mod
                     + mat[(int)j][(int)i] % mod)
                    % mod;
            }
            hash.Add(h);
        }
        return hash;
    }
 
    // rolling hash function for columns
    static void colRollingHash(List<List<char> > txt,
                               List<long> t_hash, long row,
                               long p_row)
    {
        for (long i = 0; i < t_hash.Count; i++) {
            t_hash[(int)i]
                = (t_hash[(int)i] % mod
                   - ((txt[(int)row][(int)i] % mod)
                      * (dr % mod))
                         % mod)
                  % mod;
            t_hash[(int)i]
                = ((t_hash[(int)i] % mod) * (r % mod))
                  % mod;
            t_hash[(int)i]
                = (t_hash[(int)i] % mod
                   + txt[(int)row + (int)p_row][(int)i]
                         % mod)
                  % mod;
        }
    }
    static void rabinKarp(List<List<char> > txt,
                          List<List<char> > pat)
    {
        long t_row = txt.Count;
        long t_col = txt[0].Count;
        long p_row = pat.Count;
        long p_col = pat[0].Count;
        dr = power((int)r, (int)p_row - 1, mod);
        dc = power((int)r, (int)p_col - 1, mod);
 
        List<long> t_hash = findHash(
            txt, p_row); // Column hash of p_row rows
        List<long> p_hash = findHash(
            pat, p_row); // Column hash of p_row rows
        long p_val = 0; // Hash of entire pattern matrix
        for (long i = 0; i < p_col; i++) {
            p_val = (p_val * r + p_hash[(int)i]) % mod;
        }
        for (long i = 0; i <= (t_row - p_row); i++) {
            long t_val = 0;
            for (long j = 0; j < p_col; j++) {
                t_val
                    = ((t_val * r) + t_hash[(int)j]) % mod;
            }
            for (long j = 0; j <= (t_col - p_col); j++) {
                if (p_val == t_val) {
                    if (check(txt, pat, i, j)) {
                        Console.WriteLine(i + " " + j);
                    }
                }
                // Calculating t_val for next set of columns
                t_val = (t_val % mod
                         - ((t_hash[(int)j] % mod)
                            * (dc % mod))
                               % mod
                         + mod)
                        % mod;
                t_val = (t_val % mod * r % mod) % mod;
                t_val = (t_val % mod + t_hash[(int)j] % mod)
                        % mod;
            }
            if (i < t_row - p_row) {
                // Call this function for hashing from next
                // row
                colRollingHash(txt, t_hash, i, p_row);
            }
        }
    }
 
    public static void Main()
    {
        List<List<char> > txt = new List<List<char> >{
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' }
        };
        List<List<char> > pat = new List<List<char> >{
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
            new List<char>{ 'A', 'B', 'C', 'D', 'E' },
        };
        // function prints the indices of row and col where
        // its a match in txt
        rabinKarp(txt, pat);
    }
}

Output
Pattern found at ( 1 ,  1 )

Article Tags :