Skip to content
Related Articles

Related Articles

Improve Article

Rabin-Karp algorithm for Pattern Searching in Matrix

  • Difficulty Level : Hard
  • Last Updated : 06 Aug, 2021
Geek Week

Given a matrix txt[][] of dimensions m1 x m2 and a pattern pat[][] of dimensions n1 x n2, the task is to check whether the pattern occurs in the matrix and, if it does, to print the indices of the top-left cell of every occurrence of pat[][] in txt[][]. It is assumed that m1 ≥ n1 and m2 ≥ n2.

Examples: 

Input:
txt[][] = {{G, H, I, P}
           {J, K, L, Q}
           {R, G, H, I}  
           {S, J, K, L}
          }
pat[][] = {{G, H, I},
           {J, K, L}
          }
Output:
Pattern found at ( 0, 0 )
Pattern found at ( 2, 1 )
Explanation:


Input:
txt[][] = { {A, B, C},
            {D, E, F},
            {G, H, I}
          }
pat[][] = { {E, F},
            {H, I}
          }
Output:
Pattern found at (1, 1)

Approach: To find a pattern in a 2-D array using the Rabin-Karp algorithm, consider an input matrix txt[m1][m2] and a pattern pat[n1][n2]. The idea is to compute a hash for each column of txt[][] and pat[][] and compare the hash values. If the hash values are equal for a set of columns, then compare the corresponding cells directly to confirm the match. Below are the steps:

  1. Find the hash values of each column for the first N1 rows in both txt[][] and pat[][] matrix.
  2. Apply Rabin-Karp Algorithm by finding hash values for the column hashes found in step 1.
  3. If a match is found compare txt[][] and pat[][] matrices for the specific rows and columns.
  4. Else slide down the column hashes by 1 row in the txt matrix using a rolling hash.
  5. Repeat steps 2 to 4 for all the hash values and if we found any pat[][] match in txt[][] then print the indices of top most cell in the txt[][].

To find the hash value: In order to find the hash value of a substring of size N in a text using rolling hash follow below steps:  

  1. Remove the first character from the string: (hash(txt[s:s+n-1]) - radix**(n-1)*txt[s]) % prime_number
  2. Add the next character to the string: (hash*radix + txt[s+n]) % prime_number, where hash is the result of step 1

Below is the implementation of the above approach: 



C++




#include <bits/stdc++.h>
using namespace std;
//Hash parameters shared by all hashing routines in this file
long long mod{257};  //modulus every hash value is reduced by
long long r{256};    //radix of the polynomial hash
//Weights of the leading term of a hash window; both start at 1 and
//are recomputed by rabinKarp() from the pattern's dimensions
long long dr{1};     //weight used when rolling a column hash down one row
long long dc{1};     //weight used when rolling the window hash one column right
 
//Computes a^n modulo m by repeated squaring, so the recursion depth
//grows with log(n) rather than n
long long power(int a,int n,long long m){
  if(n == 0){
    return 1;
  }
  const long long base = a%m;
  if(n == 1){
    return base;
  }
  //a^(n/2); squaring it gives a^n for even n, one more factor of a is
  //needed when n is odd
  const long long half = power(a,n/2,m);
  const bool odd = (n&1) != 0;
  return odd ? ((base*half)%m * half%m)%m
             : (half%m * half%m)%m;
}
//Compares pat cell by cell with the window of txt whose top-left corner
//is (r,c); returns true only when every cell agrees.
//The width of the comparison is taken from the first row of pat.
bool check(vector<vector<char>> &txt,vector<vector<char>> &pat, long long r,long long c){
  if(pat.empty()){
    return true;  //no cells to compare
  }
  const long long height = pat.size();
  const long long width = pat[0].size();
  for(long long dy=0; dy<height; ++dy){
    const vector<char> &patLine = pat[dy];
    for(long long dx=0; dx<width; ++dx){
      if(txt[dy+r][dx+c] != patLine[dx]){
        return false;
      }
    }
  }
  return true;
}
//Finds the first hash of first n rows where n is no. of rows in pattern
vector<long long> findHash(vector<vector<char>> &mat,long long row){
  vector<long long> hash;
  long long col = mat[0].size();
  for(long long i=0;i<col;i++){
    long long h = 0;
    for(long long j=0;j<row;j++){
      h = ((h*r)%mod + mat[j][i]%mod)%mod;
    }
    hash.push_back(h);
  }
  return hash;
}
//rolling hash function for columns
void colRollingHash(vector<vector<char>> &txt, vector<long long> &t_hash, long long row, long long p_row){
  for(long long i=0;i<t_hash.size();i++){
    t_hash[i] = (t_hash[i]%mod - ((txt[row][i])%mod*(dr)%mod)%mod)%mod;
    t_hash[i] = ((t_hash[i]%mod) * (r%mod))%mod;
    t_hash[i] = (t_hash[i]%mod + txt[row+p_row][i]%mod)%mod;
  }
}
void rabinKarp(vector<vector<char>> &txt, vector<vector<char>> &pat){
  long long t_row = txt.size();
  long long t_col = txt[0].size();
  long long p_row = pat.size();
  long long p_col = pat[0].size();
  dr = power(r,p_row-1,mod);
  dc = power(r,p_col-1,mod);
  vector<long long> t_hash = findHash(txt,p_row); //column hash of p_row rows
  vector<long long> p_hash = findHash(pat,p_row); //column hash of p_row rows
  long long p_val = 0;  //hash of entire pattern matrix
  for(long long i=0;i<p_col;i++){
    p_val = (p_val*r + p_hash[i])%mod;
  }
  for(long long i=0;i<=(t_row-p_row);i++){
    long long t_val = 0;
    for(long long i=0;i<p_col;i++){
      t_val = ((t_val*r) + t_hash[i])%mod;
    }
    for(long long j=0;j<=(t_col-p_col);j++){
      if(p_val == t_val){
        if(check(txt,pat,i,j)){
          cout<<i<<" "<<j<<endl;
        }
      }
      //calculating t_val for next set of columns
      t_val = (t_val%mod - ((t_hash[j]%mod)*(dc%mod))%mod + mod)%mod;
      t_val = (t_val%mod * r%mod)%mod;
      t_val = (t_val%mod + t_hash[j+p_col]%mod)%mod;
    }
    if(i < t_row-p_row){
      //call this function for hashing form next row
      colRollingHash(txt,t_hash,i,p_row);
    }
  }
}
int main(){
  vector<vector<char>> txt{{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'}};
  vector<vector<char>> pat{{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'},{'A','B','C','D','E'}};
  //function prints the indices of row and col where its a match in txt
  rabinKarp(txt,pat);
  return 0;
}

Python3




# Python implementation for the
# pattern matching in 2-D matrix
 
# Compute one hash per column of arr from
# its first `row` rows and first `col` columns
def findHash(arr, col, row):
    radix = 256
    hashCol = []

    for c in range(col):
        # Horner's rule: read the column top to
        # bottom as a base-256 number modulo 101
        h = 0
        for r in range(row):
            h = (h * radix + ord(arr[r][c])) % 101
        hashCol.append(h)
    return hashCol
 
# Compare the patRow x patCol window of txt whose
# top-left cell is (row, col) with the pattern.
# Prints the position and returns 1 on a match,
# otherwise returns flag unchanged.
# Reads the module-level names patRow, patCol and pat.
def checkEquality(txt, row, col, flag):
    window = []
    for line in range(row, row + patRow):
        window.append(txt[line][col:col + patCol])

    if window != pat:
        return flag

    print("Pattern found at", \
          "(", row, ", ", col, ")")
    return 1
     
# Slide every column hash down by one row so that
# the window ends at row nxtRow. The list is updated
# in place and also returned.
# Reads the module-level names txt and patRow.
def colRollingHash(txtHash, nxtRow):

    radix = 256

    for j, current in enumerate(txtHash):
        # Append the cell that enters at the bottom
        shifted = (current * radix
                   + ord(txt[nxtRow][j])) % 101
        # Weight of the cell that leaves at the top
        expired = (radix ** patRow
                   * ord(txt[nxtRow - patRow][j])) % 101
        txtHash[j] = (shifted - expired) % 101
    return txtHash
     
 
# Function to match a pattern in the given 2D matrix.
# Prints "Pattern found at ( row ,  col )" for every
# occurrence (via checkEquality), or "Pattern not found"
# when there is none.
# Reads the module-level dimensions txtRow, txtCol,
# patRow and patCol; the helpers it calls also read the
# module-level txt and pat.
def search(txt, pat):

    # Radix value for the input characters
    radix = 256

    # Becomes 1 once any occurrence has been reported
    flag = 0

    # Column hashes of the first patRow rows of the
    # text, and of the whole pattern
    txtHash = findHash(txt, txtCol, patRow)
    patHash = findHash(pat, patCol, patRow)
    width = len(patHash)

    # Hash of the pattern's column hashes
    patVal = 0
    for h in patHash:
        patVal = (patVal * radix + h) % 101

    # Weight of the column hash that leaves the window
    # when it slides one column to the right
    dropWeight = pow(radix, width, 101)

    # i is the bottom row of the current band of rows
    for i in range(patRow - 1, txtRow):
        top = i + 1 - patRow
        col = 0

        # Hash of the first window of this band
        txtVal = 0
        for h in txtHash[:width]:
            txtVal = (txtVal * radix + h) % 101

        if txtVal == patVal:
            flag = checkEquality(txt, top, col, flag)

        # Slide across the remaining columns. This must
        # run whether or not column 0 matched; otherwise
        # later occurrences in the same band are skipped.
        for k in range(width, len(txtHash)):
            txtVal = (txtVal * radix + txtHash[k]
                      - dropWeight * txtHash[k - width]) % 101
            col = col + 1

            if txtVal == patVal:
                flag = checkEquality(txt, top, col, flag)

        # Move the column hashes down one row, unless
        # this was the last band
        if i + 1 < txtRow:
            txtHash = colRollingHash(txtHash, i + 1)

    if flag == 0:
        print("Pattern not found")
     
 
# Driver Code
if __name__ == "__main__":

  # Text matrix to search in
  txt = [list("ABC"), list("DEF"), list("GHI")]

  # Pattern matrix to look for
  pat = [list("EF"), list("HI")]

  # Dimensions of the text and of the pattern; the
  # functions above read these as module-level names
  txtRow, txtCol = 3, 3
  patRow, patCol = 2, 2

  # Run the search and print the result
  search(txt, pat)
Output
Pattern found at ( 1 ,  1 )

Attention reader! Don’t stop learning now. Get hold of all the important DSA concepts with the DSA Self Paced Course at a student-friendly price and become industry ready.  To complete your preparation from learning a language to DS Algo and many more,  please refer Complete Interview Preparation Course.

In case you wish to attend live classes with experts, please refer DSA Live Classes for Working Professionals and Competitive Programming Live for Students.




My Personal Notes arrow_drop_up
Recommended Articles
Page :