Implementation of the Wu-Manber Algorithm
Last Updated :
10 Jan, 2023
What is the Wu-Manber Algorithm?
The Wu-Manber algorithm is a string-matching algorithm used to efficiently search a body of text for one or more patterns. It builds on the Boyer-Moore idea of skipping ahead in the text, but it derives its shifts from blocks of characters and uses hash tables to filter candidate positions, which gives fast and accurate pattern matching.
Illustration:
Example: s = “the quick brown fox jumps over the lazy dog” pattern = “brown”:
Step 1: Divide the pattern into two subpatterns let’s say “br” and “own“.
Step 2: Next step includes calculating hash values for each subpattern formed in step 1.
Step 3: Start iterating in s from the first character.
Step 4: Check whether a subpattern matches at the current position in s; for example, “br” matches the first two characters of the substring “brown” in s.
Step 5: Then will check, whether the whole pattern is matching that substring or not.
Step 6: The whole pattern is matching the substring found in string s. It will return the index of the substring indicating pattern matched.
Step 7: If the whole pattern does not match at that position, continue the search from the next position in s. If no position matches, report “No match found”.
Steps involved in Wu-Manber Algorithm:
- Create a hash table that maps each possible substring of the pattern to the positions in the pattern where that substring appears.
- This hash table is used to quickly identify the potential starting positions of the pattern in the text.
- Iterate through the text and compare each character to the corresponding character in the pattern.
- If the characters match, you can move to the next character and continue the comparison.
- If the characters do not match, you can use the hash table to determine the maximum number of characters that can be skipped before the next potential starting position of the pattern.
- This allows the algorithm to quickly skip over large sections of the text without missing any potential matches.
Below is the code to implement the above approach:
C++14
#include <bits/stdc++.h>
using namespace std;
// Computes a base-256 polynomial hash over the half-open index range [i, j)
// of `pattern`. Each character contributes (c - 'a').
//
// The accumulation is done in unsigned arithmetic so that long ranges wrap
// around instead of hitting signed-overflow undefined behaviour; for ranges
// that fit in an int the result is the same as a plain int accumulation.
// (Converting the wrapped value back to int is implementation-defined before
// C++20 and modular on mainstream compilers.)
//
// pattern: string to hash (only read).
// i:       first index, inclusive.
// j:       last index, exclusive; the caller must ensure j <= pattern.size().
// Returns the hash value, 0 for an empty range.
int HashPattern(const std::string& pattern, int i, int j)
{
    unsigned int h = 0;
    for (int k = i; k < j; k++) {
        h = h * 256u
            + static_cast<unsigned int>(static_cast<int>(pattern[k]) - 'a');
    }
    return static_cast<int>(h);
}

// Searches `text` for every occurrence of `pattern` and prints
// "Match found at index <i>" for each one, or "No match found " when there is
// none.
//
// The pattern is split into two leading pieces of length m / 2. At each
// alignment the hashes of the corresponding text pieces are compared with the
// pattern's hashes first; only when both agree is the full pattern compared.
// Every alignment 0 .. n - m is examined (no skip table is used), so no
// occurrence can be missed.
//
// An empty pattern matches at every index 0 .. n; a pattern longer than the
// text produces no match.
void WuManber(const std::string& text, const std::string& pattern)
{
    constexpr int kSubpatterns = 2;

    const int m = static_cast<int>(pattern.length());
    const int n = static_cast<int>(text.length());
    const int t = m / kSubpatterns; // length of each hashed piece

    // Hash of each piece of the pattern, computed once up front.
    std::array<int, kSubpatterns> h{};
    for (int i = 0; i < kSubpatterns; i++) {
        h[i] = HashPattern(pattern, i * t, (i + 1) * t);
    }

    bool match = false;
    for (int i = 0; i < n - m + 1; i++) {
        // Cheap filter: all piece hashes must agree before the full compare.
        bool subpatternsMatch = true;
        for (int j = 0; j < kSubpatterns; j++) {
            if (HashPattern(text, i + j * t, i + (j + 1) * t) != h[j]) {
                subpatternsMatch = false;
                break;
            }
        }

        // Hashes can collide (and ignore a trailing odd character), so the
        // candidate is always verified; compare() avoids a temporary string.
        if (subpatternsMatch && text.compare(i, m, pattern) == 0) {
            std::cout << "Match found at index " << i << '\n';
            match = true;
        }
    }

    if (!match) {
        std::cout << "No match found \n";
    }
}
// Demo driver: searches a fixed sentence for the word "the" and lets
// WuManber print the result.
int main()
{
    // Named lvalues are passed because WuManber takes its arguments by
    // reference.
    std::string haystack{ "the cat sat on the mat" };
    std::string needle{ "the" };

    WuManber(haystack, needle);
}
|
Java
import java.io.*;
import java.util.*;
/**
 * Simplified Wu-Manber style substring search: the pattern is split into
 * leading pieces whose hashes act as a cheap filter before the full
 * comparison.
 */
class GFG {
    /** Number of equal-length pieces of the pattern that are hashed. */
    private static final int SUBPATTERN_COUNT = 2;

    /**
     * Computes a base-256 polynomial hash of {@code pattern[i, j)}.
     * Java {@code int} arithmetic wraps on overflow, so long ranges simply
     * hash modulo 2^32.
     *
     * @param pattern string to hash
     * @param i       first index, inclusive
     * @param j       last index, exclusive
     * @return the hash value, 0 for an empty range
     */
    static int hashPattern(String pattern, int i, int j)
    {
        int h = 0;
        for (int k = i; k < j; k++) {
            h = h * 256 + pattern.charAt(k);
        }
        return h;
    }

    /**
     * Prints "Match found at index i" for every occurrence of
     * {@code pattern} in {@code text}, or "No match found" if there is none.
     * Every alignment 0 .. n - m is examined; an empty pattern matches at
     * every index 0 .. n.
     *
     * @param text    text to search in
     * @param pattern pattern to search for
     */
    static void wuManber(String text, String pattern)
    {
        int m = pattern.length();
        int n = text.length();
        int t = m / SUBPATTERN_COUNT; // length of each hashed piece

        // Hash of each piece of the pattern, computed once up front.
        int[] h = new int[SUBPATTERN_COUNT];
        for (int i = 0; i < SUBPATTERN_COUNT; i++) {
            h[i] = hashPattern(pattern, i * t, (i + 1) * t);
        }

        boolean match = false;
        for (int i = 0; i < n - m + 1; i++) {
            // Cheap filter: all piece hashes must agree.
            boolean subpatternsMatch = true;
            for (int j = 0; j < SUBPATTERN_COUNT; j++) {
                if (hashPattern(text, i + j * t, i + (j + 1) * t)
                    != h[j]) {
                    subpatternsMatch = false;
                    break;
                }
            }

            // Hashes can collide, so verify the candidate; regionMatches
            // compares in place without allocating a substring.
            if (subpatternsMatch
                && text.regionMatches(i, pattern, 0, m)) {
                System.out.println("Match found at index " + i);
                match = true;
            }
        }

        if (!match) {
            System.out.println("No match found");
        }
    }

    /** Demo: searches a fixed sentence for the word "the". */
    public static void main(String[] args)
    {
        String text = "the cat sat on the mat";
        String pattern = "the";
        wuManber(text, pattern);
    }
}
|
Python3
def hashPattern(pattern, i, j):
    """Return a base-256 polynomial hash of ``pattern[i:j]``.

    Each character contributes its code point (``ord``). An empty range
    hashes to 0. Python integers do not overflow, so the value is exact.
    """
    h = 0
    for k in range(i, j):
        h = h * 256 + ord(pattern[k])
    return h
def wuManber(text, pattern):
    """Print every index at which ``pattern`` occurs in ``text``.

    The pattern is split into two leading pieces of length ``len(pattern) // 2``.
    At each alignment the hashes of the matching text pieces are compared with
    the pattern's hashes first; the full comparison runs only when they agree.
    Every alignment ``0 .. n - m`` is examined, so no occurrence is skipped.

    Prints "Match found at index i" per occurrence, or "No match found" when
    there is none. An empty pattern matches at every index ``0 .. n``.
    """
    m = len(pattern)
    n = len(text)
    s = 2  # number of hashed pieces
    t = m // s  # length of each hashed piece

    # Hash of each piece of the pattern, computed once up front.
    h = [hashPattern(pattern, k * t, (k + 1) * t) for k in range(s)]

    match = False
    for i in range(n - m + 1):
        # Cheap filter: skip this alignment as soon as one piece hash differs.
        if any(
            hashPattern(text, i + j * t, i + (j + 1) * t) != h[j]
            for j in range(s)
        ):
            continue
        # Hashes can collide, so the candidate is always verified.
        if text[i:i + m] == pattern:
            print("Match found at index", i)
            match = True

    if not match:
        print("No match found")
# Demo run: search a fixed sentence for the word "the".
text, pattern = "the cat sat on the mat", "the"
wuManber(text, pattern)
|
C#
using System;
using System.Collections.Generic;
/// <summary>
/// Simplified Wu-Manber style substring search: the pattern is split into
/// leading pieces whose hashes act as a cheap filter before the full
/// comparison.
/// </summary>
public class GFG {
    // Number of equal-length pieces of the pattern that are hashed.
    private const int SubpatternCount = 2;

    /// <summary>
    /// Computes a base-256 polynomial hash of pattern[i, j).
    /// </summary>
    /// <param name="pattern">String to hash.</param>
    /// <param name="i">First index, inclusive.</param>
    /// <param name="j">Last index, exclusive.</param>
    /// <returns>The hash value, 0 for an empty range.</returns>
    static int HashPattern(string pattern, int i, int j)
    {
        // unchecked: the hash is meant to wrap around for long ranges, even
        // when the project is compiled with overflow checking enabled.
        unchecked {
            int h = 0;
            for (int k = i; k < j; k++) {
                h = h * 256 + pattern[k];
            }
            return h;
        }
    }

    /// <summary>
    /// Prints "Match found at index i" for every occurrence of
    /// <paramref name="pattern"/> in <paramref name="text"/>, or
    /// "No match found" if there is none. Every alignment 0 .. n - m is
    /// examined; an empty pattern matches at every index 0 .. n.
    /// </summary>
    static void WuManber(string text, string pattern)
    {
        int m = pattern.Length;
        int n = text.Length;
        int t = m / SubpatternCount; // length of each hashed piece

        // Hash of each piece of the pattern, computed once up front.
        int[] h = new int[SubpatternCount];
        for (int i = 0; i < SubpatternCount; i++) {
            h[i] = HashPattern(pattern, i * t, (i + 1) * t);
        }

        bool match = false;
        for (int i = 0; i < n - m + 1; i++) {
            // Cheap filter: all piece hashes must agree.
            bool subpatternsMatch = true;
            for (int j = 0; j < SubpatternCount; j++) {
                if (HashPattern(text, i + j * t, i + (j + 1) * t)
                    != h[j]) {
                    subpatternsMatch = false;
                    break;
                }
            }

            // Hashes can collide, so verify the candidate; CompareOrdinal
            // compares in place without allocating a substring.
            if (subpatternsMatch
                && string.CompareOrdinal(text, i, pattern, 0, m) == 0) {
                Console.WriteLine("Match found at index " + i);
                match = true;
            }
        }

        if (!match) {
            Console.WriteLine("No match found");
        }
    }

    /// <summary>Demo: searches a fixed sentence for the word "the".</summary>
    static public void Main()
    {
        string text = "the cat sat on the mat";
        string pattern = "the";
        WuManber(text, pattern);
    }
}
|
Javascript
// Returns a base-256 polynomial hash of the characters of `pattern` in the
// half-open index range [i, j); each character contributes its UTF-16 code
// unit. An empty range hashes to 0.
function hashPattern(pattern, i, j) {
    let acc = 0;
    let pos = i;
    while (pos < j) {
        const code = pattern[pos].charCodeAt(0);
        acc = acc * 256 + code;
        pos += 1;
    }
    return acc;
}
// Logs "Match found at index <i>" for every occurrence of `pattern` in
// `text`, or "No match found" when there is none.
//
// The pattern is split into two leading pieces of length floor(m / 2). At
// each alignment the hashes of the matching text pieces are compared with the
// pattern's hashes first; the full comparison runs only when they agree.
// Every alignment 0 .. n - m is examined, so occurrences near the end of the
// text are not skipped. An empty pattern matches at every index 0 .. n.
function wuManber(text, pattern) {
    const m = pattern.length;
    const n = text.length;
    const s = 2;                    // number of hashed pieces
    const t = Math.floor(m / s);    // length of each hashed piece

    // Hash of each piece of the pattern, computed once up front.
    const h = [];
    for (let i = 0; i < s; i++) {
        h.push(hashPattern(pattern, i * t, (i + 1) * t));
    }

    let match = false;
    for (let i = 0; i < n - m + 1; i++) {
        // Cheap filter: all piece hashes must agree.
        let subpatternsMatch = true;
        for (let j = 0; j < s; j++) {
            if (hashPattern(text, i + j * t, i + (j + 1) * t) !== h[j]) {
                subpatternsMatch = false;
                break;
            }
        }

        // Hashes can collide, so the candidate is always verified.
        if (subpatternsMatch && text.slice(i, i + m) === pattern) {
            console.log("Match found at index " + i);
            match = true;
        }
    }

    if (!match) {
        console.log("No match found");
    }
}
// Demo run: search a fixed sentence for the word "the".
let text = "the cat sat on the mat",
    pattern = "the";
wuManber(text, pattern);
|
Output
Match found at index 0
Match found at index 15
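Time complexity: O(n · m) in the worst case for the code above, because the subpattern hashes are recomputed from scratch at every text position and each candidate is then verified against the whole pattern.
Auxiliary Space: O(m), for the temporary substring created when a candidate is verified; the hash and shift arrays have constant size.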
Difference between the KMP and Wu-Manber Algorithms
KMP algorithm and Wu Manber algorithm are both string-matching algorithms, which means that they are used to find a substring within a larger string. Both algorithms have the same time complexity, which means that they have the same performance characteristics in terms of how long it takes for the algorithm to run.
However, there are some differences between them:
- KMP algorithm uses a preprocessing step to generate a partial match table, which is used to speed up the string-matching process. This makes the KMP algorithm more efficient than the Wu Manber algorithm when the pattern that is being searched for is relatively long.
- Wu Manber algorithm uses a different approach to string matching, which involves dividing the pattern into several subpatterns and using these subpatterns to search for matches in the text. This makes the Wu Manber algorithm more efficient than the KMP algorithm when the pattern that is being searched for is relatively short.
Share your thoughts in the comments
Please Login to comment...