Open In App

Anagram Substring Search (Or Search for all permutations)

Improve
Improve
Like Article
Like
Save
Share
Report

Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that prints all occurrences of pat[] and its permutations (or anagrams) in txt[]. You may assume that n > m. 

Expected time complexity is O(n)

Examples: 

1) Input:  txt[] = "BACDGABCDA"  pat[] = "ABCD"
   Output:   Found at Index 0
             Found at Index 5
             Found at Index 6
2) Input: txt[] =  "AAABABAA" pat[] = "AABA"
   Output:   Found at Index 0
             Found at Index 1
             Found at Index 4

We strongly recommend that you click here and practice it, before moving on to the solution.

This problem is slightly different from the standard pattern-searching problem, here we need to search for anagrams as well. Therefore, we cannot directly apply standard pattern-searching algorithms like KMP, Rabin Karp, Boyer Moore, etc.

Approach 1 :

Brute Force : 
Consider the Input txt[] = "BACDGABCDA"  pat[] = "ABCD".
Occurrences of the pat[] and its permutations are found at indexes 0,5,6. 
The permutations are BACD,ABCD,BCDA. 
Let's sort the pat[] and the permutations of pat[] in txt[].
pat[] after sorting becomes : ABCD
permutations of pat[] in txt[] after sorting becomes : ABCD, ABCD,ABCD.
So we can say that the sorted version of pat[] and sorted version of its
permutations yield the same result. 

INTUITION: The idea is to consider every substring of txt[] whose length equals the length of pat[] and check whether the sorted version of that substring is equal to the sorted version of pat[]. If they are equal, then that particular substring is a permutation of pat[]; otherwise it is not.

C++




#include <bits/stdc++.h>
using namespace std;
  
/* Prints "Found at Index <i>" for every position i of txt at which a
 * permutation of pat starts. Two strings are permutations of each other
 * exactly when their sorted forms are equal, so each window of txt is
 * sorted and compared against the sorted pattern. */
void search(string& pat, string& txt)
{
    int textLen = txt.length(), patLen = pat.length();

    /* canonical (sorted) form of the pattern, computed once */
    string patKey(pat);
    sort(patKey.begin(), patKey.end());

    for (int start = 0; start + patLen <= textLen; ++start) {
        /* canonical form of the window beginning at start */
        string windowKey = txt.substr(start, patLen);
        sort(windowKey.begin(), windowKey.end());

        if (windowKey != patKey)
            continue;
        cout << "Found at Index " << start << endl;
    }
}
int main()
  
{
    string txt = "BACDGABCDA";
    string pat = "ABCD";
    search(pat, txt);
    return 0;
}


Java




/*package whatever //do not write package name here */
import java.util.*;
class GFG {

    /**
     * Prints "Found at Index i" for every index i of {@code txt} at which a
     * permutation (anagram) of {@code pat} starts.
     *
     * Each window of {@code txt} with the pattern's length is sorted and
     * compared with the sorted pattern; equal sorted forms mean the window
     * is a permutation of the pattern.
     *
     * @param pat pattern characters; left unmodified
     * @param txt text characters; left unmodified
     */
    static void search(char[] pat, char[] txt)
    {
        int n = txt.length, m = pat.length;

        // Sort a copy: "char[] sortedpat = pat" would only alias the
        // caller's array, and sorting it would reorder the caller's data.
        char[] sortedPat = pat.clone();
        Arrays.sort(sortedPat);

        for (int i = 0; i <= n - m; i++) {
            // Copy the window in one step instead of growing a String
            // one character at a time.
            char[] window = Arrays.copyOfRange(txt, i, i + m);
            Arrays.sort(window);

            if (Arrays.equals(sortedPat, window))
                System.out.println("Found at Index " + i);
        }
    }

    public static void main(String[] args)
    {
        String txt = "BACDGABCDA";
        String pat = "ABCD";
        search(pat.toCharArray(), txt.toCharArray());
    }
}
  
// This code is contributed by aadityaburujwale.


Python3




# Python code for the approach
def search(pat, txt):
    """Print the start index of every anagram of ``pat`` found in ``txt``.

    Every window of ``txt`` whose length equals ``len(pat)`` is sorted and
    compared with the sorted pattern; equal sorted forms mean the window is
    a permutation of the pattern.

    Args:
        pat: pattern string.
        txt: text string to scan.

    Returns:
        None. One line "Found at Index <i>" is printed per match.
    """
    n = len(txt)
    m = len(pat)

    # Sorted characters are a canonical form shared by all permutations.
    sorted_pat = sorted(pat)

    for i in range(n - m + 1):
        if sorted(txt[i:i + m]) == sorted_pat:
            # print() already puts one space between its arguments, so the
            # label carries no trailing space of its own.
            print("Found at Index", i)
  
# driver code
# Sample run: report every anagram of "ABCD" in the text.
txt = "BACDGABCDA"
pat = "ABCD"
search(pat=pat, txt=txt)
  
# This code is contributed by kothavvsaakash


C#




// C# code addition for the above approach
  
using System;
using System.Collections;
  
public class GFG {

  // Prints "Found at Index i" for every index i of txt at which a
  // permutation (anagram) of pat starts. Each window of txt with the
  // pattern's length is sorted and compared with the sorted pattern.
  // Neither pat nor txt is modified.
  static void search(char[] pat, char[] txt)
  {
    int n = txt.Length, m = pat.Length;

    // Sort a copy: "char[] sortedpat = pat" would only alias the caller's
    // array, and Array.Sort would then reorder the caller's data.
    char[] sortedpat = (char[])pat.Clone();
    Array.Sort(sortedpat);

    // The sorted pattern never changes, so build its string form once
    // rather than on every loop iteration.
    string sortedPatText = new string(sortedpat);

    // Scratch buffer reused for every window.
    char[] window = new char[m];
    for (int i = 0; i <= n - m; i++) {
      Array.Copy(txt, i, window, 0, m);
      Array.Sort(window);

      if (sortedPatText.Equals(new string(window)))
        Console.WriteLine("Found at Index " + i);
    }
  }

  static public void Main()
  {
    String txt = "BACDGABCDA";
    String pat = "ABCD";
    search(pat.ToCharArray(), txt.ToCharArray());
  }
}
  
// // This code is contributed by lokesh.


Javascript




<script>
  
// JavaScript code for the approach
  
/**
 * Writes "Found at Index i" for every index i of txt at which a
 * permutation (anagram) of pat starts.
 *
 * Each window of txt with the pattern's length is sorted and compared
 * with the sorted pattern; equal sorted forms mean the window is a
 * permutation of the pattern.
 *
 * @param {string} pat pattern to look for
 * @param {string} txt text to scan
 */
function search(pat, txt)
{
    let n = txt.length, m = pat.length;

    /* canonical (sorted) form of the pattern, computed once */
    const sortedpat = pat.split("").sort().join("");

    for (let i = 0; i <= n - m; i++) {
        /* canonical form of the window that starts at i */
        const temp = txt.substring(i, i + m).split("").sort().join("");

        if (sortedpat === temp)
            document.write("Found at Index ", i, "<br>");
    }
}
  
// driver code
  
const txt = "BACDGABCDA",
      pat = "ABCD";
search(pat, txt);
  
  
// This code is contributed by shinjanpatra
  
</script>


Output

Found at Index 0
Found at Index 5
Found at Index 6

Time Complexity : O(mlogm) + O( (n-m+1)(m + mlogm + m) ) 

mlogm for sorting pat. So O(mlogm)

The for loop runs n-m+1 times. In each iteration we build the string temp, which takes O(m) time; sort temp, which takes O(mlogm) time; and compare the sorted pat with the sorted substring, which takes O(m). So the time complexity of the loop is O( (n-m+1)*(m+mlogm+m) ).

Total Time complexity :  O(mlogm) + O( (n-m+1)(m + mlogm + m) ) 

Space Complexity: O(m) As we are using Extra space for strings temp and sortedpat

Approach 2 :

The idea is to modify Rabin Karp Algorithm. For example, we can keep the hash value as sum of ASCII values of all characters under modulo of a big prime number. For every character of text, we can add the current character to hash value and subtract the first character of previous window. This solution looks good, but like standard Rabin Karp, the worst case time complexity of this solution is O(mn). The worst case occurs when all hash values match and we one by one match all characters.

We can achieve O(n) time complexity under the assumption that alphabet size is fixed which is typically true as we have maximum 256 possible characters in ASCII. The idea is to use two count arrays: 

  1. The first count array stores frequencies of characters in pattern. 
  2. The second count array stores frequencies of characters in current window of text.

The important thing to note is, time complexity to compare two count arrays is O(1) as the number of elements in them are fixed (independent of pattern and text sizes). Following are steps of this algorithm. 

  1. Store counts of frequencies of pattern in first count array countP[]. Also store counts of frequencies of characters in first window of text in array countTW[].
  2. Now run a loop from i = M to N-1. Do following in loop. 
    • If the two count arrays are identical, we found an occurrence. 
    • Increment count of current character of text in countTW[] 
    • Decrement count of first character in previous window in countTW[]
  3. The last window is not checked by above loop, so explicitly check it.

Following is the implementation of above algorithm. 
Implementation:

C++




// C++ program to search all anagrams of a pattern in a text
#include <bits/stdc++.h>
#define MAX 256
using namespace std;
  
// Reports whether the first MAX entries of arr1[] and arr2[] agree
// element for element (true) or differ somewhere (false).
bool compare(char arr1[], char arr2[])
{
    return std::equal(arr1, arr1 + MAX, arr2);
}
  
// This function search for all permutations of pat[] in
// txt[]
// Prints "Found at Index <i>" for every index i of txt[] at which a
// permutation (anagram) of pat[] begins.
//
// pat, txt: NUL-terminated strings; neither is modified.
//
// A table of per-byte occurrence counts is kept for the pattern and for
// the current window of text; the window slides one character at a time
// and matches whenever the two tables are equal. Nothing is printed when
// the pattern is longer than the text.
//
// The tables are compared directly instead of through compare(), because
// that helper is declared for char arrays and the counters here are int.
void search(char* pat, char* txt)
{
    const std::size_t M = std::strlen(pat);
    const std::size_t N = std::strlen(txt);

    // A pattern longer than the text cannot occur. Returning here also
    // keeps the priming loop below from reading past the end of txt[].
    if (M > N)
        return;

    // countP:  counts of all characters of the pattern
    // countTW: counts of the current window of text
    // The counters are int: char counters wrap around once a character
    // occurs more often than a char can hold, which makes different
    // windows look identical.
    std::array<int, UCHAR_MAX + 1> countP{};
    std::array<int, UCHAR_MAX + 1> countTW{};

    for (std::size_t i = 0; i < M; i++) {
        // Index through unsigned char: plain char may be signed, and a
        // negative value would index before the start of the table.
        ++countP[static_cast<unsigned char>(pat[i])];
        ++countTW[static_cast<unsigned char>(txt[i])];
    }

    // Traverse the remaining characters of the text
    for (std::size_t i = M; i < N; i++) {
        // The window currently covers txt[i - M .. i - 1]
        if (countP == countTW)
            std::cout << "Found at Index " << (i - M) << std::endl;

        // Add current character to current window
        ++countTW[static_cast<unsigned char>(txt[i])];

        // Remove the first character of previous window
        --countTW[static_cast<unsigned char>(txt[i - M])];
    }

    // The loop never tests the final window, so check it here
    if (countP == countTW)
        std::cout << "Found at Index " << (N - M) << std::endl;
}
  
/* Driver program to test above function */
int main()
{
    char txt[] = "BACDGABCDA";
    char pat[] = "ABCD";
    search(pat, txt);
    return 0;
}
  
// This code is contributed by Aditya Kumar (adityakumar129)


C




// C program to search all anagrams of a pattern in a text
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
  
#define MAX 256
  
// Reports whether the first MAX entries of arr1[] and arr2[] agree
// element for element (true) or differ somewhere (false).
bool compare(char arr1[], char arr2[])
{
    return memcmp(arr1, arr2, MAX) == 0;
}
  
// This function search for all permutations of pat[] in
// txt[]
// Prints "Found at Index <i> " for every index i of txt[] at which a
// permutation (anagram) of pat[] begins.
//
// pat, txt: NUL-terminated strings; neither is modified.
//
// A table of per-byte occurrence counts is kept for the pattern and for
// the current window of text; the window slides one character at a time
// and matches whenever the two tables are equal. Nothing is printed when
// the pattern is longer than the text.
//
// The tables are compared with memcmp() instead of compare(), because
// that helper is declared for char arrays and the counters here are int.
void search(char* pat, char* txt)
{
    int M = (int)strlen(pat), N = (int)strlen(txt);

    // A pattern longer than the text cannot occur. Returning here also
    // keeps the priming loop below from reading past the end of txt[].
    if (M > N)
        return;

    // countP[]:  counts of all characters of the pattern
    // countTW[]: counts of the current window of text
    // The counters are int: char counters wrap around once a character
    // occurs more often than a char can hold, which makes different
    // windows look identical.
    int countP[MAX] = { 0 }, countTW[MAX] = { 0 };
    for (int i = 0; i < M; i++) {
        // Index through unsigned char: plain char may be signed, and a
        // negative value would index before the start of the table.
        countP[(unsigned char)pat[i]]++;
        countTW[(unsigned char)txt[i]]++;
    }

    // Traverse the remaining characters of the text
    for (int i = M; i < N; i++) {
        // The window currently covers txt[i - M .. i - 1]
        if (memcmp(countP, countTW, sizeof countP) == 0)
            printf("Found at Index %d \n", (i - M));

        // Add current character to current window
        countTW[(unsigned char)txt[i]]++;

        // Remove the first character of previous window
        countTW[(unsigned char)txt[i - M]]--;
    }

    // The loop never tests the final window, so check it here
    if (memcmp(countP, countTW, sizeof countP) == 0)
        printf("Found at Index %d \n", (N - M));
}
  
/* Driver program to test above function */
/* Demo driver: lists every anagram of "ABCD" inside the sample text. */
int main()
{
    char sampleText[] = "BACDGABCDA";
    char samplePattern[] = "ABCD";
    search(samplePattern, sampleText);
    return 0;
}
  
// This code is contributed by Aditya Kumar (adityakumar129)


Java




// Java program to search all anagrams 
// of a pattern in a text
public class GFG 
{
    // Size of the count tables: one slot per character code 0..MAX-1.
    static final int MAX = 256;

    // This function returns true if the first MAX entries of arr1[]
    // and arr2[] are the same, otherwise false.
    static boolean compare(char arr1[], char arr2[])
    {
        for (int i = 0; i < MAX; i++)
            if (arr1[i] != arr2[i])
                return false;
        return true;
    }

    // Same comparison for int count tables, which is what search() uses.
    static boolean compare(int arr1[], int arr2[])
    {
        for (int i = 0; i < MAX; i++)
            if (arr1[i] != arr2[i])
                return false;
        return true;
    }

    /**
     * Prints "Found at Index i" for every index i of {@code txt} at which
     * a permutation (anagram) of {@code pat} starts.
     *
     * A table of per-character counts is kept for the pattern and for the
     * current window of text; the window slides one character at a time
     * and matches whenever the two tables are equal. Nothing is printed
     * when the pattern is longer than the text.
     *
     * The tables have MAX slots and are indexed by character code, so a
     * character with code MAX or above causes an
     * ArrayIndexOutOfBoundsException.
     *
     * @param pat pattern to look for
     * @param txt text to scan
     */
    static void search(String pat, String txt)
    {
        int M = pat.length();
        int N = txt.length();

        // A pattern longer than the text cannot occur. Returning here
        // also keeps the priming loop from calling txt.charAt() past the
        // end of the text.
        if (M > N)
            return;

        // countP[]:  counts of all characters of the pattern
        // countTW[]: counts of the current window of text
        // int counters are used so that large counts do not wrap the way
        // 16-bit char counters would.
        int[] countP = new int[MAX];
        int[] countTW = new int[MAX];
        for (int i = 0; i < M; i++)
        {
            countP[pat.charAt(i)]++;
            countTW[txt.charAt(i)]++;
        }

        // Traverse the remaining characters of the text
        for (int i = M; i < N; i++)
        {
            // The window currently covers txt[i - M .. i - 1]
            if (compare(countP, countTW))
                System.out.println("Found at Index " + (i - M));

            // Add current character to current window
            countTW[txt.charAt(i)]++;

            // Remove the first character of previous window
            countTW[txt.charAt(i - M)]--;
        }

        // The loop never tests the final window, so check it here
        if (compare(countP, countTW))
            System.out.println("Found at Index " + (N - M));
    }

    /* Driver program to test above function */
    public static void main(String args[])
    {
        String txt = "BACDGABCDA";
        String pat = "ABCD";
        search(pat, txt);
    }
}
// This code is contributed by Sumit Ghosh


Python3




# Python program to search all
# anagrams of a pattern in a text
  
MAX = 256

# Reports whether the first MAX entries of arr1[] and arr2[]
# agree position by position: True when they all match,
# False as soon as one pair differs.
def compare(arr1, arr2):
    return not any(arr1[i] != arr2[i] for i in range(MAX))
      
# This function search for all
# permutations of pat[] in txt[]  
def search(pat, txt):
    """Print the start index of every anagram of ``pat`` found in ``txt``.

    A table of per-character counts is kept for the pattern and for the
    current window of text; the window slides one character at a time and
    matches whenever the two tables are equal.

    Args:
        pat: pattern string.
        txt: text string to scan.

    Returns:
        None. One line "Found at Index <i>" is printed per match; nothing
        is printed when the pattern is longer than the text.

    Raises:
        IndexError: if a character's code point is MAX or above, because
            the count tables have only MAX slots.
    """
    M = len(pat)
    N = len(txt)

    # A pattern longer than the text cannot occur. Returning here also
    # keeps the priming loop below from indexing past the end of txt.
    if M > N:
        return

    # countP[]:  counts of all characters of the pattern
    # countTW[]: counts of the current window of text
    countP = [0] * MAX
    countTW = [0] * MAX

    for i in range(M):
        countP[ord(pat[i])] += 1
        countTW[ord(txt[i])] += 1

    # Traverse the remaining characters of the text
    for i in range(M, N):

        # The window currently covers txt[i-M .. i-1]
        if compare(countP, countTW):
            print("Found at Index", (i - M))

        # Add current character to current window
        countTW[ord(txt[i])] += 1

        # Remove the first character of previous window
        countTW[ord(txt[i - M])] -= 1

    # The loop never tests the final window, so check it here
    if compare(countP, countTW):
        print("Found at Index", N - M)
          
# Driver program to test above function       
# Sample run: report every anagram of "ABCD" in the text.
txt = "BACDGABCDA"
pat = "ABCD"
search(pat=pat, txt=txt)
  
# This code is contributed
# by Upendra Singh Bartwal


C#




// C# program to search all anagrams 
// of a pattern in a text 
using System;
  
class GFG
{
    // Size of the count tables: one slot per character code 0..MAX-1.
    public const int MAX = 256;

    // This function returns true if the first MAX entries of arr1[]
    // and arr2[] are the same, otherwise false.
    public static bool compare(char[] arr1,
                               char[] arr2)
    {
        for (int i = 0; i < MAX; i++)
        {
            if (arr1[i] != arr2[i])
            {
                return false;
            }
        }
        return true;
    }

    // Same comparison for int count tables, which is what search() uses.
    public static bool compare(int[] arr1,
                               int[] arr2)
    {
        for (int i = 0; i < MAX; i++)
        {
            if (arr1[i] != arr2[i])
            {
                return false;
            }
        }
        return true;
    }

    // Prints "Found at Index i" for every index i of txt at which a
    // permutation (anagram) of pat starts.
    //
    // A table of per-character counts is kept for the pattern and for
    // the current window of text; the window slides one character at a
    // time and matches whenever the two tables are equal. Nothing is
    // printed when the pattern is longer than the text.
    //
    // The tables have MAX slots and are indexed by character code, so a
    // character with code MAX or above causes an
    // IndexOutOfRangeException.
    public static void search(string pat,
                              string txt)
    {
        int M = pat.Length;
        int N = txt.Length;

        // A pattern longer than the text cannot occur. Returning here
        // also keeps the priming loop from indexing past the end of txt.
        if (M > N)
        {
            return;
        }

        // countP[]:  counts of all characters of the pattern
        // countTW[]: counts of the current window of text
        // int counters are used so that large counts do not wrap the way
        // 16-bit char counters would.
        int[] countP = new int[MAX];
        int[] countTW = new int[MAX];
        for (int i = 0; i < M; i++)
        {
            countP[pat[i]]++;
            countTW[txt[i]]++;
        }

        // Traverse the remaining characters of the text
        for (int i = M; i < N; i++)
        {
            // The window currently covers txt[i - M .. i - 1]
            if (compare(countP, countTW))
            {
                Console.WriteLine("Found at Index " + (i - M));
            }

            // Add current character to current window
            countTW[txt[i]]++;

            // Remove the first character of previous window
            countTW[txt[i - M]]--;
        }

        // The loop never tests the final window, so check it here
        if (compare(countP, countTW))
        {
            Console.WriteLine("Found at Index " + (N - M));
        }
    }

    // Driver Code
    public static void Main(string[] args)
    {
        string txt = "BACDGABCDA";
        string pat = "ABCD";
        search(pat, txt);
    }
}
  
// This code is contributed 
// by Shrikant1


Javascript




<script>
  
      // JavaScript program to search all anagrams
      // of a pattern in a text
      const MAX = 256;
  
      // This function returns true if
      // contents of arr1[] and arr2[]
      // are same, otherwise false.
      function compare(arr1, arr2) {
        for (var i = 0; i < MAX; i++) {
          if (arr1[i] !== arr2[i]) {
            return false;
          }
        }
        return true;
      }
  
      // This function search for all
      // permutations of pat[] in txt[]
      function search(pat, txt) {
        var M = pat.length;
        var N = txt.length;
  
        // countP[]: Store count of all
        // characters of pattern
        // countTW[]: Store count of current
        // window of text
        var countP = new Array(MAX).fill(0);
        var countTW = new Array(MAX).fill(0);
        for (var i = 0; i < M; i++) {
          countP[pat[i].charCodeAt(0)]++;
          countTW[txt[i].charCodeAt(0)]++;
        }
  
        // Traverse through remaining
        // characters of pattern
        for (var i = M; i < N; i++) {
          // Compare counts of current window
          // of text with counts of pattern[]
          if (compare(countP, countTW)) {
            document.write("Found at Index " + (i - M) + "<br>");
          }
  
          // Add current character to
          // current window
          countTW[txt[i].charCodeAt(0)]++;
  
          // Remove the first character of
          // previous window
          countTW[txt[i - M].charCodeAt(0)]--;
        }
  
        // Check for the last window in text
        if (compare(countP, countTW)) {
          document.write("Found at Index " + (N - M) + "<br>");
        }
      }
  
      // Driver Code
      var txt = "BACDGABCDA";
      var pat = "ABCD";
      search(pat, txt);
        
</script>


Output

Found at Index 0
Found at Index 5
Found at Index 6

Time Complexity: O(256 * (n – m) + m)
Auxiliary space: O(k), where k = 256 is the alphabet size (the size of the two count arrays); this is constant, independent of n and m

Please suggest if someone has a better solution which is more efficient in terms of space and time.



Last Updated : 18 Sep, 2023
Like Article
Save Article
Previous
Next
Share your thoughts in the comments
Similar Reads