Open In App

Find length of repeating Substring

Last Updated : 14 Aug, 2023
Improve
Improve
Like Article
Like
Save
Share
Report

Given a string s and a positive integer K, the task is to find the length of the longest contiguous substring that appears at least K times in s.

Examples:

Input: s = “abababcdabcd”, K = 2
Output: 4
Explanation: “abcd” (length 4) is a longest substring that appears at least K = 2 times in s.

Input: s = “aacd”, K = 4
Output: 0

Approach: This can be solved with the following idea:

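We will combine the Rabin–Karp rolling hash with a binary search on the substring length. For a candidate length, every window of that length is hashed, and the length is feasible if some hash value occurs at least K times. The binary search is valid because if a substring of length L occurs at least K times, then its prefix of length L − 1 also occurs at least K times.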

Below are the steps: 

  • Initialize the parameters: base, modulus, and power.
  • Define a function for computing the rolling hash.
  • Define a function for binary search to find the largest substring length.
  • For each candidate length chosen by the binary search, use the Rabin-Karp rolling hash to check whether some substring of that length occurs at least K times.
  • Return the longest substring length that meets the requirement.

Below is the implementation of the above approach:

C++




// C++ code for the above approach:
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
 
using namespace std;
 
// Parameters of the polynomial rolling hash:
// the multiplier (BASE) and the modulus
// every hash value is reduced by
const int BASE = 53;
const int MODULUS = 1000000007;

// Nominal upper bound on the length of
// the input string
const int MAX_LENGTH = 100000;
 
// Function to compute the rolling hash
// for all substrings of length len in
// the input string s
long long rolling_hash(const string& s, int len,
                       vector<long long>& power,
                       vector<long long>& hashes)
{
 
    // Initialize the hash value for the
    // first substring
    long long hash = 0;
    for (int i = 0; i < len; ++i) {
        hash = (hash * BASE + s[i]) % MODULUS;
    }
 
    // Store the hash value for the first
    // substring in the hashes vector
    hashes[1] = hash;
 
    // Compute the hash value for all
    // subsequent substrings using the
    // rolling hash technique
    for (int i = len; i < s.size(); ++i) {
 
        hash = ((hash - power[len - 1] * s[i - len])
                    % MODULUS
                + MODULUS)
               % MODULUS;
        hash = (hash * BASE + s[i]) % MODULUS;
 
        // Store the hash value in the
        // hashes vector
        hashes[i - len + 2] = hash;
    }
 
    // Return the hash value for the last
    // substring
    return hash;
}
 
// Binary search to find the length of
// the longest substring that repeats
// at least k times
int binary_search(const string& s, int k)
{
    // Initialize the range of possible
    // substring lengths to check
    int low = 0, high = s.size();
    int ans = 0;
 
    // Initialize vectors to store the
    // powers of the base and the hash
    // values for all substrings
    vector<long long> power(MAX_LENGTH), hashes(MAX_LENGTH);
    power[0] = 1;
    for (int i = 1; i < MAX_LENGTH; ++i) {
        power[i] = (power[i - 1] * BASE) % MODULUS;
    }
 
    // Perform binary search on the range
    // of possible substring lengths
    while (low <= high) {
 
        // Compute the midpoint of
        // the range
        int mid = (low + high) / 2;
 
        // Compute the rolling hash for
        // all substrings of length mid
        rolling_hash(s, mid, power, hashes);
 
        // Count the frequency of
        // each hash value
        unordered_map<long long, int> count;
        for (int i = 1; i <= s.size() - mid + 1; ++i) {
            count[hashes[i]]++;
        }
 
        // Check if any hash value
        // appears at least k times
        bool found = false;
        for (const auto& p : count) {
            if (p.second >= k) {
                found = true;
                break;
            }
        }
 
        // If a repeating substring of
        // length mid appears at least k
        // times, update the answer and
        // search the upper half
        // of the range
        if (found) {
            ans = mid;
            low = mid + 1;
        }
        else {
 
            // Otherwise, search the
            // lower half of the range
            high = mid - 1;
        }
    }
 
    // Return the length of the
    // longest substring
    return ans;
}
 
// Driver code
int main()
{
 
    string s = "abababcdabcd";
    int k = 2;
 
    // Function call
    int ans = binary_search(s, k);
 
    cout << ans << endl;
 
    return 0;
}


Java




import java.util.HashMap;
import java.util.Map;
 
/**
 * Finds the length of the longest substring that occurs at least {@code k} times in a string,
 * using a binary search over the length and a Rabin-Karp style rolling hash.
 *
 * <p>Windows are compared by hash value only, so two different substrings whose hashes collide
 * are counted as the same substring.
 */
public class GFG {
    /** Multiplier of the polynomial rolling hash. */
    private static final long BASE = 53;

    /** Modulus of the rolling hash; every stored hash lies in {@code [0, MODULUS)}. */
    private static final long MODULUS = 1_000_000_007L;

    public static void main(String[] args) {
        String s = "abababcdabcd";
        int k = 2;

        // Function call
        int ans = binarySearch(s, k);
        System.out.println(ans);
    }

    /**
     * Returns the length of the longest substring of {@code s} that occurs at least {@code k}
     * times; occurrences may overlap.
     *
     * <p>If a substring of length L occurs {@code k} times, so does its prefix of length L - 1,
     * which makes the feasible lengths a prefix of {@code 1..s.length()} and allows a binary
     * search.
     *
     * @param s the string to search
     * @param k the required number of occurrences
     * @return the longest feasible length, or 0 if no non-empty substring occurs {@code k} times
     */
    public static int binarySearch(String s, int k) {
        // Length 0 is never probed: it cannot improve the default answer of 0.
        int low = 1;
        int high = s.length();
        int ans = 0;

        while (low <= high) {
            // Written this way so the midpoint cannot overflow.
            int mid = low + (high - low) / 2;

            if (hasHashSeenAtLeast(rollingHash(s, mid), k)) {
                // Length mid works: remember it and try longer substrings.
                ans = mid;
                low = mid + 1;
            } else {
                // Length mid fails: try shorter substrings.
                high = mid - 1;
            }
        }

        return ans;
    }

    /** Reports whether some value occurs at least {@code k} times in {@code hashes}. */
    private static boolean hasHashSeenAtLeast(int[] hashes, int k) {
        Map<Integer, Integer> count = new HashMap<>();
        for (int hash : hashes) {
            // Stop at the first value that reaches k occurrences.
            if (count.merge(hash, 1, Integer::sum) >= k) {
                return true;
            }
        }
        return false;
    }

    /**
     * Computes the polynomial rolling hash of every window of the given length.
     *
     * <p>All intermediate arithmetic is done in {@code long} and reduced to {@code [0, MODULUS)},
     * so equal windows always receive equal hashes.
     *
     * @param s the string to hash
     * @param length the window length, between 0 and {@code s.length()} inclusive
     * @return an array of {@code s.length() - length + 1} hashes, where element {@code i} is the
     *     hash of the window starting at index {@code i}
     * @throws IllegalArgumentException if {@code length} is negative or exceeds {@code s.length()}
     */
    public static int[] rollingHash(String s, int length) {
        if (length < 0 || length > s.length()) {
            throw new IllegalArgumentException(
                "length must be between 0 and " + s.length() + " but was " + length);
        }

        int[] hashes = new int[s.length() - length + 1];

        // Hash of the first window, and BASE^length mod MODULUS built in the same pass.
        long hashValue = 0;
        long power = 1;
        for (int i = 0; i < length; i++) {
            hashValue = (hashValue * BASE + s.charAt(i)) % MODULUS;
            power = (power * BASE) % MODULUS;
        }
        hashes[0] = (int) hashValue;

        // Slide the window: shift the hash by one position, append the incoming character and
        // remove the outgoing one, whose weight after the shift is BASE^length.
        for (int i = length; i < s.length(); i++) {
            long shifted = hashValue * BASE + s.charAt(i);
            long dropped = (s.charAt(i - length) * power) % MODULUS;

            // floorMod keeps the result non-negative even when shifted < dropped.
            hashValue = Math.floorMod(shifted - dropped, MODULUS);
            hashes[i - length + 1] = (int) hashValue;
        }

        return hashes;
    }
}


Python3




# Python code for the above approach
 
import collections
 
# Build the polynomial rolling hash of every
# window of the given length in s, returned
# as a list in left-to-right window order
def rolling_hash(s, length):
    base, modulus = 53, 10**9 + 7

    # Hash the first window and, in the same pass,
    # accumulate base**length (mod modulus)
    window_hash, lead_weight = 0, 1
    for pos in range(length):
        window_hash = (window_hash * base + ord(s[pos])) % modulus
        lead_weight = (lead_weight * base) % modulus

    result = [window_hash]

    # Slide the window one character at a time: shift the
    # hash, take out the character that leaves the window
    # and add the one that enters it
    for right in range(length, len(s)):
        outgoing = ord(s[right - length])
        incoming = ord(s[right])
        window_hash = (window_hash * base - outgoing * lead_weight + incoming) % modulus
        result.append(window_hash)

    return result
 
# Binary-search the window length and return the
# largest one for which some window hash occurs
# at least k times in s (0 if none is larger)
def binary_search(s, k):
    best = 0
    lo, hi = 0, len(s)

    while lo <= hi:
        mid = (lo + hi) // 2

        # Tally how often each window hash of length mid occurs
        occurrences = collections.Counter(rolling_hash(s, mid))

        if any(times >= k for times in occurrences.values()):
            # Length mid works: remember it and try longer windows
            best = mid
            lo = mid + 1
        else:
            # Length mid fails: try shorter windows
            hi = mid - 1

    return best
 
# Driver code: run the search on the sample input
if __name__ == "__main__":
    s, k = "abababcdabcd", 2

    ans = binary_search(s, k)
    print(ans)


C#




using System;
using System.Collections.Generic;
 
// Finds the length of the longest substring that occurs at least
// k times in a string, using a binary search over the length and
// a Rabin-Karp style rolling hash. Windows are compared by hash
// value only, so two different substrings whose hashes collide
// are counted as the same substring.
public class GFG
{
    // Multiplier and modulus of the polynomial rolling hash;
    // every stored hash lies in [0, Modulus)
    private const long Base = 53;
    private const long Modulus = 1000000007;

    public static void Main(string[] args)
    {
        string s = "abababcdabcd";
        int k = 2;

        // Function call
        int ans = BinarySearch(s, k);
        Console.WriteLine(ans);
    }

    // Returns the length of the longest substring of s that
    // occurs at least k times (occurrences may overlap), or 0
    // if no non-empty substring does. If a substring of length
    // L occurs k times, so does its prefix of length L - 1,
    // which is what makes the binary search valid.
    public static int BinarySearch(string s, int k)
    {
        // Length 0 is never probed: it cannot improve the
        // default answer of 0
        int low = 1;
        int high = s.Length;
        int ans = 0;

        while (low <= high)
        {
            // Written this way so the midpoint cannot overflow
            int mid = low + (high - low) / 2;

            if (HasHashSeenAtLeast(RollingHash(s, mid), k))
            {
                // Length mid works: remember it and try
                // longer substrings
                ans = mid;
                low = mid + 1;
            }
            else
            {
                // Length mid fails: try shorter substrings
                high = mid - 1;
            }
        }

        return ans;
    }

    // Reports whether some value occurs at least k times in
    // hashes, stopping at the first value that reaches k
    private static bool HasHashSeenAtLeast(int[] hashes, int k)
    {
        Dictionary<int, int> count = new Dictionary<int, int>();
        foreach (int hash in hashes)
        {
            int seen;
            count.TryGetValue(hash, out seen);
            seen++;
            count[hash] = seen;

            if (seen >= k)
            {
                return true;
            }
        }
        return false;
    }

    // Computes the polynomial rolling hash of every window of
    // the given length. Element i of the result is the hash of
    // the window starting at index i; the result has
    // s.Length - length + 1 elements. Throws
    // ArgumentOutOfRangeException when length is negative or
    // larger than s.Length.
    public static int[] RollingHash(string s, int length)
    {
        if (length < 0 || length > s.Length)
        {
            throw new ArgumentOutOfRangeException(
                "length", length,
                "length must be between 0 and the length of s");
        }

        int[] hashes = new int[s.Length - length + 1];

        // Hash of the first window, and Base^length mod
        // Modulus built in the same pass. Everything is kept
        // in long so no product can overflow.
        long hashValue = 0;
        long power = 1;
        for (int i = 0; i < length; i++)
        {
            hashValue = (hashValue * Base + s[i]) % Modulus;
            power = (power * Base) % Modulus;
        }
        hashes[0] = (int)hashValue;

        // Slide the window: shift the hash by one position,
        // append the incoming character and remove the
        // outgoing one, whose weight after the shift is
        // Base^length
        for (int i = length; i < s.Length; i++)
        {
            long shifted = hashValue * Base + s[i];
            long dropped = (s[i - length] * power) % Modulus;

            // Adding Modulus keeps the result non-negative
            // when shifted < dropped
            hashValue = ((shifted - dropped) % Modulus + Modulus) % Modulus;

            hashes[i - length + 1] = (int)hashValue;
        }

        return hashes;
    }
}


Javascript




/**
 * Computes the polynomial rolling hash of every window of the
 * given length in s.
 *
 * Every returned hash is an integer in [0, modulus), so equal
 * windows always receive equal hashes.
 *
 * @param {string} s - The string to hash.
 * @param {number} length - The window length.
 * @returns {number[]} One hash per window, in left-to-right
 *     order; empty when length is negative or larger than
 *     s.length, since no such window exists.
 */
function rolling_hash(s, length) {

    // Define constants for the base and
    // modulus of the hash function
    const base = 53;
    const modulus = 10**9 + 7;

    // No window of this length exists; without this guard
    // charCodeAt would return NaN for the missing characters
    if (length < 0 || length > s.length) {
        return [];
    }

    // Hash of the first window, and base^length mod modulus
    // built in the same pass
    let power = 1;
    let hash_value = 0;
    for (let i = 0; i < length; i++) {
        hash_value = (hash_value * base + s.charCodeAt(i)) % modulus;
        power = (power * base) % modulus;
    }

    const hashes = [hash_value];

    // Slide the window: shift the hash by one position, append
    // the incoming character and remove the outgoing one, whose
    // weight after the shift is base^length
    for (let i = length; i < s.length; i++) {
        const shifted = hash_value * base + s.charCodeAt(i);
        const dropped = s.charCodeAt(i - length) * power;

        // % keeps the sign of the dividend, so add the modulus
        // back to land in [0, modulus) when shifted < dropped
        hash_value = (((shifted - dropped) % modulus) + modulus) % modulus;

        hashes.push(hash_value);
    }

    return hashes;
}
 
// Binary-search the window length and return the
// largest one for which some window hash occurs
// at least k times in s
function binary_search(s, k) {

    let lo = 0;
    let hi = s.length;
    let best = 0;

    while (lo <= hi) {

        const mid = Math.floor((lo + hi) / 2);

        // Tally how often each window hash of
        // length mid occurs
        const tally = new Map();
        for (const h of rolling_hash(s, mid)) {
            tally.set(h, (tally.get(h) || 0) + 1);
        }

        // Does any hash occur at least k times?
        const found = [...tally.values()].some((times) => times >= k);

        if (found) {
            // Length mid works: remember it and
            // try longer windows
            best = mid;
            lo = mid + 1;
        } else {
            // Length mid fails: try shorter windows
            hi = mid - 1;
        }
    }

    return best;
}
 
 
// Driver code: run the search on the sample input
const s = "abababcdabcd";
const k = 2;
const ans = binary_search(s, k);
console.log(ans);


Output

4






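Time Complexity: O(N*log(N)) expected, where N is the length of s (O(log N) binary-search steps, each with an O(N) hashing and counting pass)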
Auxiliary Space: O(N)



Like Article
Suggest improvement
Share your thoughts in the comments

Similar Reads