Pattern matching is a fundamental problem in computer science, where the goal is to find occurrences of a "pattern" string within a "text" string. Two efficient algorithms for this purpose are Knuth-Morris-Pratt (KMP) and Boyer-Moore.
The KMP algorithm preprocesses the pattern to create a partial match (or "failure") table, which is used to skip unnecessary comparisons in the text.
Steps:
Implementation:
gopackage main
import (
"fmt"
)
// BuildLPS constructs the LPS (Longest Prefix Suffix) array
func BuildLPS(pattern string) []int {
m := len(pattern)
lps := make([]int, m)
j := 0 // length of previous longest prefix suffix
i := 1
for i < m {
if pattern[i] == pattern[j] {
j++
lps[i] = j
i++
} else {
if j != 0 {
j = lps[j-1]
} else {
lps[i] = 0
i++
}
}
}
return lps
}
// KMPSearch performs the KMP search algorithm
func KMPSearch(text, pattern string) []int {
n := len(text)
m := len(pattern)
lps := BuildLPS(pattern)
var result []int
i, j := 0, 0
for i < n {
if pattern[j] == text[i] {
i++
j++
}
if j == m {
result = append(result, i-j)
j = lps[j-1]
} else if i < n && pattern[j] != text[i] {
if j != 0 {
j = lps[j-1]
} else {
i++
}
}
}
return result
}
func main() {
text := "ABABDABACDABABCABAB"
pattern := "ABABCABAB"
matches := KMPSearch(text, pattern)
fmt.Println("Pattern found at indices:", matches)
}
The Boyer-Moore algorithm improves the efficiency of pattern matching by using two heuristics: the bad character rule and the good suffix rule.
Steps:
Implementation:
gopackage main
import (
"fmt"
"math"
)
// BadCharHeuristic preprocesses the pattern to create the bad character table
func BadCharHeuristic(pattern string) []int {
const ALPHABET_SIZE = 256
badChar := make([]int, ALPHABET_SIZE)
for i := range badChar {
badChar[i] = -1
}
for i := 0; i < len(pattern); i++ {
badChar[pattern[i]] = i
}
return badChar
}
// BoyerMooreSearch performs the Boyer-Moore search algorithm
func BoyerMooreSearch(text, pattern string) []int {
m := len(pattern)
n := len(text)
badChar := BadCharHeuristic(pattern)
var result []int
s := 0 // s is the shift of the pattern with respect to text
for s <= (n - m) {
j := m - 1
// Keep reducing index j of pattern while characters of pattern and text are matching
for j >= 0 && pattern[j] == text[s+j] {
j--
}
// If the pattern is present at current shift, then index j will become -1 after the above loop
if j < 0 {
result = append(result, s)
if (s + m < n) {
s += m - badChar[text[s+m]]
} else {
s += 1
}
} else {
s += int(math.Max(1, float64(j-badChar[text[s+j]])))
}
}
return result
}
func main() {
text := "ABDSAAABCD"
pattern := "ABC"
matches := BoyerMooreSearch(text, pattern)
fmt.Println("Pattern found at indices:", matches)
}
These algorithms are powerful tools for efficient string searching and are widely used in various applications such as text editors, search engines, and bioinformatics. Understanding and implementing these algorithms will enhance your capability to solve complex pattern matching problems.