Suffix Trees and Arrays

Suffix trees and suffix arrays are advanced data structures used for efficient string processing tasks such as substring search, longest common substring, and more. They are especially useful for problems involving many queries on the same text.

Suffix Trees

A suffix tree is a compressed trie of all suffixes of a given string. It provides fast substring searches and can be constructed in linear time.

Key Concepts:

Construction:

Implementation: Below is a basic implementation of a suffix tree.

go
package main import ( "fmt" ) // SuffixTreeNode represents a node in the suffix tree type SuffixTreeNode struct { children map[byte]*SuffixTreeNode start int end *int suffixLink *SuffixTreeNode } // SuffixTree represents the suffix tree type SuffixTree struct { text string root *SuffixTreeNode end int active *activePoint remainingSuffixCount int } // activePoint maintains the active point in the suffix tree type activePoint struct { node *SuffixTreeNode edge byte length int } // NewSuffixTreeNode creates a new suffix tree node func NewSuffixTreeNode(start int, end *int) *SuffixTreeNode { return &SuffixTreeNode{ children: make(map[byte]*SuffixTreeNode), start: start, end: end, } } // NewSuffixTree creates a new suffix tree func NewSuffixTree(text string) *SuffixTree { tree := &SuffixTree{text: text} tree.root = NewSuffixTreeNode(-1, nil) tree.active = &activePoint{node: tree.root} tree.buildSuffixTree() return tree } // EdgeLength returns the length of the edge func (n *SuffixTreeNode) EdgeLength(position int) int { if n == nil { return 0 } return min(*n.end, position+1) - n.start } // BuildSuffixTree constructs the suffix tree using Ukkonen's algorithm func (tree *SuffixTree) buildSuffixTree() { size := len(tree.text) tree.end = -1 for i := 0; i < size; i++ { tree.extendSuffixTree(i) } } // ExtendSuffixTree extends the suffix tree at a given position func (tree *SuffixTree) extendSuffixTree(pos int) { tree.end++ tree.remainingSuffixCount++ var lastNewNode *SuffixTreeNode for tree.remainingSuffixCount > 0 { if tree.active.length == 0 { tree.active.edge = tree.text[pos] } if tree.active.node.children[tree.active.edge] == nil { tree.active.node.children[tree.active.edge] = NewSuffixTreeNode(pos, &tree.end) if lastNewNode != nil { lastNewNode.suffixLink = tree.active.node lastNewNode = nil } } else { next := tree.active.node.children[tree.active.edge] if tree.walkDown(next) { continue } if tree.text[next.start+tree.active.length] == tree.text[pos] { if lastNewNode != nil && tree.active.node != tree.root { lastNewNode.suffixLink = tree.active.node lastNewNode = nil } tree.active.length++ break } splitEnd := next.start + tree.active.length - 1 split := NewSuffixTreeNode(next.start, &splitEnd) tree.active.node.children[tree.active.edge] = split split.children[tree.text[pos]] = NewSuffixTreeNode(pos, &tree.end) next.start += tree.active.length split.children[tree.text[next.start]] = next if lastNewNode != nil { lastNewNode.suffixLink = split } lastNewNode = split } tree.remainingSuffixCount-- if tree.active.node == tree.root && tree.active.length > 0 { tree.active.length-- tree.active.edge = tree.text[pos-tree.remainingSuffixCount+1] } else if tree.active.node != tree.root { tree.active.node = tree.active.node.suffixLink } } } // WalkDown performs the walk down operation func (tree *SuffixTree) walkDown(currNode *SuffixTreeNode) bool { if tree.active.length >= currNode.EdgeLength(tree.end) { tree.active.edge = tree.text[currNode.start + currNode.EdgeLength(tree.end)] tree.active.length -= currNode.EdgeLength(tree.end) tree.active.node = currNode return true } return false } // Min returns the minimum of two integers func min(a, b int) int { if a < b { return a } return b } // PrintSuffixTree prints the suffix tree (for debugging purposes) func (tree *SuffixTree) PrintSuffixTree() { tree.print(tree.root, 0) } func (tree *SuffixTree) print(node *SuffixTreeNode, depth int) { if node == nil { return } if node.start != -1 { fmt.Println(string(tree.text[node.start:min(*node.end, len(tree.text))])) } for _, child := range node.children { tree.print(child, depth+1) } } func main() { text := "banana" suffixTree := NewSuffixTree(text) suffixTree.PrintSuffixTree() }

Suffix Arrays

A suffix array is an array of integers giving the starting positions of suffixes of a string in lexicographical order. It's more space-efficient than a suffix tree and can be built in linear time.

Construction:

Implementation:

go
package main import ( "fmt" "sort" ) // SuffixArrayEntry represents a suffix array entry type SuffixArrayEntry struct { index int rank [2]int } // SuffixArray constructs the suffix array using the efficient approach func SuffixArray(text string) []int { n := len(text) suffixes := make([]SuffixArrayEntry, n) for i := range text { suffixes[i] = SuffixArrayEntry{index: i, rank: [2]int{int(text[i]), -1}} if i+1 < n { suffixes[i].rank[1] = int(text[i+1]) } } sort.Slice(suffixes, func(i, j int) bool { return suffixes[i].rank[0] < suffixes[j].rank[0] }) index := make([]int, n) k := 4 for k < 2*n { rank := 0 prevRank := suffixes[0].rank suffixes[0].rank[0] = rank index[suffixes[0].index] = 0 for i := 1; i < n; i++ { if suffixes[i].rank == prevRank { prevRank = suffixes[i].rank suffixes[i].rank[0] = rank } else { prevRank = suffixes[i].rank rank++ suffixes[i].rank[0] = rank } index[suffixes[i].index] = i } for i := 0; i < n; i++ { nextIndex := suffixes[i].index + k/2 if nextIndex < n { suffixes[i].rank[1] = suffixes[index[nextIndex]].rank[0] } else { suffixes[i].rank[1] = -1 } } sort.Slice(suffixes, func(i, j int) bool { return suffixes[i].rank[0] < suffixes[j].rank[0] }) k *= 2 } suffixArray := make([]int, n) for i := range suffixes { suffixArray[i] = suffixes[i].index } return suffixArray } func main() { text := "banana" suffixArray := SuffixArray(text) fmt.Println("Suffix Array:", suffixArray) }

Summary

Understanding and implementing these data structures will enhance your ability to solve complex string processing problems efficiently.

Becoming a Senior Go Developer: Mastering Go and Its Ecosystem