All Data Structures
Trie
Core Implementation
Basic Trie
class TrieNode:
def __init__(self):
self.children = {}
self.is_end = False
self.count = 0 # Number of words ending here
self.prefix_count = 0 # Number of words using this prefix
class Trie:
def __init__(self):
self.root = TrieNode()
def insert(self, word):
node = self.root
for char in word:
node.prefix_count += 1
if char not in node.children:
node.children[char] = TrieNode()
node = node.children[char]
node.prefix_count += 1
node.is_end = True
node.count += 1
def search(self, word):
node = self._traverse(word)
return node is not None and node.is_end
def starts_with(self, prefix):
node = self._traverse(prefix)
return node is not None
def _traverse(self, prefix):
node = self.root
for char in prefix:
if char not in node.children:
return None
node = node.children[char]
return node
def count_words_with_prefix(self, prefix):
node = self._traverse(prefix)
return node.prefix_count if node else 0
Implementation Variations
1. Compressed Trie (Radix Tree)
class CompressedTrieNode:
def __init__(self):
self.children = {} # {string: node} pairs
self.is_end = False
class CompressedTrie:
def __init__(self):
self.root = CompressedTrieNode()
def insert(self, word):
node = self.root
while word:
# Find matching prefix in children
matched_prefix = None
matched_child = None
for prefix, child in node.children.items():
for i in range(min(len(word), len(prefix))):
if word[i] != prefix[i]:
break
if i == min(len(word), len(prefix)) - 1:
matched_prefix = prefix
matched_child = child
break
if not matched_prefix:
# No match found, create new node
node.children[word] = CompressedTrieNode()
node.children[word].is_end = True
break
if len(matched_prefix) > len(word):
# Split existing node
new_node = CompressedTrieNode()
new_node.children = matched_child.children
new_node.is_end = matched_child.is_end
node.children[word] = CompressedTrieNode()
node.children[word].is_end = True
node.children[matched_prefix[len(word):]] = new_node
del node.children[matched_prefix]
break
if len(word) > len(matched_prefix):
# Continue with remaining substring
node = matched_child
word = word[len(matched_prefix):]
else:
matched_child.is_end = True
break
2. Ternary Search Tree
class TSTNode:
def __init__(self, char):
self.char = char
self.left = None
self.right = None
self.equal = None
self.is_end = False
class TST:
def __init__(self):
self.root = None
def insert(self, word):
self.root = self._insert_recursive(self.root, word, 0)
def _insert_recursive(self, node, word, index):
char = word[index]
if not node:
node = TSTNode(char)
if char < node.char:
node.left = self._insert_recursive(node.left, word, index)
elif char > node.char:
node.right = self._insert_recursive(node.right, word, index)
elif index < len(word) - 1:
node.equal = self._insert_recursive(node.equal, word, index + 1)
else:
node.is_end = True
return node
Time Complexities
Operation | Standard Trie | Compressed Trie | TST |
---|---|---|---|
Insert | O(m) | O(m) | O(m) |
Search | O(m) | O(m) | O(m) |
Prefix Search | O(p) | O(p) | O(p) |
Delete | O(m) | O(m) | O(m) |
Space Complexity | O(ALPHABET_SIZE * m * n) | O(n) | O(n) |
where m = length of word, p = length of prefix, n = number of words
Space Complexities
Implementation | Space Complexity | Additional Notes |
---|---|---|
Standard Trie | O(ALPHABET_SIZE * m * n) | More space for faster lookups |
Compressed Trie | O(n) | Space efficient but slower updates |
TST | O(n) | Best for sparse datasets |
Common Data Structure Patterns
-
Autocomplete Systems
- Prefix matching
- Top-k suggestions
- Fuzzy search
-
Spell Checkers
- Word validation
- Suggestion generation
- Edit distance
-
String Matching
- Pattern matching
- Wildcard matching
- Regex operations
-
IP Routing Tables
- Longest prefix matching
- Route aggregation
- Next hop lookup
-
Dictionary Operations
- Word insertion
- Deletion
- Lookup
-
Phone Directory
- Contact search
- Partial matching
- Prefix based search
Edge Cases to Consider
- Empty String
def insert_empty(self):
if not word:
self.root.is_end = True
return
- Single Character
def handle_single_char(self, char):
if len(char) == 1:
node = self.root
if char not in node.children:
node.children[char] = TrieNode()
node.children[char].is_end = True
- Duplicate Words
def insert_with_count(self, word):
node = self.root
for char in word:
if char not in node.children:
node.children[char] = TrieNode()
node = node.children[char]
node.is_end = True
node.count += 1 # Track duplicates
Optimization Techniques
- Array-based Children
class OptimizedTrieNode:
def __init__(self):
self.children = [None] * 26 # For lowercase English letters
self.is_end = False
def get_index(self, char):
return ord(char) - ord('a')
def get_child(self, char):
return self.children[self.get_index(char)]
def set_child(self, char, node):
self.children[self.get_index(char)] = node
- Bit Vector Implementation
class BitVectorTrie:
def __init__(self):
self.children = 0 # Use bits to represent children
self.child_nodes = []
self.is_end = False
def has_child(self, char):
return bool(self.children & (1 << (ord(char) - ord('a'))))
def add_child(self, char):
self.children |= (1 << (ord(char) - ord('a')))
- Path Compression
class CompressedNode:
def __init__(self):
self.path = "" # Store complete path segment
self.children = {}
self.is_end = False
Memory Management
- Memory Pool
class TrieNodePool:
def __init__(self, size=1000):
self.pool = [TrieNode() for _ in range(size)]
self.available = list(range(size))
def get_node(self):
if self.available:
return self.pool[self.available.pop()]
return TrieNode()
def return_node(self, node):
if len(self.available) < len(self.pool):
node.children.clear()
node.is_end = False
self.available.append(self.pool.index(node))
- Lazy Loading
class LazyTrie:
def __init__(self, word_file):
self.root = TrieNode()
self.word_file = word_file
self.loaded = False
def ensure_loaded(self):
if not self.loaded:
with open(self.word_file) as f:
for word in f:
self.insert(word.strip())
self.loaded = True
Performance Considerations
- Batch Operations
def bulk_insert(self, words):
# Sort words for better cache locality
words.sort()
for word in words:
self.insert(word)
- Prefix Caching
class CachedTrie(Trie):
def __init__(self):
super().__init__()
self.prefix_cache = {}
def find_with_prefix(self, prefix):
if prefix in self.prefix_cache:
return self.prefix_cache[prefix]
results = self._find_words(self._traverse(prefix), prefix)
self.prefix_cache[prefix] = results
return results
Common Pitfalls
- Memory Leak in Deletion
# Wrong
def delete_wrong(self, word):
node = self._traverse(word)
if node:
node.is_end = False # Memory leak, nodes still exist
# Correct
def delete(self, word):
def _delete_recursive(node, word, depth=0):
if not node:
return None
if depth == len(word):
node.is_end = False
if not node.children:
return None
return node
char = word[depth]
node.children[char] = _delete_recursive(node.children.get(char), word, depth + 1)
if not node.children[char]:
del node.children[char]
if not node.children and not node.is_end:
return None
return node
self.root = _delete_recursive(self.root, word)
- Incorrect Prefix Counting
# Wrong
def count_prefix_wrong(self, prefix):
node = self._traverse(prefix)
return 1 if node else 0 # Doesn't count all words with prefix
# Correct
def count_prefix(self, prefix):
def _count_recursive(node):
count = node.count if node.is_end else 0
for child in node.children.values():
count += _count_recursive(child)
return count
node = self._traverse(prefix)
return _count_recursive(node) if node else 0
- Inefficient Space Usage
# Wrong (wastes space)
class InefficientTrieNode:
def __init__(self):
self.children = [{} for _ in range(26)] # Unnecessary nesting
# Better
class EfficientTrieNode:
def __init__(self):
self.children = {} # Only create what's needed