Overview
Indexing strategies enable fast lookups and queries on large documents. Different index types serve different purposes:
- Document Index: Fast node lookup by ID or path
- Position Index: Fast position-to-path conversion
- Content Index: Full-text search capabilities
Document Index
Node Indexing
Index nodes by ID for O(1) lookup:
class DocumentIndex {
#nodeMap = new Map(); // nodeId -> Node
#pathMap = new Map(); // nodeId -> path
#parentMap = new Map(); // nodeId -> parentId
index(document) {
this.#indexNode(document, []);
}
#indexNode(node, path) {
const nodeId = node.id || this.#generateId(node, path);
node.id = nodeId;
this.#nodeMap.set(nodeId, node);
this.#pathMap.set(nodeId, path);
if (path.length > 0) {
const parentPath = path.slice(0, -1);
const parentId = this.#getNodeIdAtPath(parentPath);
this.#parentMap.set(nodeId, parentId);
}
// Index children
node.children?.forEach((child, index) => {
this.#indexNode(child, [...path, index]);
});
}
getNodeById(id) {
return this.#nodeMap.get(id);
}
getPathById(id) {
return this.#pathMap.get(id);
}
getParentId(id) {
return this.#parentMap.get(id);
}
}Path Indexing
Index paths for fast path-based lookups:
class PathIndex {
#pathToNode = new Map(); // path -> Node
#nodeToPath = new Map(); // nodeId -> path
index(document) {
this.#indexPath(document, []);
}
#indexPath(node, path) {
const pathKey = this.#pathToString(path);
this.#pathToNode.set(pathKey, node);
if (node.id) {
this.#nodeToPath.set(node.id, path);
}
node.children?.forEach((child, index) => {
this.#indexPath(child, [...path, index]);
});
}
getNodeByPath(path) {
const pathKey = this.#pathToString(path);
return this.#pathToNode.get(pathKey);
}
getPathByNodeId(nodeId) {
return this.#nodeToPath.get(nodeId);
}
#pathToString(path) {
return path.join(',');
}
}Position Index
Text Positioning
Index text positions for fast range queries:
class PositionIndex {
#textPositions = [];
#charIndexToPath = new Map();
index(document) {
let charIndex = 0;
this.#indexNode(document, [], charIndex);
}
#indexNode(node, path, charIndex) {
if (node.type === 'text') {
// Index each character position
for (let i = 0; i < node.text.length; i++) {
this.#textPositions.push({
path,
offset: i,
charIndex: charIndex + i
});
this.#charIndexToPath.set(charIndex + i, { path, offset: i });
}
return charIndex + node.text.length;
}
// Index children
let currentIndex = charIndex;
node.children?.forEach((child, index) => {
currentIndex = this.#indexNode(child, [...path, index], currentIndex);
});
return currentIndex;
}
pathToCharIndex(path, offset) {
const pos = this.#textPositions.find(
p => this.#pathsEqual(p.path, path) && p.offset === offset
);
return pos ? pos.charIndex : -1;
}
charIndexToPath(charIndex) {
return this.#charIndexToPath.get(charIndex);
}
}Range Queries
Support efficient range queries:
class RangeIndex extends PositionIndex {
getRange(startPath, startOffset, endPath, endOffset) {
const startIndex = this.pathToCharIndex(startPath, startOffset);
const endIndex = this.pathToCharIndex(endPath, endOffset);
if (startIndex === -1 || endIndex === -1) {
return null;
}
// Extract text between positions
return this.#extractText(startIndex, endIndex);
}
#extractText(startIndex, endIndex) {
const positions = this.#textPositions.filter(
p => p.charIndex >= startIndex && p.charIndex < endIndex
);
// Group by path and extract text
const textByPath = new Map();
positions.forEach(pos => {
if (!textByPath.has(pos.path)) {
textByPath.set(pos.path, []);
}
textByPath.get(pos.path).push(pos);
});
// Build text string
let text = '';
textByPath.forEach((positions, path) => {
positions.sort((a, b) => a.offset - b.offset);
const node = this.#getNodeByPath(path);
positions.forEach(pos => {
text += node.text[pos.offset];
});
});
return text;
}
}Content Index
Inverted Index
Index content for search and queries:
class ContentIndex {
#invertedIndex = new Map(); // term -> nodeIds
#nodeContent = new Map(); // nodeId -> content
index(document) {
this.#indexNode(document);
}
#indexNode(node) {
if (node.type === 'text') {
const nodeId = node.id;
const content = node.text;
this.#nodeContent.set(nodeId, content);
// Tokenize and index
const terms = this.#tokenize(content);
terms.forEach(term => {
if (!this.#invertedIndex.has(term)) {
this.#invertedIndex.set(term, new Set());
}
this.#invertedIndex.get(term).add(nodeId);
});
}
// Index children
node.children?.forEach(child => this.#indexNode(child));
}
#tokenize(text) {
// Simple tokenization
return text.toLowerCase()
.split(/s+/)
.filter(term => term.length > 0);
}Search Operations
Support various search operations:
class SearchIndex extends ContentIndex {
search(query) {
const terms = this.#tokenize(query);
// Find nodes containing all terms (AND search)
let result = null;
terms.forEach(term => {
const nodeIds = this.#invertedIndex.get(term);
if (!nodeIds) {
return [];
}
if (result === null) {
result = new Set(nodeIds);
} else {
// Intersection
result = new Set([...result].filter(id => nodeIds.has(id)));
}
});
return result ? Array.from(result) : [];
}
searchOr(query) {
const terms = this.#tokenize(query);
const result = new Set();
terms.forEach(term => {
const nodeIds = this.#invertedIndex.get(term);
if (nodeIds) {
nodeIds.forEach(id => result.add(id));
}
});
return Array.from(result);
}
searchPhrase(phrase) {
// Find exact phrase matches
const terms = this.#tokenize(phrase);
const candidates = this.search(phrase);
// Verify phrase order
return candidates.filter(nodeId => {
const content = this.#nodeContent.get(nodeId);
return content.toLowerCase().includes(phrase.toLowerCase());
});
}
}Index Maintenance
Efficiently update indexes when document changes:
class IndexManager {
#indexes = [];
addIndex(index) {
this.#indexes.push(index);
}
index(document) {
this.#indexes.forEach(index => {
index.index(document);
});
}
updateIndex(change) {
this.#indexes.forEach(index => {
index.updateIndex(change);
});
}
// Incremental update
updateIncremental(change) {
switch (change.type) {
case 'insert':
this.#handleInsert(change);
break;
case 'delete':
this.#handleDelete(change);
break;
case 'update':
this.#handleUpdate(change);
break;
case 'move':
this.#handleMove(change);
break;
}
}
#handleInsert(change) {
// Insert new node into indexes
this.#indexes.forEach(index => {
index.insertNode(change.node, change.path);
});
}
#handleDelete(change) {
// Remove node from indexes
this.#indexes.forEach(index => {
index.deleteNode(change.path);
});
}
#handleUpdate(change) {
// Update node in indexes
this.#indexes.forEach(index => {
index.updateNode(change.path, change.node);
});
}
#handleMove(change) {
// Move node in indexes
this.#indexes.forEach(index => {
index.moveNode(change.path, change.newPath);
});
}
}Related Pages
Model & Schema
Overview of model and schema concepts
Collaborative Editing
Operational Transformation and CRDTs
Schema Migration
Version management and migration strategies
Advanced Validation
Recursive validation and performance optimization
Transaction System
Atomic operations and rollback mechanisms
Node Types
Comprehensive node type examples