Advanced Validation

Complex validation algorithms ensure document integrity while maintaining performance.

Overview

Advanced validation goes beyond basic schema checking. It includes recursive validation with memoization, circular reference detection, and performance optimizations for large documents.

Recursive Validation

Memoization

Validate document structure recursively with memoization:

class RecursiveValidator {
  #memo = new Map(); // nodeId -> isValid
  
  validate(document, schema) {
    return this.#validateNode(document, schema, []);
  }
  
  #validateNode(node, schema, path) {
    const nodeId = this.#getNodeId(node, path);
    
    // Check memo
    if (this.#memo.has(nodeId)) {
      const isValid = this.#memo.get(nodeId);
      return { valid: isValid, errors: [] };
    }
    
    const spec = schema.nodes[node.type];
    if (!spec) {
      return { valid: false, errors: ['Unknown node type: ' + node.type] };
    }
    
    // Validate attributes
    const attrErrors = this.#validateAttributes(node.attrs, spec.attrs);
    if (attrErrors.length > 0) {
      this.#memo.set(nodeId, false);
      return { valid: false, errors: attrErrors };
    }
    
    // Validate content
    const contentErrors = this.#validateContent(node.children, spec.content, schema);
    if (contentErrors.length > 0) {
      this.#memo.set(nodeId, false);
      return { valid: false, errors: contentErrors };
    }
    
    // Recursively validate children
    const childErrors = [];
    if (node.children) {
      node.children.forEach((child, index) => {
        const childResult = this.#validateNode(child, schema, [...path, index]);
        if (!childResult.valid) {
          childErrors.push(...childResult.errors);
        }
      });
    }
    
    const valid = childErrors.length === 0;
    this.#memo.set(nodeId, valid);
    
    return { valid, errors: childErrors };
  }
  
  #validateContent(children, rule, schema) {
    // Parse content rule (e.g., "block+", "inline*")
    const parsed = this.#parseContentRule(rule);
    
    // Validate against rule
    return this.#checkContentRule(children, parsed, schema);
  }
}

Validation Rules

Support complex validation rules:

class ValidationRules {
  validateContent(children, rule, schema) {
    // Parse rule: "block+", "inline*", "paragraph | heading"
    const parsed = this.#parseRule(rule);
    
    switch (parsed.type) {
      case 'sequence':
        return this.#validateSequence(children, parsed, schema);
      case 'choice':
        return this.#validateChoice(children, parsed, schema);
      case 'group':
        return this.#validateGroup(children, parsed, schema);
    }
  }
  
  #validateSequence(children, rule, schema) {
    // Validate sequence of nodes matching rule
    let index = 0;
    const errors = [];
    
    rule.items.forEach(item => {
      const count = this.#getCount(item);
      for (let i = 0; i < count; i++) {
        if (index >= children.length) {
          if (item.required) {
            errors.push('Missing required node');
          }
          break;
        }
        
        const child = children[index];
        if (!this.#matchesRule(child, item, schema)) {
          errors.push('Node does not match rule');
        }
        index++;
      }
    });
    
    return errors;
  }
}

Circular Reference Handling

Cycle Detection

Detect and handle circular references in document structure:

class CircularReferenceDetector {
  #visited = new Set();
  #path = [];
  
  detect(document) {
    return this.#hasCycle(document, []);
  }
  
  #hasCycle(node, path) {
    const nodeId = this.#getNodeId(node, path);
    
    // Check if we've seen this node before
    if (this.#visited.has(nodeId)) {
      // Check if it's in current path (cycle)
      if (this.#path.includes(nodeId)) {
        return true; // Cycle detected
      }
      return false; // Already visited, no cycle
    }
    
    // Mark as visited
    this.#visited.add(nodeId);
    this.#path.push(nodeId);
    
    // Check children
    const hasCycle = node.children?.some((child, index) => {
      return this.#hasCycle(child, [...path, index]);
    }) || false;
    
    // Remove from path
    this.#path.pop();
    
    return hasCycle;
  }
}

Reference Counting

Alternative approach using reference counting:

class ReferenceCounter {
  #refCount = new Map();
  
  detectCycles(document) {
    // Count references
    this.#countReferences(document);
    
    // Check for cycles
    return this.#checkCycles(document, []);
  }
  
  #countReferences(node, path) {
    const nodeId = this.#getNodeId(node, path);
    
    if (!this.#refCount.has(nodeId)) {
      this.#refCount.set(nodeId, 0);
    }
    
    this.#refCount.set(nodeId, this.#refCount.get(nodeId) + 1);
    
    // Count children
    node.children?.forEach((child, index) => {
      this.#countReferences(child, [...path, index]);
    });
  }
  
  #checkCycles(node, path) {
    const nodeId = this.#getNodeId(node, path);
    const count = this.#refCount.get(nodeId);
    
    // If referenced more than once in same path, it's a cycle
    if (count > 1 && path.includes(nodeId)) {
      return true;
    }
    
    // Check children
    return node.children?.some((child, index) => {
      return this.#checkCycles(child, [...path, index]);
    }) || false;
  }
}

Performance Optimization

Incremental Validation

Optimize validation for large documents:

class OptimizedValidator {
  #cache = new Map();
  #dirtyNodes = new Set();
  
  validate(document, schema) {
    // Only validate dirty nodes
    const nodesToValidate = this.#getDirtyNodes(document);
    
    if (nodesToValidate.size === 0) {
      // No changes, return cached result
      return this.#cache.get(this.#getDocumentKey(document)) || { valid: true, errors: [] };
    }
    
    // Validate only changed nodes
    const errors = [];
    nodesToValidate.forEach(nodeId => {
      const result = this.#validateNode(nodeId, document, schema);
      if (!result.valid) {
        errors.push(...result.errors);
      }
    });
    
    // Cache result
    const result = { valid: errors.length === 0, errors };
    this.#cache.set(this.#getDocumentKey(document), result);
    this.#dirtyNodes.clear();
    
    return result;
  }
  
  markDirty(nodeId) {
    this.#dirtyNodes.add(nodeId);
    // Mark ancestors as dirty too
    this.#markAncestorsDirty(nodeId);
  }
  
  // Incremental validation
  validateIncremental(change, schema) {
    // Only validate affected subtree
    const affectedNodes = this.#getAffectedNodes(change);
    
    const errors = [];
    affectedNodes.forEach(nodeId => {
      const result = this.#validateNode(nodeId, this.#document, schema);
      if (!result.valid) {
        errors.push(...result.errors);
      }
    });
    
    return { valid: errors.length === 0, errors };
  }
}

Caching

Cache validation results:

class CachedValidator {
  #cache = new LRUCache(1000);
  
  validate(document, schema) {
    const key = this.#getDocumentKey(document);
    
    // Check cache
    if (this.#cache.has(key)) {
      return this.#cache.get(key);
    }
    
    // Perform validation
    const result = this.#doValidate(document, schema);
    
    // Cache result
    this.#cache.set(key, result);
    
    return result;
  }
  
  invalidateCache(nodeId) {
    // Remove all cache entries for this node
    this.#cache.forEach((value, key) => {
      if (key.includes(nodeId)) {
        this.#cache.delete(key);
      }
    });
  }
}