Appendix D: Performance Reference

This appendix collects optimization guidelines for four areas: GPU family capabilities, battery impact, memory usage, and profiling tools.

GPU Family Capabilities

Device Performance Tiers

/// Coarse performance tier of a GPU, used to choose a shader feature budget.
enum GPUTier {
    case tier1  // A8-A10: Basic effects only
    case tier2  // A11-A12: Most effects, reduced quality
    case tier3  // A13-A14: Full effects, some limitations
    case tier4  // A15+: All effects at full quality

    /// Detects the tier of the system default Metal device.
    ///
    /// GPU families are cumulative, so each check asks for the *lowest* family
    /// belonging to a tier and the checks run from newest to oldest.
    ///
    /// - Returns: The matching tier, or `.tier1` when no Metal device is
    ///   available or the device supports none of the families checked below.
    static func current() -> GPUTier {
        // `MTLCreateSystemDefaultDevice()` returns nil when Metal is unavailable;
        // fall back to the most conservative tier instead of crashing.
        guard let device = MTLCreateSystemDefaultDevice() else {
            return .tier1
        }

        if device.supportsFamily(.apple8) {
            // apple8 = A15 and newer
            return .tier4
        } else if device.supportsFamily(.apple6) {
            // apple6 = A13, apple7 = A14
            return .tier3
        } else if device.supportsFamily(.apple4) {
            // apple4 = A11, apple5 = A12
            return .tier2
        } else {
            return .tier1
        }
    }

    /// The feature budget granted to shaders running on this tier.
    var maxComplexity: ShaderComplexity {
        switch self {
        case .tier1:
            return ShaderComplexity(
                maxTextureSize: 2048,
                maxSamples: 4,
                supportsLayerEffects: false,
                maxShaderComplexity: .simple
            )
        case .tier2:
            return ShaderComplexity(
                maxTextureSize: 4096,
                maxSamples: 8,
                supportsLayerEffects: true,
                maxShaderComplexity: .moderate
            )
        case .tier3:
            return ShaderComplexity(
                maxTextureSize: 8192,
                maxSamples: 16,
                supportsLayerEffects: true,
                maxShaderComplexity: .complex
            )
        case .tier4:
            return ShaderComplexity(
                maxTextureSize: 16384,
                maxSamples: 32,
                supportsLayerEffects: true,
                maxShaderComplexity: .unlimited
            )
        }
    }
}

Battery Impact Measurement

Power Monitoring

/// Brackets a stretch of work and reports the elapsed time together with the
/// change in an energy reading over that stretch.
class ShaderPowerMonitor {
    /// Energy reading captured by `beginMeasurement()`.
    private var startEnergy: Double = 0
    /// Monotonic timestamp (seconds of system uptime) captured by `beginMeasurement()`.
    private var startTime: TimeInterval = ProcessInfo.processInfo.systemUptime

    /// Starts a measurement window, replacing any earlier one.
    func beginMeasurement() {
        // System uptime is monotonic, so the elapsed time cannot jump or go
        // negative when the wall clock is adjusted mid-measurement.
        startTime = ProcessInfo.processInfo.systemUptime
        startEnergy = getCurrentEnergyUsage()
    }

    /// Ends the measurement window opened by `beginMeasurement()`.
    ///
    /// - Returns: A report carrying the elapsed time, the energy delta, the
    ///   average power (energy delta divided by elapsed time) and its
    ///   `PowerImpact` classification. The average power is reported as 0 when
    ///   no time has elapsed.
    func endMeasurement() -> PowerReport {
        let duration = ProcessInfo.processInfo.systemUptime - startTime
        let energyUsed = getCurrentEnergyUsage() - startEnergy

        // Dividing by a zero duration would yield NaN or infinity, which
        // `PowerImpact(from:)` classifies as `.extreme`.
        let averagePower = duration > 0 ? energyUsed / duration : 0

        return PowerReport(
            duration: duration,
            energyUsed: energyUsed,
            averagePower: averagePower,
            impact: PowerImpact(from: averagePower)
        )
    }

    /// Placeholder energy source.
    ///
    /// NOTE(review): this returns the system uptime in seconds, not an energy
    /// value, so reports built on it are not real power figures. Substitute a
    /// real reading (IOKit or Instruments) before relying on the output.
    private func getCurrentEnergyUsage() -> Double {
        // In practice, use IOKit or Instruments
        return ProcessInfo.processInfo.systemUptime
    }
}

/// Severity bucket for an average power draw.
enum PowerImpact {
    case low      // < 100mW
    case medium   // 100-500mW
    case high     // 500-1000mW
    case extreme  // > 1000mW

    /// Classifies an average power value into a bucket.
    ///
    /// Each lower bound is inclusive and each upper bound exclusive. Any value
    /// that is not below 1000 (NaN included) is classified as `.extreme`.
    ///
    /// - Parameter averagePower: The value to classify, in the unit used by
    ///   the thresholds noted on the cases.
    init(from averagePower: Double) {
        if averagePower < 100 {
            self = .low
        } else if averagePower < 500 {
            self = .medium
        } else if averagePower < 1000 {
            self = .high
        } else {
            self = .extreme
        }
    }
}

Optimization Strategies by Power Impact

/// Chooses shader settings in response to a measured power impact.
struct PowerOptimizedShader {
    /// Returns the shader configuration to use for the given power impact.
    ///
    /// Heavier impact levels get a lower update frequency and quality, and the
    /// two heaviest levels also turn off the full effect set.
    ///
    /// - Parameter impact: The power impact level to respond to.
    /// - Returns: The configuration associated with that level.
    static func configure(for impact: PowerImpact) -> ShaderConfiguration {
        let configuration: ShaderConfiguration

        switch impact {
        case .extreme:
            configuration = ShaderConfiguration(updateFrequency: 15, quality: .minimal, enableAllEffects: false)
        case .high:
            configuration = ShaderConfiguration(updateFrequency: 30, quality: .low, enableAllEffects: false)
        case .medium:
            configuration = ShaderConfiguration(updateFrequency: 30, quality: .medium, enableAllEffects: true)
        case .low:
            configuration = ShaderConfiguration(updateFrequency: 60, quality: .high, enableAllEffects: true)
        }

        return configuration
    }
}

Memory Usage Patterns

Texture Memory Optimization

/// Helpers for sizing textures against a device's memory budget.
struct TextureMemoryManager {
    /// Computes the largest square texture size that fits both the GPU's
    /// texture dimension limit and a share of its recommended working set.
    ///
    /// The memory budget is `device.recommendedMaxWorkingSetSize` divided
    /// evenly between `textureCount` textures of `bytesPerPixel` bytes per
    /// pixel; the side length is the floored square root of the resulting
    /// pixel count, capped at the hardware limit.
    ///
    /// - Parameters:
    ///   - device: The Metal device the textures will be created on.
    ///   - bytesPerPixel: Storage per pixel. Defaults to 4; values below 1 are
    ///     treated as 1.
    ///   - textureCount: How many textures of this size must fit at once.
    ///     Defaults to 10; values below 1 are treated as 1.
    /// - Returns: A square size with whole-number dimensions. It is zero when
    ///   the device reports no recommended working set.
    static func optimalTextureSize(
        for device: MTLDevice,
        bytesPerPixel: Int = 4,
        textureCount: Int = 10
    ) -> CGSize {
        // `MTLDevice` exposes no `maxTextureSize` property, so the 2D texture
        // limit is taken from the GPU family: 16384 on Apple3+ and Mac2 GPUs,
        // otherwise the conservative 8192.
        let supportsLargeTextures = device.supportsFamily(.apple3) || device.supportsFamily(.mac2)
        let hardwareLimit: Double = supportsLargeTextures ? 16384 : 8192

        // Clamp to 1 so a zero or negative argument cannot cause a division trap.
        let bytesPerPixelSlot = UInt64(max(1, bytesPerPixel)) * UInt64(max(1, textureCount))
        let maxPixels = device.recommendedMaxWorkingSetSize / bytesPerPixelSlot

        // Texture dimensions are whole pixels, so round the side length down.
        let memoryLimit = Double(maxPixels).squareRoot().rounded(.down)
        let side = min(memoryLimit, hardwareLimit)

        return CGSize(width: side, height: side)
    }
}

Shader Memory Patterns

// MEMORY INTENSIVE: Large arrays
// A precomputed table with one weight per tap of a 7x7 kernel (49 floats).
// The initializer is elided here; the actual weight values are not shown.
constant float kernelWeights[49] = { /* 7x7 kernel */ };

// OPTIMIZED: Compute weights on the fly
// Returns the unnormalized Gaussian weight exp(-|offset|^2 / (2 * sigma^2))
// for a tap at `offset`, so no weight table has to be stored.
float gaussianWeight(float2 offset, float sigma) {
    float distanceSquared = offset.x * offset.x + offset.y * offset.y;
    float twoSigmaSquared = 2.0 * sigma * sigma;
    return exp(-distanceSquared / twoSigmaSquared);
}

// MEMORY INTENSIVE: Multiple texture samples
// All 16 sampled colors are stored in a local array before any of them is used.
// NOTE(review): `texture`, `sampler`, `uv`, and `offsets` come from the
// surrounding shader, which is not shown in this fragment.
half4 colors[16];
for (int i = 0; i < 16; i++) {
    colors[i] = texture.sample(sampler, uv + offsets[i]);
}

// OPTIMIZED: Accumulate directly
// Each sample is scaled by its weight and added to a single running total,
// so no per-sample array is kept.
// NOTE(review): `texture`, `sampler`, `uv`, `offsets`, and `weights` come from
// the surrounding shader, which is not shown in this fragment.
half4 result = half4(0);
for (int i = 0; i < 16; i++) {
    result += texture.sample(sampler, uv + offsets[i]) * weights[i];
}

Profiling Tools Guide

Metal System Trace

  1. Open Instruments
  2. Choose "Metal System Trace" template
  3. Key metrics to watch:
    • GPU Utilization %
    • Shader Core Utilization
    • Memory Bandwidth
    • Thermal State

Frame Capture

// Enable programmatic frame capture
// `device` is the MTLDevice whose work should be captured; it is created
// elsewhere. Outside Xcode, programmatic capture also requires the
// `MetalCaptureEnabled` key in the app's Info.plist.
let captureManager = MTLCaptureManager.shared()
let captureDescriptor = MTLCaptureDescriptor()
captureDescriptor.captureObject = device
captureDescriptor.destination = .developerTools

do {
    // Throws when the capture cannot start, e.g. the destination is
    // unsupported or a capture is already in progress.
    try captureManager.startCapture(with: captureDescriptor)
    // Render one frame
    // `stopCapture()` does not throw, so it takes no `try`. If the rendering
    // code placed above can throw, move this call into a `defer` right after
    // `startCapture` so the capture is always ended.
    captureManager.stopCapture()
} catch {
    print("Failed to capture: \(error)")
}

Custom Performance Metrics

/// Interface sketch for gathering custom shader performance metrics.
///
/// NOTE(review): only the shape of the API is shown; `profile(duration:)` has
/// no implementation yet.
class ShaderPerformanceProfiler {
    /// Immutable snapshot of the values produced by one profiling run.
    struct Metrics {
        let drawCallCount: Int
        let pixelsFilled: Int
        let averageFrameTime: TimeInterval  // TimeInterval, i.e. seconds
        let peakMemoryUsage: Int  // presumably bytes — TODO confirm
        let thermalState: ProcessInfo.ThermalState
    }
    
    /// Profiles rendering and reports the collected metrics.
    ///
    /// - Parameter duration: Presumably the length of the sampling window in
    ///   seconds — confirm once implemented.
    /// - Returns: The metrics collected for the run.
    func profile(duration: TimeInterval) async -> Metrics {
        // Implementation
        // NOTE(review): placeholder — nothing is returned yet, so this will not
        // compile until the body produces a `Metrics` value.
    }
}

Optimization Quick Reference

Arithmetic Operations Cost (Cycles)

  • Add/Subtract: 1
  • Multiply: 1
  • Divide: 4-6
  • Sqrt: 4-6
  • Sin/Cos: 8-12
  • Pow: 12-20
  • Texture Sample: 50-200 (on a cache miss; a cache hit costs far less — see Memory Access Patterns)

Memory Access Patterns

  • Register (immediate): 0 cycles
  • Constant Memory: 1-2 cycles
  • Texture Cache Hit: 4-8 cycles
  • Texture Cache Miss: 50-200 cycles
  • Main Memory: 200-400 cycles

Optimization Priorities

  1. Reduce texture samples
  2. Minimize branches
  3. Use half precision where possible
  4. Batch similar operations
  5. Reuse calculations
  6. Optimize loop bounds
  7. Consider approximations for expensive functions