Skip to content

C++ Performance Optimization

Overview

Performance optimization is an important aspect of C++ programming. This chapter introduces key technologies such as memory management, algorithm optimization, compiler optimization, and performance measurement to help developers write efficient C++ programs.

📊 Performance Measurement

Benchmarking

cpp
#include <iostream>
#include <chrono>
#include <vector>
#include <algorithm>

class Timer {
private:
    std::chrono::high_resolution_clock::time_point start_;
    std::string name_;
    
public:
    Timer(const std::string& name) : name_(name) {
        start_ = std::chrono::high_resolution_clock::now();
    }
    
    ~Timer() {
        auto end = std::chrono::high_resolution_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start_);
        std::cout << name_ << " elapsed: " << duration.count() << " microseconds" << std::endl;
    }
};

void memoryAccessTest() {
    const size_t SIZE = 1024 * 1024;
    std::vector<int> data(SIZE, 1);
    
    // Sequential access
    {
        Timer timer("Sequential access");
        int sum = 0;
        for (size_t i = 0; i < SIZE; ++i) {
            sum += data[i];
        }
    }
    
    // Strided access
    {
        Timer timer("Strided access");
        int sum = 0;
        for (size_t i = 0; i < SIZE; i += 16) {
            sum += data[i];
        }
    }
}

🧠 Memory Optimization

Object Pool Pattern

cpp
#include <vector>
#include <memory>

template<typename T>
class ObjectPool {
private:
    std::vector<std::unique_ptr<T>> pool_;
    std::vector<T*> available_;
    
public:
    T* acquire() {
        if (available_.empty()) {
            pool_.push_back(std::make_unique<T>());
            return pool_.back().get();
        }
        T* obj = available_.back();
        available_.pop_back();
        return obj;
    }
    
    void release(T* obj) {
        available_.push_back(obj);
    }
};

// Usage example
void objectPoolDemo() {
    struct ExpensiveObject {
        std::vector<int> data;
        ExpensiveObject() : data(1000, 42) {}
    };
    
    ObjectPool<ExpensiveObject> pool;
    
    // Object pool version
    {
        Timer timer("Object pool");
        for (int i = 0; i < 1000; ++i) {
            auto* obj = pool.acquire();
            // Use object
            pool.release(obj);
        }
    }
    
    // Direct creation version
    {
        Timer timer("Direct creation");
        for (int i = 0; i < 1000; ++i) {
            auto obj = std::make_unique<ExpensiveObject>();
            // Use object
        }
    }
}

Memory Pool Allocator

cpp
class MemoryPool {
private:
    char* memory_;
    size_t block_size_;
    std::vector<void*> free_blocks_;
    
public:
    MemoryPool(size_t block_size, size_t num_blocks) 
        : block_size_(block_size) {
        memory_ = new char[block_size * num_blocks];
        
        for (size_t i = 0; i < num_blocks; ++i) {
            free_blocks_.push_back(memory_ + i * block_size);
        }
    }
    
    ~MemoryPool() {
        delete[] memory_;
    }
    
    void* allocate() {
        if (free_blocks_.empty()) return nullptr;
        void* block = free_blocks_.back();
        free_blocks_.pop_back();
        return block;
    }
    
    void deallocate(void* ptr) {
        free_blocks_.push_back(ptr);
    }
};

⚡ Algorithm Optimization

Container Selection

cpp
#include <set>
#include <unordered_set>
#include <vector>

void containerComparison() {
    const size_t SIZE = 100000;
    const int SEARCH_VALUE = SIZE / 2;
    
    std::vector<int> vec(SIZE);
    std::iota(vec.begin(), vec.end(), 0);
    
    std::set<int> ordered_set(vec.begin(), vec.end());
    std::unordered_set<int> hash_set(vec.begin(), vec.end());
    
    // Linear search
    {
        Timer timer("vector linear search");
        auto it = std::find(vec.begin(), vec.end(), SEARCH_VALUE);
    }
    
    // Binary search
    {
        Timer timer("vector binary search");
        bool found = std::binary_search(vec.begin(), vec.end(), SEARCH_VALUE);
    }
    
    // Ordered set search
    {
        Timer timer("set search");
        auto it = ordered_set.find(SEARCH_VALUE);
    }
    
    // Hash table search
    {
        Timer timer("unordered_set search");
        auto it = hash_set.find(SEARCH_VALUE);
    }
}

Loop Optimization

cpp
void loopOptimization() {
    const size_t SIZE = 1000;
    std::vector<std::vector<int>> matrix(SIZE, std::vector<int>(SIZE, 1));
    
    // Row-wise access (cache friendly)
    {
        Timer timer("Row-wise access");
        long long sum = 0;
        for (size_t i = 0; i < SIZE; ++i) {
            for (size_t j = 0; j < SIZE; ++j) {
                sum += matrix[i][j];
            }
        }
    }
    
    // Column-wise access (cache unfriendly)
    {
        Timer timer("Column-wise access");
        long long sum = 0;
        for (size_t j = 0; j < SIZE; ++j) {
            for (size_t i = 0; i < SIZE; ++i) {
                sum += matrix[i][j];
            }
        }
    }
    
    // Loop unrolling
    {
        Timer timer("Loop unrolling");
        long long sum = 0;
        for (size_t i = 0; i < SIZE; ++i) {
            size_t j;
            for (j = 0; j + 4 <= SIZE; j += 4) {
                sum += matrix[i][j] + matrix[i][j+1] + 
                       matrix[i][j+2] + matrix[i][j+3];
            }
            for (; j < SIZE; ++j) {
                sum += matrix[i][j];
            }
        }
    }
}

🔧 Compiler Optimization

Inline and Constant Optimization

cpp
// Inline function
inline int fastAdd(int a, int b) {
    return a + b;
}

// Constant expression
constexpr int factorial(int n) {
    return n <= 1 ? 1 : n * factorial(n - 1);
}

void compilerOptimizations() {
    const int ITERATIONS = 10000000;
    
    // Function call
    {
        Timer timer("Function call");
        int sum = 0;
        for (int i = 0; i < ITERATIONS; ++i) {
            sum = fastAdd(sum, i);
        }
    }
    
    // Direct calculation
    {
        Timer timer("Direct calculation");
        int sum = 0;
        for (int i = 0; i < ITERATIONS; ++i) {
            sum += i;
        }
    }
    
    // Compile-time calculation
    constexpr int result = factorial(10);
    std::cout << "Compile-time calculation result: " << result << std::endl;
}

Branch Prediction Optimization

cpp
void branchPrediction() {
    const size_t SIZE = 1000000;
    std::vector<int> data(SIZE);
    
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 100);
    for (auto& val : data) {
        val = dis(gen);
    }
    
    const int THRESHOLD = 50;
    
    // Random data (hard to predict branches)
    {
        Timer timer("Unpredictable branches");
        int count = 0;
        for (int value : data) {
            if (value > THRESHOLD) {
                count++;
            }
        }
    }
    
    // Sorted data (predictable branches)
    std::sort(data.begin(), data.end());
    {
        Timer timer("Predictable branches");
        int count = 0;
        for (int value : data) {
            if (value > THRESHOLD) {
                count++;
            }
        }
    }
    
    // Branchless version
    {
        Timer timer("Branchless version");
        int count = 0;
        for (int value : data) {
            count += (value > THRESHOLD);
        }
    }
}

🚀 Advanced Optimization

Parallel Algorithms

cpp
#include <execution>
#include <numeric>

void parallelOptimization() {
    const size_t SIZE = 10000000;
    std::vector<int> data(SIZE);
    std::iota(data.begin(), data.end(), 1);
    
    // Serial sum
    {
        Timer timer("Serial sum");
        long long sum = std::accumulate(data.begin(), data.end(), 0LL);
    }
    
    // Parallel sum (C++17)
    #ifdef __cpp_lib_parallel_algorithm
    {
        Timer timer("Parallel sum");
        long long sum = std::reduce(std::execution::par, data.begin(), data.end(), 0LL);
    }
    #endif
}

Cache Optimization

cpp
// Array of structures vs Structure of arrays
struct Point3D_AoS {
    float x, y, z;
};

struct Points3D_SoA {
    std::vector<float> x, y, z;
    Points3D_SoA(size_t size) : x(size), y(size), z(size) {}
};

void cacheOptimization() {
    const size_t SIZE = 1000000;
    
    std::vector<Point3D_AoS> aos_points(SIZE);
    Points3D_SoA soa_points(SIZE);
    
    // AoS access (potentially cache unfriendly)
    {
        Timer timer("AoS access");
        float sum = 0;
        for (const auto& point : aos_points) {
            sum += point.x;
        }
    }
    
    // SoA access (cache friendly)
    {
        Timer timer("SoA access");
        float sum = 0;
        for (float x : soa_points.x) {
            sum += x;
        }
    }
}

Demo Program

cpp
int main() {
    std::cout << "=== C++ Performance Optimization Demo ===" << std::endl;
    
    std::cout << "\n1. Memory access patterns:" << std::endl;
    memoryAccessTest();
    
    std::cout << "\n2. Object pool optimization:" << std::endl;
    objectPoolDemo();
    
    std::cout << "\n3. Container selection:" << std::endl;
    containerComparison();
    
    std::cout << "\n4. Loop optimization:" << std::endl;
    loopOptimization();
    
    std::cout << "\n5. Compiler optimization:" << std::endl;
    compilerOptimizations();
    
    std::cout << "\n6. Branch prediction:" << std::endl;
    branchPrediction();
    
    std::cout << "\n7. Parallel optimization:" << std::endl;
    parallelOptimization();
    
    std::cout << "\n8. Cache optimization:" << std::endl;
    cacheOptimization();
    
    return 0;
}

Summary

Optimization Strategies

  • Measure first: Measure before optimizing
  • Algorithm is king: Choose correct algorithms and data structures
  • Memory awareness: Consider cache friendliness and memory access patterns
  • Compiler cooperation: Utilize compiler optimization features

Performance Points

TechniqueImpactUse Cases
Cache optimizationHighLarge data processing
Memory poolsMediumFrequent allocation
Parallel algorithmsHighCPU-intensive tasks
Branch predictionMediumCondition-heavy tasks

Best Practices

  • Choose appropriate containers and algorithms
  • Consider memory layout and access patterns
  • Utilize compiler optimization options
  • Perform detailed optimization on critical paths
  • Balance code readability and performance

Performance optimization requires deep understanding of computer architecture and compiler working principles, and is an important skill for advanced C++ development.

Content is for learning and research only.