The Mandelbrot set is a well known application of visual mathematics. It is a good example of simple math with complex (imaginary) numbers on a complex plane leading to visually impressive results. It works by keeping track of how many iterations of the equation zn+1 = zn2 + c will occur before a complex number is no longer bound. This can go on forever, and the image generated is unique at every depth, but for the sake of running time there is usually a maximum depth that the iteration can occur. The simulations in this example are run serially, with OpenMP* pragma omp simd for vectorization, and Intel® Threading Building Blocks (Intel® TBB) tbb::parallel_for for parallelization.

In addition, this code uses an image base class (image_base.h) and BMPImage class (bmp_image.cpp) to write the results to a viewable image. Do not worry about how this is implemented, as it is unaffected by the code changes listed below.

System Requirements:

Code Change Highlights:

All changes occur while traversing the complex plane, using one for loop for the y-axis, and one for loop for the x-axis.
tbb:parallel_for
linear version:
// Traverse the sample space in equally spaced steps with width * height samples
for (int j = 0; j < height; ++j) {
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
}
tbb::parallel_for version:
// Traverse the sample space in equally spaced steps with width * height samples
tbb::parallel_for (0; height; 1, [&](int j) {
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
});
pragma omp simd
scalar version:
// Traverse the sample space in equally spaced steps with width * height samples
for (int j = 0; j < height; ++j) {
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
}
pragma omp simd version:
// Traverse the sample space in equally spaced steps with width * height samples
for (int j = 0; j < height; ++j) {
#pragma omp simd
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
}
tbb::parallel_for + pragma omp simd
linear/scalar version:
// Traverse the sample space in equally spaced steps with width * height samples
for (int j = 0; j < height; ++j) {
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
}
tbb::parallel_for + pragma omp simd version:
// Traverse the sample space in equally spaced steps with width * height samples
tbb::parallel_for (0; height; 1, [&](int j) {
#pragma omp simd
    for (int i = 0; i < width; ++i) {
        double z_real = x0 + i*xstep;
        double z_imaginary = y0 + j*ystep;
        double c_real = z_real;
        double c_imaginary = z_imaginary;

        double depth = 0;
        // Figures out how many recurrences are required before divergence, up to max_depth
        while(depth < max_depth) {
            if(z_real * z_real + z_imaginary * z_imaginary > 4.0) {
                break; // Escape from a circle of radius 2
            }
            double temp_real = z_real*z_real - z_imaginary*z_imaginary;
            double temp_imaginary = 2.0*z_real*z_imaginary;
            z_real = c_real + temp_real;
            z_imaginary = c_imaginary + temp_imaginary;

            ++depth;
        }
        output[j*width + i] = static_cast(static_cast(depth) / max_depth * 255);
    }
});

Performance Data:

Note: Modified Speedup shows performance speedup with respect to serial implementation.

Modified Speedup Compiler (Intel-64) Compiler options System specifications
simd: 1.4x
tbb::parallel_for: 2.3x
Both: 3.5x
Intel® C++ Compiler 19.0 for Windows /O3 /QxHost /Qipo /Qopenmp /Qtbb
Microsoft Windows 10 Enterprise (x64)
Intel® Core(TM) i5-5300U CPU @ 2.30GHz
8GB memory
simd: 1.6x
tbb::parallel_for: 5.0x
Both: 7.6x
Intel® C++ Compiler 19.0 for Linux -O3 -xHost -ipo -qopenmp -tbb
RHEL* 7 (x64)
4th Generation Intel® Core™ i7-4790 CPU @ 3.60GHz
32GB memory

Build Instructions:

For Microsoft Visual Studio users:
For Microsoft Windows Command Line users:
For Linux* or macOS* users: