The Mandelbrot set is a well known application of visual mathematics. It is a good example of simple math with complex (imaginary) numbers on a complex plane leading to visually impressive results. It works by keeping track of how many iterations of the equation zn+1 = zn2 + c
will occur before a complex number is no longer bound. This can go on forever, and the image generated is unique at every depth, but for the sake of running time there is usually a maximum depth that the iteration can occur. The simulations in this example are run serially, with OpenMP* pragma omp simd
for vectorization, and Intel® Threading Building Blocks (Intel® TBB) tbb::parallel_for
for parallelization.
In addition, this code uses an image base class (image_base.h
) and BMPImage
class (bmp_image.cpp
) to write the results to a viewable image. Do not worry about how this is implemented, as it is unaffected by the code changes listed below.
for
loop for the y-axis, and one for
loop for the x-axis.tbb:parallel_for
// Traverse the sample space in equally spaced steps with width * height samples for (int j = 0; j < height; ++j) { for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } }
tbb::parallel_for
version:
// Traverse the sample space in equally spaced steps with width * height samples tbb::parallel_for (0; height; 1, [&](int j) { for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } });
pragma omp simd
// Traverse the sample space in equally spaced steps with width * height samples for (int j = 0; j < height; ++j) { for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } }
pragma omp simd
version:
// Traverse the sample space in equally spaced steps with width * height samples for (int j = 0; j < height; ++j) { #pragma omp simd for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } }
tbb::parallel_for
+ pragma omp simd
// Traverse the sample space in equally spaced steps with width * height samples for (int j = 0; j < height; ++j) { for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } }
tbb::parallel_for
+ pragma omp simd
version:
// Traverse the sample space in equally spaced steps with width * height samples tbb::parallel_for (0; height; 1, [&](int j) { #pragma omp simd for (int i = 0; i < width; ++i) { double z_real = x0 + i*xstep; double z_imaginary = y0 + j*ystep; double c_real = z_real; double c_imaginary = z_imaginary; double depth = 0; // Figures out how many recurrences are required before divergence, up to max_depth while(depth < max_depth) { if(z_real * z_real + z_imaginary * z_imaginary > 4.0) { break; // Escape from a circle of radius 2 } double temp_real = z_real*z_real - z_imaginary*z_imaginary; double temp_imaginary = 2.0*z_real*z_imaginary; z_real = c_real + temp_real; z_imaginary = c_imaginary + temp_imaginary; ++depth; } output[j*width + i] = static_cast(static_cast (depth) / max_depth * 255); } });
Note: Modified Speedup shows performance speedup with respect to serial implementation.
Modified Speedup | Compiler (Intel-64) | Compiler options | System specifications | ||||||
---|---|---|---|---|---|---|---|---|---|
|
Intel® C++ Compiler 19.0 for Windows | /O3 /QxHost /Qipo /Qopenmp /Qtbb |
|
||||||
|
Intel® C++ Compiler 19.0 for Linux | -O3 -xHost -ipo -qopenmp -tbb |
|
.sln
filePERF_NUM
Build.bat [perf_num]
perf_num
: collect performance numbers (will run example 5 times and take average time)Build.bat run [help|0|1|2|3|4]
source <icc-install-dir>/bin/compilervars.sh {ia32|intel64}
make [icpc] [perf_num=1]
perf_num=1
: collect performance numbers (will run example 5 times and take average time)make run [option=help|0|1|2|3|4]