Double precision math in mandelbrot_avx

parent d2727d24
No preview for this file type
......@@ -31,37 +31,37 @@
int
mandelbrot_avx(Complex c)
{
__m256 cr = _mm256_set1_ps(c.real);
__m256 ci = _mm256_set1_ps(c.imag);
__m256 cr = _mm256_set1_pd(c.real);
__m256 ci = _mm256_set1_pd(c.imag);
__m256 zr = cr;
__m256 zi = ci;
__m256 threshold = _mm256_set1_ps(4);
__m256 threshold = _mm256_set1_pd(4);
int k = 1;
__m256 mk = _mm256_set1_ps(k);
__m256 one = _mm256_set1_ps(1);
__m256 mk = _mm256_set1_pd(k);
__m256 one = _mm256_set1_pd(1);
while (++k < MAX_ITERATIONS){
/* Compute z1 from z0 */
__m256 zr2 = _mm256_mul_ps(zr, zr);
__m256 zi2 = _mm256_mul_ps(zi, zi);
__m256 zrzi = _mm256_mul_ps(zr, zi);
__m256 zr2 = _mm256_mul_pd(zr, zr);
__m256 zi2 = _mm256_mul_pd(zi, zi);
__m256 zrzi = _mm256_mul_pd(zr, zi);
/* zr1 = zr0 * zr0 - zi0 * zi0 + cr */
/* zi1 = zr0 * zi0 + zr0 * zi0 + ci */
zr = _mm256_add_ps(_mm256_sub_ps(zr2, zi2), cr);
zi = _mm256_add_ps(_mm256_add_ps(zrzi, zrzi), ci);
zr = _mm256_add_pd(_mm256_sub_pd(zr2, zi2), cr);
zi = _mm256_add_pd(_mm256_add_pd(zrzi, zrzi), ci);
/* Increment k */
zr2 = _mm256_mul_ps(zr, zr);
zi2 = _mm256_mul_ps(zi, zi);
__m256 mag2 = _mm256_add_ps(zr2, zi2);
__m256 mask = _mm256_cmp_ps(mag2, threshold, _CMP_LT_OS);
mk = _mm256_add_ps(_mm256_and_ps(mask, one), mk);
zr2 = _mm256_mul_pd(zr, zr);
zi2 = _mm256_mul_pd(zi, zi);
__m256 mag2 = _mm256_add_pd(zr2, zi2);
__m256 mask = _mm256_cmp_pd(mag2, threshold, _CMP_LT_OS);
mk = _mm256_add_pd(_mm256_and_pd(mask, one), mk);
/* Early bailout? */
if (_mm256_testz_ps(mask, _mm256_set1_ps(-1)))
if (_mm256_testz_pd(mask, _mm256_set1_pd(-1)))
break;
}
return k;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment