// Compile-time selection of a packed-double SIMD vocabulary.
//
// Depending on which instruction-set macro the compiler predefines, this block
// provides:
//   L       - number of doubles held by one SIMD register
//   mydbl   - the packed-double vector type
//   ldptr   - pointer-to-function type: load L doubles from memory
//   jsptr   - pointer-to-function type: binary arithmetic on two vectors
//   dbload  - loads L doubles (wraps the *aligned* load intrinsic, so the
//             source address must satisfy that intrinsic's alignment rule:
//             32 bytes for the AVX variant, 16 bytes for the SSE variant)
//   db_mul  - element-wise multiplication
//   db_add  - element-wise addition
//
// The pointers are initialised from captureless lambdas rather than from the
// intrinsics' own addresses: intrinsic headers may declare these functions as
// always-inline (or map them to builtins) without an addressable out-of-line
// definition, so taking their address is not portable. The lambdas keep the
// declared pointer types and call syntax unchanged.
//
// If neither macro is defined, nothing is declared here.
#ifdef __AVX__ // Note: put super set first.
// 4 doubles * 8byte * 8bit = 256 bit width
const int L{4};
using mydbl = __m256d;
using ldptr = __m256d (*)(const double[L]);
using jsptr = __m256d (*)(__m256d, __m256d);
const ldptr dbload = [](const double* src) { return _mm256_load_pd(src); };
const jsptr db_mul = [](__m256d lhs, __m256d rhs) { return _mm256_mul_pd(lhs, rhs); };
const jsptr db_add = [](__m256d lhs, __m256d rhs) { return _mm256_add_pd(lhs, rhs); };
#elif defined(__SSE4_1__)
// 2 doubles * 8byte * 8bit = 128 bit width
const int L{2};
using mydbl = __m128d;
using ldptr = __m128d (*)(const double[L]);
using jsptr = __m128d (*)(__m128d, __m128d);
const ldptr dbload = [](const double* src) { return _mm_load_pd(src); };
const jsptr db_mul = [](__m128d lhs, __m128d rhs) { return _mm_mul_pd(lhs, rhs); };
const jsptr db_add = [](__m128d lhs, __m128d rhs) { return _mm_add_pd(lhs, rhs); };
#endif
这样，自定义函数中调用的 dbload、db_mul、db_add 等函数就会随条件编译自动匹配到对应指令集的实现了。
没有评论:
发表评论