ext_vc: apply a source patch to Vc 1.3.3 on macOS only

On APPLE the unpacked Vc 1.3.3 sources are patched with
macOS_compileVc1.3.3.diff before they are configured; on every other platform
the tarball is built unmodified. The patch deletes extract_float_imm(),
extract_double_imm() and extract_float() from the SseIntrinsics namespace in
both godbolt.h and sse/intrinsics.h (presumably because they do not compile
with the macOS toolchain, going by the patch file name).

The patch step is passed to the single ExternalProject_Add() call through the
ext_vc_PATCH_STEP argument list, so the download URL, hash and CMake arguments
exist in one place instead of being copied into an if(APPLE)/else() pair.

NOTE(review): the indentation inside the nested patch and the template
arguments of reinterpret_cast, const_cast and AliasingEntryHelper were
reconstructed. Removed and context lines must match the Vc 1.3.3 tarball
byte-for-byte, so confirm with a dry run of the patch before merging.

diff --git a/3rdparty/ext_vc/CMakeLists.txt b/3rdparty/ext_vc/CMakeLists.txt
--- a/3rdparty/ext_vc/CMakeLists.txt
+++ b/3rdparty/ext_vc/CMakeLists.txt
@@ -1,4 +1,12 @@
 SET(PREFIX_ext_vc "${EXTPREFIX}" )
+
+# Only macOS needs the Vc source patch. Elsewhere this list stays empty, so
+# ExternalProject_Add() below receives no PATCH_COMMAND at all.
+set(ext_vc_PATCH_STEP)
+if(APPLE)
+    set(ext_vc_PATCH_STEP PATCH_COMMAND ${PATCH_COMMAND} -p1 -i ${CMAKE_CURRENT_SOURCE_DIR}/macOS_compileVc1.3.3.diff)
+endif()
+
 ExternalProject_Add( ext_vc
     DOWNLOAD_DIR ${EXTERNALS_DOWNLOAD_DIR}
     URL https://github.com/VcDevel/Vc/releases/download/1.3.3/Vc-1.3.3.tar.gz
@@ -6,7 +14,9 @@
 
     INSTALL_DIR ${PREFIX_ext_vc}
 
+    ${ext_vc_PATCH_STEP}
+
     CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PREFIX_ext_vc} -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=${GLOBAL_BUILD_TYPE} ${GLOBAL_PROFILE} -DCMAKE_SYSTEM_PROCESSOR=x86
 
     UPDATE_COMMAND ""
 )
diff --git a/3rdparty/ext_vc/macOS_compileVc1.3.3.diff b/3rdparty/ext_vc/macOS_compileVc1.3.3.diff
new file mode 100644
--- /dev/null
+++ b/3rdparty/ext_vc/macOS_compileVc1.3.3.diff
@@ -0,0 +1,136 @@
+diff --git a/godbolt.h b/godbolt.h
+index 7ca79259..78ab7415 100644
+--- a/godbolt.h
++++ b/godbolt.h
+@@ -9672,63 +9672,6 @@ namespace Vc_VERSIONED_NAMESPACE
+ {
+ namespace SseIntrinsics
+ {
+-    static Vc_INTRINSIC Vc_CONST float extract_float_imm(const __m128 v, const size_t i) {
+-        float f;
+-        switch (i) {
+-        case 0:
+-            f = _mm_cvtss_f32(v);
+-            break;
+-#if defined Vc_IMPL_SSE4_1 && !defined Vc_MSVC
+-        default:
+-#ifdef Vc_GCC
+-            f = __builtin_ia32_vec_ext_v4sf(static_cast<__v4sf>(v), (i));
+-#else
+-            // MSVC fails to compile this because it can't optimize i to an immediate
+-            _MM_EXTRACT_FLOAT(f, v, i);
+-#endif
+-            break;
+-#else
+-        case 1:
+-            f = _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 4)));
+-            break;
+-        case 2:
+-            f = _mm_cvtss_f32(_mm_movehl_ps(v, v));
+-            break;
+-        case 3:
+-            f = _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 12)));
+-            break;
+-#endif
+-        }
+-        return f;
+-    }
+-    static Vc_INTRINSIC Vc_CONST double extract_double_imm(const __m128d v, const size_t i) {
+-        if (i == 0) {
+-            return _mm_cvtsd_f64(v);
+-        }
+-        return _mm_cvtsd_f64(_mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(v), _mm_castpd_ps(v))));
+-    }
+-    static Vc_INTRINSIC Vc_CONST float extract_float(const __m128 v, const size_t i) {
+-#ifdef Vc_GCC
+-        if (__builtin_constant_p(i)) {
+-            return extract_float_imm(v, i);
+-//X         if (index <= 1) {
+-//X             unsigned long long tmp = _mm_cvtsi128_si64(_mm_castps_si128(v));
+-//X             if (index == 0) tmp &= 0xFFFFFFFFull;
+-//X             if (index == 1) tmp >>= 32;
+-//X             return Common::AliasingEntryHelper<EntryType>(tmp);
+-//X         }
+-        } else {
+-            typedef float float4[4] Vc_MAY_ALIAS;
+-            const float4 &data = reinterpret_cast<const float4 &>(v);
+-            return data[i];
+-        }
+-#else
+-        union { __m128 v; float m[4]; } u;
+-        u.v = v;
+-        return u.m[i];
+-#endif
+-    }
+-
+     static Vc_INTRINSIC Vc_PURE __m128 _mm_stream_load(const float *mem) {
+ #ifdef Vc_IMPL_SSE4_1
+         return _mm_castsi128_ps(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(mem))));
+diff --git a/sse/intrinsics.h b/sse/intrinsics.h
+index 651e2db5..5492f24f 100644
+--- a/sse/intrinsics.h
++++ b/sse/intrinsics.h
+@@ -586,63 +586,6 @@ namespace Vc_VERSIONED_NAMESPACE
+ {
+ namespace SseIntrinsics
+ {
+-    static Vc_INTRINSIC Vc_CONST float extract_float_imm(const __m128 v, const size_t i) {
+-        float f;
+-        switch (i) {
+-        case 0:
+-            f = _mm_cvtss_f32(v);
+-            break;
+-#if defined Vc_IMPL_SSE4_1 && !defined Vc_MSVC
+-        default:
+-#ifdef Vc_GCC
+-            f = __builtin_ia32_vec_ext_v4sf(static_cast<__v4sf>(v), (i));
+-#else
+-            // MSVC fails to compile this because it can't optimize i to an immediate
+-            _MM_EXTRACT_FLOAT(f, v, i);
+-#endif
+-            break;
+-#else
+-        case 1:
+-            f = _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 4)));
+-            break;
+-        case 2:
+-            f = _mm_cvtss_f32(_mm_movehl_ps(v, v));
+-            break;
+-        case 3:
+-            f = _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 12)));
+-            break;
+-#endif
+-        }
+-        return f;
+-    }
+-    static Vc_INTRINSIC Vc_CONST double extract_double_imm(const __m128d v, const size_t i) {
+-        if (i == 0) {
+-            return _mm_cvtsd_f64(v);
+-        }
+-        return _mm_cvtsd_f64(_mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(v), _mm_castpd_ps(v))));
+-    }
+-    static Vc_INTRINSIC Vc_CONST float extract_float(const __m128 v, const size_t i) {
+-#ifdef Vc_GCC
+-        if (__builtin_constant_p(i)) {
+-            return extract_float_imm(v, i);
+-//X         if (index <= 1) {
+-//X             unsigned long long tmp = _mm_cvtsi128_si64(_mm_castps_si128(v));
+-//X             if (index == 0) tmp &= 0xFFFFFFFFull;
+-//X             if (index == 1) tmp >>= 32;
+-//X             return Common::AliasingEntryHelper<EntryType>(tmp);
+-//X         }
+-        } else {
+-            typedef float float4[4] Vc_MAY_ALIAS;
+-            const float4 &data = reinterpret_cast<const float4 &>(v);
+-            return data[i];
+-        }
+-#else
+-        union { __m128 v; float m[4]; } u;
+-        u.v = v;
+-        return u.m[i];
+-#endif
+-    }
+-
+     static Vc_INTRINSIC Vc_PURE __m128 _mm_stream_load(const float *mem) {
+ #ifdef Vc_IMPL_SSE4_1
+         return _mm_castsi128_ps(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(mem))));