Skip to content

Commit 23c78bd

Browse files
committed
Use ordered reduction intrinsics for integer reductions
Only the ordered intrinsics have an implementation in rustc-const-eval.
1 parent 3ef10b0 commit 23c78bd

File tree

2 files changed

+30
-32
lines changed

2 files changed

+30
-32
lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4778,7 +4778,7 @@ pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m
47784778
#[target_feature(enable = "avx512bw,avx512vl")]
47794779
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
47804780
pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
4781-
unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
4781+
unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
47824782
}
47834783

47844784
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4788,7 +4788,7 @@ pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
47884788
#[target_feature(enable = "avx512bw,avx512vl")]
47894789
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
47904790
pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
4791-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
4791+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
47924792
}
47934793

47944794
/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4798,7 +4798,7 @@ pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
47984798
#[target_feature(enable = "avx512bw,avx512vl")]
47994799
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48004800
pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
4801-
unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
4801+
unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
48024802
}
48034803

48044804
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4808,7 +4808,7 @@ pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
48084808
#[target_feature(enable = "avx512bw,avx512vl")]
48094809
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48104810
pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
4811-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
4811+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
48124812
}
48134813

48144814
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4818,7 +4818,7 @@ pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
48184818
#[target_feature(enable = "avx512bw,avx512vl")]
48194819
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48204820
pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
4821-
unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
4821+
unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
48224822
}
48234823

48244824
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4828,7 +4828,7 @@ pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
48284828
#[target_feature(enable = "avx512bw,avx512vl")]
48294829
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48304830
pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
4831-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
4831+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
48324832
}
48334833

48344834
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4838,7 +4838,7 @@ pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
48384838
#[target_feature(enable = "avx512bw,avx512vl")]
48394839
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48404840
pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
4841-
unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
4841+
unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
48424842
}
48434843

48444844
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4848,7 +4848,7 @@ pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
48484848
#[target_feature(enable = "avx512bw,avx512vl")]
48494849
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48504850
pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
4851-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
4851+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
48524852
}
48534853

48544854
/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
@@ -5282,7 +5282,7 @@ pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
52825282
#[target_feature(enable = "avx512bw,avx512vl")]
52835283
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
52845284
pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5285-
unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
5285+
unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
52865286
}
52875287

52885288
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5292,7 +5292,7 @@ pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
52925292
#[target_feature(enable = "avx512bw,avx512vl")]
52935293
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
52945294
pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5295-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
5295+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
52965296
}
52975297

52985298
/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5302,7 +5302,7 @@ pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
53025302
#[target_feature(enable = "avx512bw,avx512vl")]
53035303
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53045304
pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5305-
unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
5305+
unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
53065306
}
53075307

53085308
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5312,7 +5312,7 @@ pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
53125312
#[target_feature(enable = "avx512bw,avx512vl")]
53135313
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53145314
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5315-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
5315+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
53165316
}
53175317

53185318
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5322,7 +5322,7 @@ pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
53225322
#[target_feature(enable = "avx512bw,avx512vl")]
53235323
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53245324
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5325-
unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
5325+
unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
53265326
}
53275327

53285328
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5332,7 +5332,7 @@ pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
53325332
#[target_feature(enable = "avx512bw,avx512vl")]
53335333
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53345334
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5335-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
5335+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
53365336
}
53375337

53385338
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5342,7 +5342,7 @@ pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
53425342
#[target_feature(enable = "avx512bw,avx512vl")]
53435343
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53445344
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5345-
unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
5345+
unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
53465346
}
53475347

53485348
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5352,7 +5352,7 @@ pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
53525352
#[target_feature(enable = "avx512bw,avx512vl")]
53535353
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53545354
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5355-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
5355+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
53565356
}
53575357

53585358
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.

crates/core_arch/src/x86/avx512f.rs

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33101,7 +33101,7 @@ pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
3310133101
#[target_feature(enable = "avx512f")]
3310233102
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3310333103
pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33104-
unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33104+
unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
3310533105
}
3310633106

3310733107
/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33111,7 +33111,7 @@ pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
3311133111
#[target_feature(enable = "avx512f")]
3311233112
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3311333113
pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33114-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33114+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
3311533115
}
3311633116

3311733117
/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
@@ -33121,7 +33121,7 @@ pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
3312133121
#[target_feature(enable = "avx512f")]
3312233122
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3312333123
pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33124-
unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33124+
unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
3312533125
}
3312633126

3312733127
/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33131,7 +33131,7 @@ pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
3313133131
#[target_feature(enable = "avx512f")]
3313233132
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3313333133
pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33134-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33134+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
3313533135
}
3313633136

3313733137
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
@@ -33197,7 +33197,7 @@ pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
3319733197
#[target_feature(enable = "avx512f")]
3319833198
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3319933199
pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33200-
unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33200+
unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
3320133201
}
3320233202

3320333203
/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33208,11 +33208,10 @@ pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
3320833208
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3320933209
pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
3321033210
unsafe {
33211-
simd_reduce_mul_unordered(simd_select_bitmask(
33212-
k,
33213-
a.as_i32x16(),
33214-
_mm512_set1_epi32(1).as_i32x16(),
33215-
))
33211+
simd_reduce_mul_ordered(
33212+
simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
33213+
1,
33214+
)
3321633215
}
3321733216
}
3321833217

@@ -33223,7 +33222,7 @@ pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
3322333222
#[target_feature(enable = "avx512f")]
3322433223
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3322533224
pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33226-
unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33225+
unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
3322733226
}
3322833227

3322933228
/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33234,11 +33233,10 @@ pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
3323433233
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3323533234
pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
3323633235
unsafe {
33237-
simd_reduce_mul_unordered(simd_select_bitmask(
33238-
k,
33239-
a.as_i64x8(),
33240-
_mm512_set1_epi64(1).as_i64x8(),
33241-
))
33236+
simd_reduce_mul_ordered(
33237+
simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
33238+
1,
33239+
)
3324233240
}
3324333241
}
3324433242

0 commit comments

Comments
 (0)