@@ -71,8 +71,8 @@ void ConvolveHorizontally_Neon(const unsigned char* src_data,
71
71
int remainder = filter_length & 3 ;
72
72
if (remainder ) {
73
73
int remainder_offset = (filter_offset + filter_length - remainder ) * 4 ;
74
- accum +=
75
- AccumRemainder (src_data + remainder_offset, filter_values, remainder );
74
+ accum = vaddq_s32 (accum,
75
+ AccumRemainder (src_data + remainder_offset, filter_values, remainder )) ;
76
76
}
77
77
78
78
// Bring this value back in range. All of the filter scaling factors
@@ -128,10 +128,10 @@ void Convolve4RowsHorizontally_Neon(const unsigned char* src_data[4],
128
128
return accum;
129
129
};
130
130
131
- accum0 += iteration (src_data[0 ] + start);
132
- accum1 += iteration (src_data[1 ] + start);
133
- accum2 += iteration (src_data[2 ] + start);
134
- accum3 += iteration (src_data[3 ] + start);
131
+ accum0 = vaddq_s32 (accum0, iteration (src_data[0 ] + start) );
132
+ accum1 = vaddq_s32 (accum1, iteration (src_data[1 ] + start) );
133
+ accum2 = vaddq_s32 (accum2, iteration (src_data[2 ] + start) );
134
+ accum3 = vaddq_s32 (accum3, iteration (src_data[3 ] + start) );
135
135
136
136
start += 16 ;
137
137
filter_values += 4 ;
@@ -140,14 +140,14 @@ void Convolve4RowsHorizontally_Neon(const unsigned char* src_data[4],
140
140
int remainder = filter_length & 3 ;
141
141
if (remainder ) {
142
142
int remainder_offset = (filter_offset + filter_length - remainder ) * 4 ;
143
- accum0 += AccumRemainder (src_data[0 ] + remainder_offset, filter_values,
144
- remainder );
145
- accum1 += AccumRemainder (src_data[1 ] + remainder_offset, filter_values,
146
- remainder );
147
- accum2 += AccumRemainder (src_data[2 ] + remainder_offset, filter_values,
148
- remainder );
149
- accum3 += AccumRemainder (src_data[3 ] + remainder_offset, filter_values,
150
- remainder );
143
+ accum0 = vaddq_s32 (accum0, AccumRemainder (src_data[0 ] + remainder_offset, filter_values,
144
+ remainder )) ;
145
+ accum1 = vaddq_s32 (accum1, AccumRemainder (src_data[1 ] + remainder_offset, filter_values,
146
+ remainder )) ;
147
+ accum2 = vaddq_s32 (accum2, AccumRemainder (src_data[2 ] + remainder_offset, filter_values,
148
+ remainder )) ;
149
+ accum3 = vaddq_s32 (accum3, AccumRemainder (src_data[3 ] + remainder_offset, filter_values,
150
+ remainder )) ;
151
151
}
152
152
153
153
auto pack_result = [](int32x4_t accum) {
@@ -252,8 +252,8 @@ void ConvolveVertically_Neon(const ConvolutionFilter1D::Fixed* filter_values,
252
252
accum8 = vmaxq_u8 (b, accum8);
253
253
} else {
254
254
// Set value of alpha channels to 0xFF.
255
- accum8 = vreinterpretq_u8_u32 (vreinterpretq_u32_u8 (accum8) |
256
- vdupq_n_u32 (0xFF000000 ));
255
+ accum8 = vreinterpretq_u8_u32 (vorrq_u32 ( vreinterpretq_u32_u8 (accum8),
256
+ vdupq_n_u32 (0xFF000000 ))) ;
257
257
}
258
258
259
259
// Store the convolution result (16 bytes) and advance the pixel pointers.
@@ -313,8 +313,8 @@ void ConvolveVertically_Neon(const ConvolutionFilter1D::Fixed* filter_values,
313
313
accum8 = vmaxq_u8 (b, accum8);
314
314
} else {
315
315
// Set value of alpha channels to 0xFF.
316
- accum8 = vreinterpretq_u8_u32 (vreinterpretq_u32_u8 (accum8) |
317
- vdupq_n_u32 (0xFF000000 ));
316
+ accum8 = vreinterpretq_u8_u32 (vorrq_u32 ( vreinterpretq_u32_u8 (accum8),
317
+ vdupq_n_u32 (0xFF000000 ))) ;
318
318
}
319
319
320
320
switch (remainder ) {
0 commit comments