Process 4 angles simultaneously, returning vectorized sin/cos values. SIMD-optimized: vectorized angle decomposition, vector LUT loads with a 4x4 transpose (AoS→SoA), and vectorized quadratic interpolation.
145 {
146
147
148
149
151
152
154
155
157
158
159 simd::simd_u32x4 pos_vec = simd::and_u32_4(angle256_vec, simd::set1_u32_4(0x3F));
160
161
162 simd::simd_u32x4 mirror_s_vec = simd::and_u32_4(quadrant_vec, simd::set1_u32_4(1));
163
164
165
166
167
168 simd::simd_u32x4 sdmask_vec = simd::sub_i32_4(simd::set1_u32_4(0), mirror_s_vec);
169
170
171 simd::simd_u32x4 cdmask_vec = simd::xor_u32_4(sdmask_vec, simd::set1_u32_4(0xFFFFFFFF));
172
173
174 simd::simd_u32x4 quadrant_bit1 = simd::and_u32_4(simd::srl_u32_4(quadrant_vec, 1), simd::set1_u32_4(1));
175 simd::simd_u32x4 svmask_vec = simd::sub_i32_4(simd::set1_u32_4(0), quadrant_bit1);
176
177
178 simd::simd_u32x4 quadrant_xor = simd::xor_u32_4(quadrant_vec, simd::srl_u32_4(quadrant_vec, 1));
179 simd::simd_u32x4 quadrant_xor_bit0 = simd::and_u32_4(quadrant_xor, simd::set1_u32_4(1));
180 simd::simd_u32x4 cvmask_vec = simd::sub_i32_4(simd::set1_u32_4(0), quadrant_xor_bit0);
181
182
183
184
185
186
191
192
193
194 simd::simd_u32x4 two_mirror_s = simd::add_i32_4(mirror_s_vec, mirror_s_vec);
195 simd::simd_u32x4 qi_next_s_vec = simd::sub_i32_4(simd::add_i32_4(qi_s_vec, simd::set1_u32_4(1)), two_mirror_s);
196
197
198
199
200
201
202
203
204
205 u32 qi0 = simd::extract_u32_4(qi_s_vec, 0);
206 u32 qi1 = simd::extract_u32_4(qi_s_vec, 1);
207 u32 qi2 = simd::extract_u32_4(qi_s_vec, 2);
208 u32 qi3 = simd::extract_u32_4(qi_s_vec, 3);
209
210 u32 qn0 = simd::extract_u32_4(qi_next_s_vec, 0);
211 u32 qn1 = simd::extract_u32_4(qi_next_s_vec, 1);
212 u32 qn2 = simd::extract_u32_4(qi_next_s_vec, 2);
213 u32 qn3 = simd::extract_u32_4(qi_next_s_vec, 3);
214
215
216
221
222
223
224
229
234
235
240
241
248
249
250
251
252
253
254
255
256
257
258
259 m0_s_v = simd::sub_i32_4(simd::xor_u32_4(m0_s_v, sdmask_vec), sdmask_vec);
260
261 simd::simd_u32x4 c_s = simd::sub_i32_4(simd::sub_i32_4(y1_s_v, y0_s_v), m0_s_v);
262
263 simd::simd_u32x4 r_s = simd::add_i32_4(simd::mulhi_su32_4(c_s, t_vec), m0_s_v);
264
265 simd::simd_u32x4 s_raw = simd::add_i32_4(simd::mulhi_su32_4(r_s, t_vec), y0_s_v);
266
267
268 m0_c_v = simd::sub_i32_4(simd::xor_u32_4(m0_c_v, cdmask_vec), cdmask_vec);
269 simd::simd_u32x4 c_c = simd::sub_i32_4(simd::sub_i32_4(y1_c_v, y0_c_v), m0_c_v);
270 simd::simd_u32x4 r_c = simd::add_i32_4(simd::mulhi_su32_4(c_c, t_vec), m0_c_v);
271 simd::simd_u32x4 c_raw = simd::add_i32_4(simd::mulhi_su32_4(r_c, t_vec), y0_c_v);
272
273
274
275
276
278 result.sin_vals = simd::sub_i32_4(simd::xor_u32_4(s_raw, svmask_vec), svmask_vec);
279 result.cos_vals = simd::sub_i32_4(simd::xor_u32_4(c_raw, cvmask_vec), cvmask_vec);
281}
platforms::simd_u32x4 simd_u32x4
const i32 sinCosPairedLut[]
expected< T, E > result
Alias for expected (Rust-style naming)