@@ -225,17 +225,13 @@ void GSSetupPrimCodeGenerator::Color()
225225 // GSVector4 c = dscan.c;
226226 armAsm->Ldr (v16, MemOperand (_dscan, offsetof (GSVertexSW, c)));
227227
228- // constexpr VectorI mask16 = VectorI::cxpr(0xFFFF);
229- armAsm->Movi (v17.V4S (), 0xFFFF );
230-
231- // local.d4.c = (GSVector4i(dscan.c * step_shift) & mask16).xzyw().pu32();
228+ // GSVector4i tmp = GSVector4i(dscan.c * step_shift).xzyw();
229+ // local.d4.c = tmp.uzp1_16(tmp); // Not currently in GSVector since that's mainly targeting x86 for now
232230 armAsm->Fmul (v2.V4S (), v16.V4S (), v3.V4S ());
233231 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
234- armAsm->And (v2.V4S (), v17.V4S ());
235232 armAsm->Rev64 (_vscratch.V4S (), v2.V4S ());
236233 armAsm->Uzp1 (v2.V4S (), v2.V4S (), _vscratch.V4S ());
237- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
238- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
234+ armAsm->Uzp1 (v2.V8H (), v2.V8H (), v2.V8H ());
239235 armAsm->Str (v2, MemOperand (_locals, offsetof (GSScanlineLocalData, d4.c )));
240236
241237 // GSVector4 dr = c.xxxx();
@@ -246,25 +242,18 @@ void GSSetupPrimCodeGenerator::Color()
246242
247243 for (int i = 0 ; i < (m_sel.notest ? 1 : 4 ); i++)
248244 {
249- // VectorI r = ( VectorI(dr * shift[1 + i]) & mask16).pu32( );
245+ // VectorI r = VectorI(dr * shift[1 + i]);
250246
251247 armAsm->Fmul (v2.V4S (), v0.V4S (), VRegister (4 + i, kFormat4S ));
252248 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
253- armAsm->And (v2.V4S (), v17.V4S ());
254- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
255- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
256249
257- // VectorI b = ( VectorI(db * shift[1 + i]) & mask16).pu32( );
250+ // VectorI b = VectorI(db * shift[1 + i]);
258251
259252 armAsm->Fmul (v3.V4S (), v1.V4S (), VRegister (4 + i, kFormat4S ));
260253 armAsm->Fcvtzs (v3.V4S (), v3.V4S ());
261- armAsm->And (v3.V4S (), v17.V4S ());
262- armAsm->Uqxtn (v3.V4H (), v3.V4S ());
263- armAsm->Dup (v3.V2D (), v3.V2D (), 0 );
264-
265- // m_local.d[i].rb = r.upl16(b);
266254
267- armAsm->Zip1 (v2.V8H (), v2.V8H (), v3.V8H ());
255+ // m_local.d[i].rb = r.trn1_16(b); // Not currently in GSVector since that's mainly targeting x86 for now
256+ armAsm->Trn1 (v2.V8H (), v2.V8H (), v3.V8H ());
268257 armAsm->Str (v2, _local (d[i].rb ));
269258 }
270259
@@ -278,25 +267,19 @@ void GSSetupPrimCodeGenerator::Color()
278267
279268 for (int i = 0 ; i < (m_sel.notest ? 1 : 4 ); i++)
280269 {
281- // VectorI g = ( VectorI(dg * shift[1 + i]) & mask16).pu32( );
270+ // VectorI g = VectorI(dg * shift[1 + i]);
282271
283272 armAsm->Fmul (v2.V4S (), v0.V4S (), VRegister (4 + i, kFormat4S ));
284273 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
285- armAsm->And (v2.V4S (), v17.V4S ());
286- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
287- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
288274
289- // VectorI a = ( VectorI(da * shift[1 + i]) & mask16).pu32( );
275+ // VectorI a = VectorI(da * shift[1 + i]);
290276
291277 armAsm->Fmul (v3.V4S (), v1.V4S (), VRegister (4 + i, kFormat4S ));
292278 armAsm->Fcvtzs (v3.V4S (), v3.V4S ());
293- armAsm->And (v3.V4S (), v17.V4S ());
294- armAsm->Uqxtn (v3.V4H (), v3.V4S ());
295- armAsm->Dup (v3.V2D (), v3.V2D (), 0 );
296279
297- // m_local.d[i].ga = g.upl16 (a);
280+ // m_local.d[i].ga = g.trn1_16 (a); // Not currently in GSVector since that's mainly targeting x86 for now
298281
299- armAsm->Zip1 (v2.V8H (), v2.V8H (), v3.V8H ());
282+ armAsm->Trn1 (v2.V8H (), v2.V8H (), v3.V8H ());
300283 armAsm->Str (v2, _local (d[i].ga ));
301284 }
302285 }
0 commit comments