@@ -225,17 +225,14 @@ void GSSetupPrimCodeGenerator::Color()
225225 // GSVector4 c = dscan.c;
226226 armAsm->Ldr (v16, MemOperand (_dscan, offsetof (GSVertexSW, c)));
227227
228- // constexpr VectorI mask16 = VectorI::cxpr(0xFFFF);
229- armAsm->Movi (v17.V4S (), 0xFFFF );
230-
231- // local.d4.c = (GSVector4i(dscan.c * step_shift) & mask16).xzyw().pu32();
228+ // GSVector4i tmp = GSVector4i(dscan.c * step_shift).xzyw();
229+ // local.d4.c = tmp.uzp1_16(tmp); // NOTE: uzp1_16 is not provided by GSVector, which mainly targets x86 for now
232230 armAsm->Fmul (v2.V4S (), v16.V4S (), v3.V4S ());
233231 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
234232 armAsm->And (v2.V4S (), v17.V4S ());
235233 armAsm->Rev64 (_vscratch.V4S (), v2.V4S ());
236234 armAsm->Uzp1 (v2.V4S (), v2.V4S (), _vscratch.V4S ());
237- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
238- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
235+ armAsm->Uzp1 (v2.V8H (), v2.V8H (), v2.V8H ());
239236 armAsm->Str (v2, MemOperand (_locals, offsetof (GSScanlineLocalData, d4.c )));
240237
241238 // GSVector4 dr = c.xxxx();
@@ -246,25 +243,18 @@ void GSSetupPrimCodeGenerator::Color()
246243
247244 for (int i = 0 ; i < (m_sel.notest ? 1 : 4 ); i++)
248245 {
249- // VectorI r = ( VectorI(dr * shift[1 + i]) & mask16).pu32( );
246+ // VectorI r = VectorI(dr * shift[1 + i]);
250247
251248 armAsm->Fmul (v2.V4S (), v0.V4S (), VRegister (4 + i, kFormat4S ));
252249 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
253- armAsm->And (v2.V4S (), v17.V4S ());
254- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
255- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
256250
257- // VectorI b = ( VectorI(db * shift[1 + i]) & mask16).pu32( );
251+ // VectorI b = VectorI(db * shift[1 + i]);
258252
259253 armAsm->Fmul (v3.V4S (), v1.V4S (), VRegister (4 + i, kFormat4S ));
260254 armAsm->Fcvtzs (v3.V4S (), v3.V4S ());
261- armAsm->And (v3.V4S (), v17.V4S ());
262- armAsm->Uqxtn (v3.V4H (), v3.V4S ());
263- armAsm->Dup (v3.V2D (), v3.V2D (), 0 );
264-
265- // m_local.d[i].rb = r.upl16(b);
266255
267- armAsm->Zip1 (v2.V8H (), v2.V8H (), v3.V8H ());
256+ // m_local.d[i].rb = r.trn1_16(b); // NOTE: trn1_16 is not provided by GSVector, which mainly targets x86 for now
257+ armAsm->Trn1 (v2.V8H (), v2.V8H (), v3.V8H ());
268258 armAsm->Str (v2, _local (d[i].rb ));
269259 }
270260
@@ -278,25 +268,19 @@ void GSSetupPrimCodeGenerator::Color()
278268
279269 for (int i = 0 ; i < (m_sel.notest ? 1 : 4 ); i++)
280270 {
281- // VectorI g = ( VectorI(dg * shift[1 + i]) & mask16).pu32( );
271+ // VectorI g = VectorI(dg * shift[1 + i]);
282272
283273 armAsm->Fmul (v2.V4S (), v0.V4S (), VRegister (4 + i, kFormat4S ));
284274 armAsm->Fcvtzs (v2.V4S (), v2.V4S ());
285- armAsm->And (v2.V4S (), v17.V4S ());
286- armAsm->Uqxtn (v2.V4H (), v2.V4S ());
287- armAsm->Dup (v2.V2D (), v2.V2D (), 0 );
288275
289- // VectorI a = ( VectorI(da * shift[1 + i]) & mask16).pu32( );
276+ // VectorI a = VectorI(da * shift[1 + i]);
290277
291278 armAsm->Fmul (v3.V4S (), v1.V4S (), VRegister (4 + i, kFormat4S ));
292279 armAsm->Fcvtzs (v3.V4S (), v3.V4S ());
293- armAsm->And (v3.V4S (), v17.V4S ());
294- armAsm->Uqxtn (v3.V4H (), v3.V4S ());
295- armAsm->Dup (v3.V2D (), v3.V2D (), 0 );
296280
297- // m_local.d[i].ga = g.upl16 (a);
281+ // m_local.d[i].ga = g.trn1_16 (a); // NOTE: trn1_16 is not provided by GSVector, which mainly targets x86 for now
298282
299- armAsm->Zip1 (v2.V8H (), v2.V8H (), v3.V8H ());
283+ armAsm->Trn1 (v2.V8H (), v2.V8H (), v3.V8H ());
300284 armAsm->Str (v2, _local (d[i].ga ));
301285 }
302286 }
0 commit comments