diff --git a/crates/synth-backend/src/arm_encoder.rs b/crates/synth-backend/src/arm_encoder.rs
index a0e3377..3e9bd27 100644
--- a/crates/synth-backend/src/arm_encoder.rs
+++ b/crates/synth-backend/src/arm_encoder.rs
@@ -564,11 +564,12 @@ impl ArmEncoder {
             ArmOp::F32Sqrt { sd, sm } => encode_vfp_2reg(0xEEB10AC0, sd, sm),

             // f32 pseudo-ops — multi-instruction sequences
+            // FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
             ArmOp::F32Ceil { sd, sm } => {
-                return self.encode_arm_f32_rounding(sd, sm, 0b10); // VCVT toward +Inf
+                return self.encode_arm_f32_rounding(sd, sm, 0b01); // Round toward +Inf
             }
             ArmOp::F32Floor { sd, sm } => {
-                return self.encode_arm_f32_rounding(sd, sm, 0b01); // VCVT toward -Inf
+                return self.encode_arm_f32_rounding(sd, sm, 0b10); // Round toward -Inf
             }
             ArmOp::F32Trunc { sd, sm } => {
                 return self.encode_arm_f32_rounding(sd, sm, 0b11); // VCVT toward zero
@@ -646,11 +647,12 @@ impl ArmEncoder {
             ArmOp::F64Sqrt { dd, dm } => encode_vfp_2reg_f64(0xEEB10BC0, dd, dm),

             // f64 pseudo-ops
+            // FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
             ArmOp::F64Ceil { dd, dm } => {
-                return self.encode_arm_f64_rounding(dd, dm, 0b10);
+                return self.encode_arm_f64_rounding(dd, dm, 0b01);
             }
             ArmOp::F64Floor { dd, dm } => {
-                return self.encode_arm_f64_rounding(dd, dm, 0b01);
+                return self.encode_arm_f64_rounding(dd, dm, 0b10);
             }
             ArmOp::F64Trunc { dd, dm } => {
                 return self.encode_arm_f64_rounding(dd, dm, 0b11);
@@ -826,20 +828,59 @@ impl ArmEncoder {
     /// Strategy: VCVT.S32.F32 Sd, Sm (toward zero), then VCVT.F32.S32 Sd, Sd
     /// For ceil/floor/nearest, we use VCVTR (round toward mode) + convert back.
     /// Simplified: convert to int (toward zero for trunc) then back to float.
-    fn encode_arm_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, _mode: u8) -> Result> {
+    /// Encode F32 rounding as ARM32.
+    /// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
+    ///
+    /// For trunc (mode=0b11): uses VCVT.S32.F32 with op (bit[7]) = 1, which always
+    /// rounds toward zero regardless of FPSCR.
+    /// For ceil/floor/nearest: sets FPSCR rounding mode, uses VCVTR.S32.F32 (the R
+    /// variant, bit[7]=0, which honours FPSCR RMode), then restores FPSCR.
+    fn encode_arm_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, mode: u8) -> Result> {
         let mut bytes = Vec::new();
         let sm_num = vfp_sreg_to_num(sm);
         let sd_num = vfp_sreg_to_num(sd);
         let (vd_s, d_s) = encode_sreg(sd_num);
         let (vm_s, m_s) = encode_sreg(sm_num);

-        // VCVT.S32.F32 Sd, Sm (truncate toward zero)
-        // 0xEEBD0AC0 | D << 22 | Vd << 12 | M << 5 | Vm
-        let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
-        bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+        if mode == 0b11 {
+            // Trunc (toward zero): VCVT.S32.F32 — op (bit[7]) = 1 forces round toward zero.
+            // 0xEEBD0AC0: bit[7]=1 => round toward zero regardless of FPSCR
+            let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
+            bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+        } else {
+            // ceil/floor/nearest: manipulate FPSCR rounding mode
+            let rt: u32 = 12; // R12/IP as temp
+
+            // VMRS R12, FPSCR
+            let vmrs = 0xEEF10A10 | (rt << 12);
+            bytes.extend_from_slice(&vmrs.to_le_bytes());
+
+            // BIC R12, R12, #(3 << 22) — clear RMode bits [23:22]
+            // 3<<22 = 0x00C00000. ARM rotated imm: 0x03 ror 10 (rotation=5, imm8=0x03)
+            let bic = 0xE3CC0000 | (rt << 12) | (0x05 << 8) | 0x03;
+            bytes.extend_from_slice(&bic.to_le_bytes());

-        // VCVT.F32.S32 Sd, Sd (convert back to float)
-        // 0xEEB80A40 | D << 22 | Vd << 12 | M << 5 | Vm
+            // ORR R12, R12, #(mode << 22) — set desired rounding mode
+            if mode != 0 {
+                // mode<<22: rotation=5, imm8=mode
+                let orr = 0xE38C0000 | (rt << 12) | (0x05 << 8) | (mode as u32);
+                bytes.extend_from_slice(&orr.to_le_bytes());
+            }
+
+            // VMSR FPSCR, R12
+            let vmsr = 0xEEE10A10 | (rt << 12);
+            bytes.extend_from_slice(&vmsr.to_le_bytes());
+
+            // VCVTR.S32.F32 Sd, Sm — R variant (bit[7]=0), rounds per FPSCR RMode
+            let vcvt_to_int = 0xEEBD0A40 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
+            bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+
+            // Restore FPSCR: clear rmode bits back to nearest (default); NOTE(review):
+            // forces RN rather than the caller's prior RMode — confirm RN is assumed
+            bytes.extend_from_slice(&vmrs.to_le_bytes());
+            bytes.extend_from_slice(&bic.to_le_bytes());
+            bytes.extend_from_slice(&vmsr.to_le_bytes());
+        }
+
+        // VCVT.F32.S32 Sd, Sd (convert integer result back to float)
         let (vd2, d2) = encode_sreg(sd_num);
         let vcvt_to_float = 0xEEB80A40 | (d2 << 22) | (vd2 << 12) | (d_s << 5) | vd_s;
         bytes.extend_from_slice(&vcvt_to_float.to_le_bytes());
@@ -1035,21 +1076,56 @@ impl ArmEncoder {
     }

     /// Encode F64 rounding pseudo-op as ARM32 via VCVT to integer and back.
-    fn encode_arm_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, _mode: u8) -> Result> {
+    /// Encode F64 rounding as ARM32.
+    /// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
+    ///
+    /// For trunc: uses VCVT.S32.F64 with op (bit[7]) = 1 (always rounds toward zero).
+    /// For ceil/floor/nearest: sets FPSCR rounding mode, uses VCVTR.S32.F64 (R variant,
+    /// bit[7]=0, honours FPSCR RMode), then restores FPSCR.
+    fn encode_arm_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, mode: u8) -> Result> {
         let mut bytes = Vec::new();
         let dm_num = vfp_dreg_to_num(dm);
         let dd_num = vfp_dreg_to_num(dd);
         let (vm, m) = encode_dreg(dm_num);
         let (vd, d) = encode_dreg(dd_num);

-        // VCVT.S32.F64 S0, Dm (truncate toward zero)
-        // S0: Vd=0, D=0
-        let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
-        bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+        if mode == 0b11 {
+            // Trunc (toward zero): VCVT.S32.F64 — op (bit[7]) = 1, always truncates
+            let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
+            bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+        } else {
+            // ceil/floor/nearest: manipulate FPSCR rounding mode
+            let rt: u32 = 12;
+
+            // VMRS R12, FPSCR
+            let vmrs = 0xEEF10A10 | (rt << 12);
+            bytes.extend_from_slice(&vmrs.to_le_bytes());
+
+            // BIC R12, R12, #(3 << 22)
+            let bic = 0xE3CC0000 | (rt << 12) | (0x05 << 8) | 0x03;
+            bytes.extend_from_slice(&bic.to_le_bytes());
+
+            // ORR R12, R12, #(mode << 22)
+            if mode != 0 {
+                let orr = 0xE38C0000 | (rt << 12) | (0x05 << 8) | (mode as u32);
+                bytes.extend_from_slice(&orr.to_le_bytes());
+            }
+
+            // VMSR FPSCR, R12
+            let vmsr = 0xEEE10A10 | (rt << 12);
+            bytes.extend_from_slice(&vmsr.to_le_bytes());
+
+            // VCVTR.S32.F64 S0, Dm — R variant (bit[7]=0), rounds per FPSCR RMode;
+            // NOTE(review): writes the integer result into S0 without saving it —
+            // confirm S0 is scratch at every call site
+            let vcvt_to_int = 0xEEBD0B40 | (m << 5) | vm;
+            bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
+
+            // Restore FPSCR (forces round-to-nearest, the default)
+            bytes.extend_from_slice(&vmrs.to_le_bytes());
+            bytes.extend_from_slice(&bic.to_le_bytes());
+            bytes.extend_from_slice(&vmsr.to_le_bytes());
+        }

         // VCVT.F64.S32 Dd, S0 (convert back to double)
-        // 0xEEB80B40 | D << 22 | Vd << 12 | M << 5 | Vm
-        // S0: Vm=0, M=0
         let vcvt_to_float = 0xEEB80B40 | (d << 22) | (vd << 12);
         bytes.extend_from_slice(&vcvt_to_float.to_le_bytes());
@@ -3985,8 +4061,9 @@ impl ArmEncoder {
         }

         // f32 pseudo-ops — multi-instruction sequences
-        ArmOp::F32Ceil { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b10),
-        ArmOp::F32Floor { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b01),
+        // FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
+        ArmOp::F32Ceil { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b01),
+        ArmOp::F32Floor { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b10),
         ArmOp::F32Trunc { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b11),
         ArmOp::F32Nearest { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b00),
         ArmOp::F32Min { sd, sn, sm } => self.encode_thumb_f32_minmax(sd, sn, sm, true),
@@ -4051,8 +4128,9 @@ impl ArmEncoder {
         }

         // f64 pseudo-ops
-        ArmOp::F64Ceil { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b10),
-        ArmOp::F64Floor { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b01),
+        // FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
+        ArmOp::F64Ceil { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b01),
+        ArmOp::F64Floor { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b10),
         ArmOp::F64Trunc { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b11),
         ArmOp::F64Nearest { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b00),
         ArmOp::F64Min { dd, dn, dm } => self.encode_thumb_f64_minmax(dd, dn, dm, true),
@@ -4246,18 +4324,64 @@ impl ArmEncoder {
     }

     /// Encode F32 rounding pseudo-op as Thumb-2 via VCVT to integer and back
-    fn encode_thumb_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, _mode: u8) -> Result> {
+    /// Encode F32 rounding as Thumb-2.
+    /// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
+    ///
+    /// For trunc: uses VCVT.S32.F32 with op (bit[7]) = 1 (always rounds toward zero).
+    /// For ceil/floor/nearest: sets FPSCR rounding mode, uses VCVTR.S32.F32 (R variant,
+    /// bit[7]=0, honours FPSCR RMode), then restores FPSCR.
+    fn encode_thumb_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, mode: u8) -> Result> {
         let mut bytes = Vec::new();
         let sm_num = vfp_sreg_to_num(sm);
         let sd_num = vfp_sreg_to_num(sd);
         let (vd_s, d_s) = encode_sreg(sd_num);
         let (vm_s, m_s) = encode_sreg(sm_num);

-        // VCVT.S32.F32 Sd, Sm (truncate toward zero)
-        let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
-        bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+        if mode == 0b11 {
+            // Trunc (toward zero): VCVT.S32.F32 — op (bit[7]) = 1 always truncates
+            let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+        } else {
+            // ceil/floor/nearest: manipulate FPSCR rounding mode
+            let rt: u32 = 12; // R12/IP as temp
+
+            // VMRS R12, FPSCR
+            let vmrs = 0xEEF10A10 | (rt << 12);
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
+
+            // BIC.W R12, R12, #(3 << 22) — clear RMode bits [23:22]
+            // Thumb-2 modified immediate (ThumbExpandImm) prepends an implicit
+            // leading 1 to imm8<6:0>, so the A32 "0x03 ror 10" form does NOT carry
+            // over. 0x00C00000 = 0xC0 ROR 16 => i=1, imm3=0b000, imm8=0x40.
+            // (The old i=0/imm3=0b101/imm8=0x03 expanded to 0x20C00000 and also
+            // clobbered FPSCR bit 29, the C flag.)
+            let bic_hw1: u16 = 0xF020 | (1 << 10) | ((rt as u16) & 0xF); // BIC, i=1, Rn=R12
+            let bic_hw2: u16 = ((rt as u16) << 8) | 0x40; // imm3=0, Rd=R12, imm8=0x40
+            bytes.extend_from_slice(&bic_hw1.to_le_bytes());
+            bytes.extend_from_slice(&bic_hw2.to_le_bytes());
+
+            // ORR.W R12, R12, #(mode << 22)
+            //   mode=0b01 -> 0x00400000 = 0x80 ROR 17 => i=1, imm3=0, imm8=0x80
+            //   mode=0b10 -> 0x00800000 = 0x80 ROR 16 => i=1, imm3=0, imm8=0x00
+            if mode != 0 {
+                let imm8: u16 = if mode == 0b01 { 0x80 } else { 0x00 };
+                let orr_hw1: u16 = 0xF040 | (1 << 10) | ((rt as u16) & 0xF); // ORR, i=1, Rn=R12
+                let orr_hw2: u16 = ((rt as u16) << 8) | imm8; // imm3=0, Rd=R12
+                bytes.extend_from_slice(&orr_hw1.to_le_bytes());
+                bytes.extend_from_slice(&orr_hw2.to_le_bytes());
+            }
+
+            // VMSR FPSCR, R12
+            let vmsr = 0xEEE10A10 | (rt << 12);
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
+
+            // VCVTR.S32.F32 Sd, Sm — R variant (bit[7]=0), rounds per FPSCR RMode
+            let vcvt_to_int = 0xEEBD0A40 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+
+            // Restore FPSCR: clear rmode bits back to nearest (default)
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
+            bytes.extend_from_slice(&bic_hw1.to_le_bytes());
+            bytes.extend_from_slice(&bic_hw2.to_le_bytes());
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
+        }

-        // VCVT.F32.S32 Sd, Sd (convert back to float)
+        // VCVT.F32.S32 Sd, Sd (convert integer result back to float)
         let (vd2, d2) = encode_sreg(sd_num);
         let vcvt_to_float = 0xEEB80A40 | (d2 << 22) | (vd2 << 12) | (d_s << 5) | vd_s;
         bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_float));
@@ -4487,16 +4611,54 @@ impl ArmEncoder {
     }

     /// Encode F64 rounding pseudo-op as Thumb-2 via VCVT to integer and back
-    fn encode_thumb_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, _mode: u8) -> Result> {
+    /// Encode F64 rounding as Thumb-2.
+    /// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
+    fn encode_thumb_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, mode: u8) -> Result> {
         let mut bytes = Vec::new();
         let dm_num = vfp_dreg_to_num(dm);
         let dd_num = vfp_dreg_to_num(dd);
         let (vm, m) = encode_dreg(dm_num);
         let (vd, d) = encode_dreg(dd_num);

-        // VCVT.S32.F64 S0, Dm
-        let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
-        bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+        if mode == 0b11 {
+            // Trunc: VCVT.S32.F64 — op (bit[7]) = 1, always truncates
+            let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+        } else {
+            let rt: u32 = 12;
+
+            // VMRS R12, FPSCR
+            let vmrs = 0xEEF10A10 | (rt << 12);
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
+
+            // BIC.W R12, R12, #(3 << 22)
+            // Thumb mod-imm for 0x00C00000: i=1, imm3=0, imm8=0x40 (0xC0 ROR 16);
+            // see encode_thumb_f32_rounding for the derivation
+            let bic_hw1: u16 = 0xF020 | (1 << 10) | ((rt as u16) & 0xF);
+            let bic_hw2: u16 = ((rt as u16) << 8) | 0x40;
+            bytes.extend_from_slice(&bic_hw1.to_le_bytes());
+            bytes.extend_from_slice(&bic_hw2.to_le_bytes());
+
+            // ORR.W R12, R12, #(mode << 22)
+            //   mode=0b01 -> i=1, imm3=0, imm8=0x80; mode=0b10 -> i=1, imm3=0, imm8=0x00
+            if mode != 0 {
+                let imm8: u16 = if mode == 0b01 { 0x80 } else { 0x00 };
+                let orr_hw1: u16 = 0xF040 | (1 << 10) | ((rt as u16) & 0xF);
+                let orr_hw2: u16 = ((rt as u16) << 8) | imm8;
+                bytes.extend_from_slice(&orr_hw1.to_le_bytes());
+                bytes.extend_from_slice(&orr_hw2.to_le_bytes());
+            }
+
+            // VMSR FPSCR, R12
+            let vmsr = 0xEEE10A10 | (rt << 12);
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
+
+            // VCVTR.S32.F64 S0, Dm — R variant (bit[7]=0), rounds per FPSCR RMode
+            let vcvt_to_int = 0xEEBD0B40 | (m << 5) | vm;
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
+
+            // Restore FPSCR
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
+            bytes.extend_from_slice(&bic_hw1.to_le_bytes());
+            bytes.extend_from_slice(&bic_hw2.to_le_bytes());
+            bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
+        }

         // VCVT.F64.S32 Dd, S0
         let vcvt_to_float = 0xEEB80B40 | (d << 22) | (vd << 12);
         bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_float));
@@ -5557,7 +5719,8 @@ mod tests {
             sm: VfpReg::S2,
         };
         let code = encoder.encode(&op).unwrap();
-        assert_eq!(code.len(), 8); // VCVT.S32.F32 + VCVT.F32.S32
+        // VMRS + BIC + ORR + VMSR + VCVT.S32.F32 + VMRS + BIC + VMSR + VCVT.F32.S32
+        assert_eq!(code.len(), 36);
     }

     #[test]
@@ -5568,7 +5731,8 @@
             sm: VfpReg::S2,
         };
         let code = encoder.encode(&op).unwrap();
-        assert_eq!(code.len(), 8); // Two VFP instructions as Thumb-2
+        // VMRS + BIC.W + ORR.W + VMSR + VCVT + VMRS + BIC.W + VMSR + VCVT.F32.S32
+        assert_eq!(code.len(), 36);
     }

     #[test]
diff --git a/crates/synth-backend/tests/f32_vfp_encoding_test.rs b/crates/synth-backend/tests/f32_vfp_encoding_test.rs
index 94ba2f9..eb3ff70 100644
--- a/crates/synth-backend/tests/f32_vfp_encoding_test.rs
+++ b/crates/synth-backend/tests/f32_vfp_encoding_test.rs
@@ -425,7 +425,8 @@ fn test_f32_ceil_encodes_successfully() {
         sm: VfpReg::S0,
     });
     assert!(result.is_ok(), "F32Ceil should encode successfully");
-    assert_eq!(result.unwrap().len(), 8); // VCVT.S32.F32 + VCVT.F32.S32
+    // VMRS + BIC.W + ORR.W + VMSR + VCVT + VMRS + BIC.W + VMSR + VCVT.F32.S32
+    assert_eq!(result.unwrap().len(), 36);
 }

 #[test]