Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 197 additions & 33 deletions crates/synth-backend/src/arm_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,11 +564,12 @@ impl ArmEncoder {
ArmOp::F32Sqrt { sd, sm } => encode_vfp_2reg(0xEEB10AC0, sd, sm),

// f32 pseudo-ops — multi-instruction sequences
// FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
ArmOp::F32Ceil { sd, sm } => {
return self.encode_arm_f32_rounding(sd, sm, 0b10); // VCVT toward +Inf
return self.encode_arm_f32_rounding(sd, sm, 0b01); // Round toward +Inf
}
ArmOp::F32Floor { sd, sm } => {
return self.encode_arm_f32_rounding(sd, sm, 0b01); // VCVT toward -Inf
return self.encode_arm_f32_rounding(sd, sm, 0b10); // Round toward -Inf
}
ArmOp::F32Trunc { sd, sm } => {
return self.encode_arm_f32_rounding(sd, sm, 0b11); // VCVT toward zero
Expand Down Expand Up @@ -646,11 +647,12 @@ impl ArmEncoder {
ArmOp::F64Sqrt { dd, dm } => encode_vfp_2reg_f64(0xEEB10BC0, dd, dm),

// f64 pseudo-ops
// FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
ArmOp::F64Ceil { dd, dm } => {
return self.encode_arm_f64_rounding(dd, dm, 0b10);
return self.encode_arm_f64_rounding(dd, dm, 0b01);
}
ArmOp::F64Floor { dd, dm } => {
return self.encode_arm_f64_rounding(dd, dm, 0b01);
return self.encode_arm_f64_rounding(dd, dm, 0b10);
}
ArmOp::F64Trunc { dd, dm } => {
return self.encode_arm_f64_rounding(dd, dm, 0b11);
Expand Down Expand Up @@ -826,20 +828,59 @@ impl ArmEncoder {
/// Strategy: VCVT.S32.F32 Sd, Sm (toward zero), then VCVT.F32.S32 Sd, Sd
/// For ceil/floor/nearest, we use VCVTR (round toward mode) + convert back.
/// Simplified: convert to int (toward zero for trunc) then back to float.
fn encode_arm_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, _mode: u8) -> Result<Vec<u8>> {
/// Encode F32 rounding as ARM32.
/// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
///
/// For trunc (mode=0b11): uses VCVT.S32.F32 (bit[7]=1, always rounds toward zero).
/// For ceil/floor/nearest: sets FPSCR RMode, uses VCVTR.S32.F32 (bit[7]=0, the variant
/// which honours FPSCR RMode), then resets RMode to round-to-nearest.
fn encode_arm_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, mode: u8) -> Result<Vec<u8>> {
let mut bytes = Vec::new();
let sm_num = vfp_sreg_to_num(sm);
let sd_num = vfp_sreg_to_num(sd);
let (vd_s, d_s) = encode_sreg(sd_num);
let (vm_s, m_s) = encode_sreg(sm_num);

// VCVT.S32.F32 Sd, Sm (truncate toward zero)
// 0xEEBD0AC0 | D << 22 | Vd << 12 | M << 5 | Vm
let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
if mode == 0b11 {
// Trunc (toward zero): VCVT.S32.F32 — the non-"R" variant always truncates.
// 0xEEBD0AC0: bit[7]=1 => round toward zero regardless of FPSCR
let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
} else {
// ceil/floor/nearest: manipulate FPSCR rounding mode
let rt: u32 = 12; // R12/IP as temp

// VMRS R12, FPSCR
let vmrs = 0xEEF10A10 | (rt << 12);
bytes.extend_from_slice(&vmrs.to_le_bytes());

// BIC R12, R12, #(3 << 22) — clear RMode bits [23:22]
// 3<<22 = 0x00C00000. ARM rotated imm: 0x03 ror 10 (rotation=5, imm8=0x03)
let bic = 0xE3CC0000 | (rt << 12) | (0x05 << 8) | 0x03;
bytes.extend_from_slice(&bic.to_le_bytes());

// VCVT.F32.S32 Sd, Sd (convert back to float)
// 0xEEB80A40 | D << 22 | Vd << 12 | M << 5 | Vm
// ORR R12, R12, #(mode << 22) — set desired rounding mode
if mode != 0 {
// mode<<22: rotation=5, imm8=mode
let orr = 0xE38C0000 | (rt << 12) | (0x05 << 8) | (mode as u32);
bytes.extend_from_slice(&orr.to_le_bytes());
}

// VMSR FPSCR, R12
let vmsr = 0xEEE10A10 | (rt << 12);
bytes.extend_from_slice(&vmsr.to_le_bytes());

// VCVTR.S32.F32 Sd, Sm — "R" variant (bit[7]=0), uses FPSCR rounding mode
let vcvt_to_int = 0xEEBD0A40 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());

// Restore FPSCR: clear rmode bits back to nearest (default)
bytes.extend_from_slice(&vmrs.to_le_bytes());
bytes.extend_from_slice(&bic.to_le_bytes());
bytes.extend_from_slice(&vmsr.to_le_bytes());
}

// VCVT.F32.S32 Sd, Sd (convert integer result back to float)
let (vd2, d2) = encode_sreg(sd_num);
let vcvt_to_float = 0xEEB80A40 | (d2 << 22) | (vd2 << 12) | (d_s << 5) | vd_s;
bytes.extend_from_slice(&vcvt_to_float.to_le_bytes());
Expand Down Expand Up @@ -1035,21 +1076,56 @@ impl ArmEncoder {
}

/// Encode F64 rounding pseudo-op as ARM32 via VCVT to integer and back.
fn encode_arm_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, _mode: u8) -> Result<Vec<u8>> {
/// Encode F64 rounding as ARM32.
/// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
///
/// For trunc: uses VCVT.S32.F64 (bit[7]=1, always truncates).
/// For ceil/floor/nearest: sets FPSCR RMode, uses VCVTR.S32.F64 (bit[7]=0, honours RMode),
/// then resets RMode to round-to-nearest.
fn encode_arm_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, mode: u8) -> Result<Vec<u8>> {
let mut bytes = Vec::new();
let dm_num = vfp_dreg_to_num(dm);
let dd_num = vfp_dreg_to_num(dd);
let (vm, m) = encode_dreg(dm_num);
let (vd, d) = encode_dreg(dd_num);

// VCVT.S32.F64 S0, Dm (truncate toward zero)
// S0: Vd=0, D=0
let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
if mode == 0b11 {
// Trunc (toward zero): VCVT.S32.F64 — bit[7]=1, always truncates
let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());
} else {
// ceil/floor/nearest: manipulate FPSCR rounding mode
let rt: u32 = 12;

// VMRS R12, FPSCR
let vmrs = 0xEEF10A10 | (rt << 12);
bytes.extend_from_slice(&vmrs.to_le_bytes());

// BIC R12, R12, #(3 << 22)
let bic = 0xE3CC0000 | (rt << 12) | (0x05 << 8) | 0x03;
bytes.extend_from_slice(&bic.to_le_bytes());

// ORR R12, R12, #(mode << 22)
if mode != 0 {
let orr = 0xE38C0000 | (rt << 12) | (0x05 << 8) | (mode as u32);
bytes.extend_from_slice(&orr.to_le_bytes());
}

// VMSR FPSCR, R12
let vmsr = 0xEEE10A10 | (rt << 12);
bytes.extend_from_slice(&vmsr.to_le_bytes());

// VCVTR.S32.F64 S0, Dm — "R" variant (bit[7]=0), uses FPSCR rmode
let vcvt_to_int = 0xEEBD0B40 | (m << 5) | vm;
bytes.extend_from_slice(&vcvt_to_int.to_le_bytes());

// Restore FPSCR
bytes.extend_from_slice(&vmrs.to_le_bytes());
bytes.extend_from_slice(&bic.to_le_bytes());
bytes.extend_from_slice(&vmsr.to_le_bytes());
}

// VCVT.F64.S32 Dd, S0 (convert back to double)
// 0xEEB80B40 | D << 22 | Vd << 12 | M << 5 | Vm
// S0: Vm=0, M=0
let vcvt_to_float = 0xEEB80B40 | (d << 22) | (vd << 12);
bytes.extend_from_slice(&vcvt_to_float.to_le_bytes());

Expand Down Expand Up @@ -3985,8 +4061,9 @@ impl ArmEncoder {
}

// f32 pseudo-ops — multi-instruction sequences
ArmOp::F32Ceil { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b10),
ArmOp::F32Floor { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b01),
// FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
ArmOp::F32Ceil { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b01),
ArmOp::F32Floor { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b10),
ArmOp::F32Trunc { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b11),
ArmOp::F32Nearest { sd, sm } => self.encode_thumb_f32_rounding(sd, sm, 0b00),
ArmOp::F32Min { sd, sn, sm } => self.encode_thumb_f32_minmax(sd, sn, sm, true),
Expand Down Expand Up @@ -4051,8 +4128,9 @@ impl ArmEncoder {
}

// f64 pseudo-ops
ArmOp::F64Ceil { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b10),
ArmOp::F64Floor { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b01),
// FPSCR RMode: 00=nearest, 01=+inf(ceil), 10=-inf(floor), 11=zero(trunc)
ArmOp::F64Ceil { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b01),
ArmOp::F64Floor { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b10),
ArmOp::F64Trunc { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b11),
ArmOp::F64Nearest { dd, dm } => self.encode_thumb_f64_rounding(dd, dm, 0b00),
ArmOp::F64Min { dd, dn, dm } => self.encode_thumb_f64_minmax(dd, dn, dm, true),
Expand Down Expand Up @@ -4246,18 +4324,64 @@ impl ArmEncoder {
}

/// Encode F32 rounding pseudo-op as Thumb-2 via VCVT to integer and back
fn encode_thumb_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, _mode: u8) -> Result<Vec<u8>> {
/// Encode F32 rounding as Thumb-2.
/// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
///
/// For trunc: uses VCVT.S32.F32 (bit[7]=1, always truncates).
/// For ceil/floor/nearest: sets FPSCR RMode, uses VCVTR.S32.F32 (bit[7]=0, honours RMode),
/// then resets RMode to round-to-nearest.
fn encode_thumb_f32_rounding(&self, sd: &VfpReg, sm: &VfpReg, mode: u8) -> Result<Vec<u8>> {
let mut bytes = Vec::new();
let sm_num = vfp_sreg_to_num(sm);
let sd_num = vfp_sreg_to_num(sd);
let (vd_s, d_s) = encode_sreg(sd_num);
let (vm_s, m_s) = encode_sreg(sm_num);

// VCVT.S32.F32 Sd, Sm (truncate toward zero)
let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
if mode == 0b11 {
// Trunc (toward zero): VCVT.S32.F32 — bit[7]=1, always truncates
let vcvt_to_int = 0xEEBD0AC0 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
} else {
// ceil/floor/nearest: manipulate FPSCR rounding mode
let rt: u32 = 12; // R12/IP as temp

// VMRS R12, FPSCR
let vmrs = 0xEEF10A10 | (rt << 12);
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));

// BIC.W R12, R12, #(3 << 22) — clear RMode bits [23:22]
// Thumb-2 modified immediate for 3<<22 = 0x00C00000:
// BIC.W encoding: 11110 i 0 0001 S Rn | 0 imm3 Rd imm8
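// NOTE(review): Thumb mod-imm is not the ARM rotate scheme. With i=0, imm3=0b101, imm8=0x03
// (imm12=0x503), ThumbExpandImm yields ('1':imm12[6:0]) ROR imm12[11:7] = 0x83 ROR 10 = 0x20C00000,
// which also touches FPSCR bit 29. 0x00C00000 needs i=1, imm3=0b000, imm8=0x40 (0xC0 ROR 16).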
let bic_hw1: u16 = 0xF020 | ((rt as u16) & 0xF); // BIC, Rn=R12
let bic_hw2: u16 = (0x05 << 12) | ((rt as u16) << 8) | 0x03;
bytes.extend_from_slice(&bic_hw1.to_le_bytes());
bytes.extend_from_slice(&bic_hw2.to_le_bytes());

// ORR.W R12, R12, #(mode << 22)
if mode != 0 {
let orr_hw1: u16 = 0xF040 | ((rt as u16) & 0xF); // ORR, Rn=R12
let orr_hw2: u16 = (0x05 << 12) | ((rt as u16) << 8) | (mode as u16);
bytes.extend_from_slice(&orr_hw1.to_le_bytes());
bytes.extend_from_slice(&orr_hw2.to_le_bytes());
}

// VMSR FPSCR, R12
let vmsr = 0xEEE10A10 | (rt << 12);
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));

// VCVTR.S32.F32 Sd, Sm — "R" variant (bit[7]=0), uses FPSCR rmode
let vcvt_to_int = 0xEEBD0A40 | (d_s << 22) | (vd_s << 12) | (m_s << 5) | vm_s;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));

// Restore FPSCR: clear rmode bits back to nearest (default)
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
bytes.extend_from_slice(&bic_hw1.to_le_bytes());
bytes.extend_from_slice(&bic_hw2.to_le_bytes());
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
}

// VCVT.F32.S32 Sd, Sd (convert back to float)
// VCVT.F32.S32 Sd, Sd (convert integer result back to float)
let (vd2, d2) = encode_sreg(sd_num);
let vcvt_to_float = 0xEEB80A40 | (d2 << 22) | (vd2 << 12) | (d_s << 5) | vd_s;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_float));
Expand Down Expand Up @@ -4487,16 +4611,54 @@ impl ArmEncoder {
}

/// Encode F64 rounding pseudo-op as Thumb-2 via VCVT to integer and back
fn encode_thumb_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, _mode: u8) -> Result<Vec<u8>> {
/// Encode F64 rounding as Thumb-2.
/// `mode`: FPSCR RMode — 0b00=nearest, 0b01=+inf(ceil), 0b10=-inf(floor), 0b11=zero(trunc)
fn encode_thumb_f64_rounding(&self, dd: &VfpReg, dm: &VfpReg, mode: u8) -> Result<Vec<u8>> {
let mut bytes = Vec::new();
let dm_num = vfp_dreg_to_num(dm);
let dd_num = vfp_dreg_to_num(dd);
let (vm, m) = encode_dreg(dm_num);
let (vd, d) = encode_dreg(dd_num);

// VCVT.S32.F64 S0, Dm
let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
if mode == 0b11 {
// Trunc: VCVT.S32.F64 — bit[7]=1, always truncates
let vcvt_to_int = 0xEEBD0BC0 | (m << 5) | vm;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));
} else {
let rt: u32 = 12;

// VMRS R12, FPSCR
let vmrs = 0xEEF10A10 | (rt << 12);
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));

// BIC.W R12, R12, #(3 << 22)
let bic_hw1: u16 = 0xF020 | ((rt as u16) & 0xF);
let bic_hw2: u16 = (0x05 << 12) | ((rt as u16) << 8) | 0x03;
bytes.extend_from_slice(&bic_hw1.to_le_bytes());
bytes.extend_from_slice(&bic_hw2.to_le_bytes());

// ORR.W R12, R12, #(mode << 22)
if mode != 0 {
let orr_hw1: u16 = 0xF040 | ((rt as u16) & 0xF);
let orr_hw2: u16 = (0x05 << 12) | ((rt as u16) << 8) | (mode as u16);
bytes.extend_from_slice(&orr_hw1.to_le_bytes());
bytes.extend_from_slice(&orr_hw2.to_le_bytes());
}

// VMSR FPSCR, R12
let vmsr = 0xEEE10A10 | (rt << 12);
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));

// VCVTR.S32.F64 S0, Dm — "R" variant (bit[7]=0), uses FPSCR rmode
let vcvt_to_int = 0xEEBD0B40 | (m << 5) | vm;
bytes.extend_from_slice(&vfp_to_thumb_bytes(vcvt_to_int));

// Restore FPSCR
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmrs));
bytes.extend_from_slice(&bic_hw1.to_le_bytes());
bytes.extend_from_slice(&bic_hw2.to_le_bytes());
bytes.extend_from_slice(&vfp_to_thumb_bytes(vmsr));
}

// VCVT.F64.S32 Dd, S0
let vcvt_to_float = 0xEEB80B40 | (d << 22) | (vd << 12);
Expand Down Expand Up @@ -5557,7 +5719,8 @@ mod tests {
sm: VfpReg::S2,
};
let code = encoder.encode(&op).unwrap();
assert_eq!(code.len(), 8); // VCVT.S32.F32 + VCVT.F32.S32
// VMRS + BIC + ORR + VMSR + VCVT.S32.F32 + VMRS + BIC + VMSR + VCVT.F32.S32
assert_eq!(code.len(), 36);
}

#[test]
Expand All @@ -5568,7 +5731,8 @@ mod tests {
sm: VfpReg::S2,
};
let code = encoder.encode(&op).unwrap();
assert_eq!(code.len(), 8); // Two VFP instructions as Thumb-2
// VMRS + BIC.W + ORR.W + VMSR + VCVT + VMRS + BIC.W + VMSR + VCVT.F32.S32
assert_eq!(code.len(), 36);
}

#[test]
Expand Down
3 changes: 2 additions & 1 deletion crates/synth-backend/tests/f32_vfp_encoding_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ fn test_f32_ceil_encodes_successfully() {
sm: VfpReg::S0,
});
assert!(result.is_ok(), "F32Ceil should encode successfully");
assert_eq!(result.unwrap().len(), 8); // VCVT.S32.F32 + VCVT.F32.S32
// VMRS + BIC.W + ORR.W + VMSR + VCVT + VMRS + BIC.W + VMSR + VCVT.F32.S32
assert_eq!(result.unwrap().len(), 36);
}

#[test]
Expand Down
Loading