From 9a9a8648d309adb15b0e89c40c5e7d4258798cfc Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Tue, 15 Apr 2025 13:15:11 +0300 Subject: [PATCH 01/38] asm_generation: if a condition is constant, make jump unconditional --- .../fuel/abstract_instruction_set.rs | 163 +----------------- .../asm_generation/fuel/fuel_asm_builder.rs | 6 +- .../asm_generation/fuel/optimizations/misc.rs | 107 ++++++++++++ .../asm_generation/fuel/optimizations/mod.rs | 44 +++++ .../fuel/{ => optimizations}/optimizations.rs | 35 +++- .../fuel/optimizations/verify.rs | 49 ++++++ .../asm_generation/fuel/programs/abstract.rs | 12 +- 7 files changed, 252 insertions(+), 164 deletions(-) create mode 100644 sway-core/src/asm_generation/fuel/optimizations/misc.rs create mode 100644 sway-core/src/asm_generation/fuel/optimizations/mod.rs rename sway-core/src/asm_generation/fuel/{ => optimizations}/optimizations.rs (90%) create mode 100644 sway-core/src/asm_generation/fuel/optimizations/verify.rs diff --git a/sway-core/src/asm_generation/fuel/abstract_instruction_set.rs b/sway-core/src/asm_generation/fuel/abstract_instruction_set.rs index 32ef5a7f538..6069e5cd6b8 100644 --- a/sway-core/src/asm_generation/fuel/abstract_instruction_set.rs +++ b/sway-core/src/asm_generation/fuel/abstract_instruction_set.rs @@ -1,20 +1,12 @@ -use crate::{ - asm_generation::fuel::{ - allocated_abstract_instruction_set::AllocatedAbstractInstructionSet, register_allocator, - }, - asm_lang::{ - allocated_ops::AllocatedOp, Op, OrganizationalOp, RealizedOp, VirtualOp, VirtualRegister, - }, -}; - use sway_error::error::CompileError; -use sway_types::Span; -use std::{collections::HashSet, fmt}; +use crate::asm_lang::{allocated_ops::AllocatedOp, Op, RealizedOp}; -use either::Either; +use std::fmt; -use super::data_section::DataSection; +use super::{ + allocated_abstract_instruction_set::AllocatedAbstractInstructionSet, register_allocator, +}; /// An [AbstractInstructionSet] is a set of instructions that use entirely virtual 
registers /// and excessive moves, with the intention of later optimizing it. @@ -24,151 +16,6 @@ pub struct AbstractInstructionSet { } impl AbstractInstructionSet { - pub(crate) fn optimize(self, data_section: &DataSection) -> AbstractInstructionSet { - self.const_indexing_aggregates_function(data_section) - .dce() - .simplify_cfg() - .remove_sequential_jumps() - .remove_redundant_moves() - .remove_redundant_ops() - } - - /// Removes any jumps to the subsequent line. - fn remove_sequential_jumps(mut self) -> AbstractInstructionSet { - let dead_jumps: Vec<_> = self - .ops - .windows(2) - .enumerate() - .filter_map(|(idx, ops)| match (&ops[0].opcode, &ops[1].opcode) { - ( - Either::Right(OrganizationalOp::Jump(dst_label)), - Either::Right(OrganizationalOp::Label(label)), - ) if dst_label == label => Some(idx), - _otherwise => None, - }) - .collect(); - - // Replace the dead jumps with NOPs, as it's cheaper. - for idx in dead_jumps { - self.ops[idx] = Op { - opcode: Either::Left(VirtualOp::NOOP), - comment: "remove redundant jump operation".into(), - owning_span: None, - }; - } - - self - } - - fn remove_redundant_moves(mut self) -> AbstractInstructionSet { - // This has a lot of room for improvement. - // - // For now it is just removing MOVEs to registers which are _never_ used. It doesn't - // analyse control flow or other redundancies. Some obvious improvements are: - // - // - Perform a control flow analysis to remove MOVEs to registers which are not used - // _after_ the MOVE. - // - // - Remove the redundant use of temporaries. E.g.: - // MOVE t, a MOVE b, a - // MOVE b, t => USE b - // USE b - loop { - // Gather all the uses for each register. - let uses: HashSet<&VirtualRegister> = - self.ops.iter().fold(HashSet::new(), |mut acc, op| { - for u in &op.use_registers() { - acc.insert(u); - } - acc - }); - - // Loop again and find MOVEs which have a non-constant destination which is never used. 
- let mut dead_moves = Vec::new(); - for (idx, op) in self.ops.iter().enumerate() { - if let Either::Left(VirtualOp::MOVE( - dst_reg @ VirtualRegister::Virtual(_), - _src_reg, - )) = &op.opcode - { - if !uses.contains(dst_reg) { - dead_moves.push(idx); - } - } - } - - if dead_moves.is_empty() { - break; - } - - // Replace the dead moves with NOPs, as it's cheaper. - for idx in dead_moves { - self.ops[idx] = Op { - opcode: Either::Left(VirtualOp::NOOP), - comment: "remove redundant move operation".into(), - owning_span: None, - }; - } - } - - self - } - - fn remove_redundant_ops(mut self) -> AbstractInstructionSet { - self.ops.retain(|op| { - // It is easier to think in terms of operations we want to remove - // than the operations we want to retain ;-) - #[allow(clippy::match_like_matches_macro)] - // Keep the `match` for adding more ops in the future. - let remove = match &op.opcode { - Either::Left(VirtualOp::NOOP) => true, - _ => false, - }; - - !remove - }); - - self - } - - // At the moment the only verification we do is to make sure used registers are - // initialised. Without doing dataflow analysis we still can't guarantee the init is - // _before_ the use, but future refactoring to convert abstract ops into SSA and BBs will - // make this possible or even make this check redundant. - pub(crate) fn verify(self) -> Result { - macro_rules! 
add_virt_regs { - ($regs: expr, $set: expr) => { - let mut regs = $regs; - regs.retain(|&reg| matches!(reg, VirtualRegister::Virtual(_))); - $set.extend(regs.into_iter()); - }; - } - - let mut use_regs = HashSet::new(); - let mut def_regs = HashSet::new(); - for op in &self.ops { - add_virt_regs!(op.use_registers(), use_regs); - add_virt_regs!(op.def_registers(), def_regs); - } - - if def_regs.is_superset(&use_regs) { - Ok(self) - } else { - let bad_regs = use_regs - .difference(&def_regs) - .map(|reg| match reg { - VirtualRegister::Virtual(name) => format!("$r{name}"), - VirtualRegister::Constant(creg) => creg.to_string(), - }) - .collect::<Vec<_>>() - .join(", "); - Err(CompileError::InternalOwned( - format!("Program erroneously uses uninitialized virtual registers: {bad_regs}"), - Span::dummy(), - )) - } - } - /// Allocate registers. pub(crate) fn allocate_registers( self, diff --git a/sway-core/src/asm_generation/fuel/fuel_asm_builder.rs b/sway-core/src/asm_generation/fuel/fuel_asm_builder.rs index 59cba133cb2..d5926472fa7 100644 --- a/sway-core/src/asm_generation/fuel/fuel_asm_builder.rs +++ b/sway-core/src/asm_generation/fuel/fuel_asm_builder.rs @@ -210,6 +210,10 @@ impl AsmBuilder for FuelAsmBuilder<'_, '_> { .. 
} = self; + let opt_level = build_config + .map(|cfg| cfg.optimization_level) + .unwrap_or_default(); + let entries = entries .clone() .into_iter() @@ -265,7 +269,7 @@ impl AsmBuilder for FuelAsmBuilder<'_, '_> { } let allocated_program = virtual_abstract_program - .into_allocated_program(fallback_fn) + .into_allocated_program(fallback_fn, opt_level) .map_err(|e| handler.emit_err(e))?; if build_config diff --git a/sway-core/src/asm_generation/fuel/optimizations/misc.rs b/sway-core/src/asm_generation/fuel/optimizations/misc.rs new file mode 100644 index 00000000000..729009770c9 --- /dev/null +++ b/sway-core/src/asm_generation/fuel/optimizations/misc.rs @@ -0,0 +1,107 @@ +use super::super::abstract_instruction_set::AbstractInstructionSet; + +use crate::asm_lang::{Op, OrganizationalOp, VirtualOp, VirtualRegister}; + +use std::collections::HashSet; + +use either::Either; + +impl AbstractInstructionSet { + /// Removes any jumps to the subsequent line. + pub(crate) fn remove_sequential_jumps(mut self) -> AbstractInstructionSet { + let dead_jumps: Vec<_> = self + .ops + .windows(2) + .enumerate() + .filter_map(|(idx, ops)| match (&ops[0].opcode, &ops[1].opcode) { + ( + Either::Right(OrganizationalOp::Jump(dst_label)), + Either::Right(OrganizationalOp::Label(label)), + ) if dst_label == label => Some(idx), + _otherwise => None, + }) + .collect(); + + // Replace the dead jumps with NOPs, as it's cheaper. + for idx in dead_jumps { + self.ops[idx] = Op { + opcode: Either::Left(VirtualOp::NOOP), + comment: "remove redundant jump operation".into(), + owning_span: None, + }; + } + + self + } + + pub(crate) fn remove_redundant_moves(mut self) -> AbstractInstructionSet { + // This has a lot of room for improvement. + // + // For now it is just removing MOVEs to registers which are _never_ used. It doesn't + // analyse control flow or other redundancies. 
Some obvious improvements are: + // + // - Perform a control flow analysis to remove MOVEs to registers which are not used + // _after_ the MOVE. + // + // - Remove the redundant use of temporaries. E.g.: + // MOVE t, a MOVE b, a + // MOVE b, t => USE b + // USE b + loop { + // Gather all the uses for each register. + let uses: HashSet<&VirtualRegister> = + self.ops.iter().fold(HashSet::new(), |mut acc, op| { + for u in &op.use_registers() { + acc.insert(u); + } + acc + }); + + // Loop again and find MOVEs which have a non-constant destination which is never used. + let mut dead_moves = Vec::new(); + for (idx, op) in self.ops.iter().enumerate() { + if let Either::Left(VirtualOp::MOVE( + dst_reg @ VirtualRegister::Virtual(_), + _src_reg, + )) = &op.opcode + { + if !uses.contains(dst_reg) { + dead_moves.push(idx); + } + } + } + + if dead_moves.is_empty() { + break; + } + + // Replace the dead moves with NOPs, as it's cheaper. + for idx in dead_moves { + self.ops[idx] = Op { + opcode: Either::Left(VirtualOp::NOOP), + comment: "remove redundant move operation".into(), + owning_span: None, + }; + } + } + + self + } + + pub(crate) fn remove_redundant_ops(mut self) -> AbstractInstructionSet { + self.ops.retain(|op| { + // It is easier to think in terms of operations we want to remove + // than the operations we want to retain ;-) + #[allow(clippy::match_like_matches_macro)] + // Keep the `match` for adding more ops in the future. 
+ let remove = match &op.opcode { + Either::Left(VirtualOp::NOOP) => true, + _ => false, + }; + + !remove + }); + + self + } +} diff --git a/sway-core/src/asm_generation/fuel/optimizations/mod.rs b/sway-core/src/asm_generation/fuel/optimizations/mod.rs new file mode 100644 index 00000000000..363eaaec5f3 --- /dev/null +++ b/sway-core/src/asm_generation/fuel/optimizations/mod.rs @@ -0,0 +1,44 @@ +mod misc; +mod optimizations; +mod verify; + +use super::abstract_instruction_set::AbstractInstructionSet; + +use crate::OptLevel; + +use super::data_section::DataSection; + +impl AbstractInstructionSet { + pub(crate) fn optimize( + mut self, + data_section: &DataSection, + level: OptLevel, + ) -> AbstractInstructionSet { + match level { + // On debug builds do a single pass through the simple optimizations + OptLevel::Opt0 => self + .const_indexing_aggregates_function(data_section) + .dce() + .simplify_cfg() + .remove_sequential_jumps() + .remove_redundant_moves() + .remove_redundant_ops(), + // On release builds we can do more iterations + OptLevel::Opt1 => { + for _ in 0..10 { + // limit the number of iterations + let old = self.clone(); + self = self.optimize(data_section, OptLevel::Opt0); + if self.ops.len() == old.ops.len() { + // No improvement made, we're done here + break; + } else if old.ops.len() < self.ops.len() { + // Never accept worse results + return old; + } + } + self + } + } + } +} diff --git a/sway-core/src/asm_generation/fuel/optimizations.rs b/sway-core/src/asm_generation/fuel/optimizations/optimizations.rs similarity index 90% rename from sway-core/src/asm_generation/fuel/optimizations.rs rename to sway-core/src/asm_generation/fuel/optimizations/optimizations.rs index ac012a36bfc..f0b2e3af9fb 100644 --- a/sway-core/src/asm_generation/fuel/optimizations.rs +++ b/sway-core/src/asm_generation/fuel/optimizations/optimizations.rs @@ -5,10 +5,12 @@ use rustc_hash::{FxHashMap, FxHashSet}; use crate::{ asm_generation::fuel::compiler_constants, - 
asm_lang::{ControlFlowOp, Label, VirtualImmediate12, VirtualOp, VirtualRegister}, + asm_lang::{ + ConstantRegister, ControlFlowOp, Label, VirtualImmediate12, VirtualOp, VirtualRegister, + }, }; -use super::{ +use super::super::{ abstract_instruction_set::AbstractInstructionSet, analyses::liveness_analysis, data_section::DataSection, }; @@ -32,6 +34,19 @@ impl AbstractInstructionSet { Constant(u64), BaseOffset(VRegDef, u64), } + impl RegContents { + /// If the value is statically known, return it. + fn const_value(&self) -> Option { + match self { + RegContents::Constant(c) => Some(*c), + RegContents::BaseOffset(base, offset) => match base.reg { + VirtualRegister::Constant(ConstantRegister::Zero) => Some(*offset), + VirtualRegister::Constant(ConstantRegister::One) => Some(*offset + 1), + _ => None, + }, + } + } + } // What is the latest version of a vreg definition. let mut latest_version = FxHashMap::::default(); @@ -217,6 +232,22 @@ impl AbstractInstructionSet { } } either::Either::Right(ControlFlowOp::SaveRetAddr(..)) => {} + either::Either::Right(ControlFlowOp::JumpIfNotZero(reg, lab)) => { + if let Some(known_condition) = + reg_contents.get(reg).and_then(|r| r.const_value()) + { + if known_condition != 0 { + // We always jump here. Replace the instruction with unconditional jump. + op.opcode = either::Either::Right(ControlFlowOp::Jump(*lab)); + clear_state = true; + } else { + // The jump is never performed, and can be removed. 
+ retain = false; + } + } else { + clear_state = true; + } + } either::Either::Right(_) => { clear_state = true; } diff --git a/sway-core/src/asm_generation/fuel/optimizations/verify.rs b/sway-core/src/asm_generation/fuel/optimizations/verify.rs new file mode 100644 index 00000000000..4b3c03b6b55 --- /dev/null +++ b/sway-core/src/asm_generation/fuel/optimizations/verify.rs @@ -0,0 +1,49 @@ +use std::collections::HashSet; + +use sway_error::error::CompileError; +use sway_types::Span; + +use crate::{ + asm_generation::fuel::abstract_instruction_set::AbstractInstructionSet, + asm_lang::VirtualRegister, +}; + +impl AbstractInstructionSet { + // At the moment the only verification we do is to make sure used registers are + // initialised. Without doing dataflow analysis we still can't guarantee the init is + // _before_ the use, but future refactoring to convert abstract ops into SSA and BBs will + // make this possible or even make this check redundant. + pub(crate) fn verify(self) -> Result { + macro_rules! 
add_virt_regs { + ($regs: expr, $set: expr) => { + let mut regs = $regs; + regs.retain(|&reg| matches!(reg, VirtualRegister::Virtual(_))); + $set.extend(regs.into_iter()); + }; + } + + let mut use_regs = HashSet::new(); + let mut def_regs = HashSet::new(); + for op in &self.ops { + add_virt_regs!(op.use_registers(), use_regs); + add_virt_regs!(op.def_registers(), def_regs); + } + + if def_regs.is_superset(&use_regs) { + Ok(self) + } else { + let bad_regs = use_regs + .difference(&def_regs) + .map(|reg| match reg { + VirtualRegister::Virtual(name) => format!("$r{name}"), + VirtualRegister::Constant(creg) => creg.to_string(), + }) + .collect::<Vec<_>>() + .join(", "); + Err(CompileError::InternalOwned( + format!("Program erroneously uses uninitialized virtual registers: {bad_regs}"), + Span::dummy(), + )) + } + } +} diff --git a/sway-core/src/asm_generation/fuel/programs/abstract.rs b/sway-core/src/asm_generation/fuel/programs/abstract.rs index 50748ede023..cb206a896aa 100644 --- a/sway-core/src/asm_generation/fuel/programs/abstract.rs +++ b/sway-core/src/asm_generation/fuel/programs/abstract.rs @@ -17,6 +17,7 @@ use crate::{ VirtualImmediate18, VirtualImmediate24, }, decl_engine::DeclRefFunction, + OptLevel, }; use either::Either; use sway_error::error::CompileError; @@ -82,10 +83,11 @@ impl AbstractProgram { pub(crate) fn into_allocated_program( mut self, fallback_fn: Option, + opt_level: OptLevel, ) -> Result { let mut prologue = self.build_prologue(); self.append_globals_allocation(&mut prologue); - self.append_before_entries(&mut prologue, opt_level)?; match (self.experimental.new_encoding, self.kind) { (true, ProgramKind::Contract) => { @@ -120,7 +122,7 @@ impl AbstractProgram { // Optimize and then verify abstract functions. 
let abstract_functions = all_functions - .map(|instruction_set| instruction_set.optimize(&self.data_section)) + .map(|instruction_set| instruction_set.optimize(&self.data_section, opt_level)) .map(AbstractInstructionSet::verify) .collect::, CompileError>>()?; @@ -152,8 +154,12 @@ impl AbstractProgram { fn append_before_entries( &self, prologue: &mut AllocatedAbstractInstructionSet, + opt_level: OptLevel, ) -> Result<(), CompileError> { - let before_entries = self.before_entries.clone().optimize(&self.data_section); + let before_entries = self + .before_entries + .clone() + .optimize(&self.data_section, opt_level); let before_entries = before_entries.verify()?; let mut before_entries = before_entries.allocate_registers()?; prologue.ops.append(&mut before_entries.ops); From 7e576ac74cabcff072111972eb37a7a6beb831f1 Mon Sep 17 00:00:00 2001 From: Hannes Karppila Date: Wed, 23 Apr 2025 17:13:39 +0300 Subject: [PATCH 02/38] Optimize using symbolic interpretation --- ...zations.rs => const_indexed_aggregates.rs} | 115 +----------- .../asm_generation/fuel/optimizations/misc.rs | 4 + .../asm_generation/fuel/optimizations/mod.rs | 16 +- .../fuel/optimizations/reachability.rs | 111 ++++++++++++ .../optimizations/symbolic_interpretation.rs | 167 ++++++++++++++++++ sway-core/src/asm_lang/mod.rs | 27 +++ sway-core/src/asm_lang/virtual_ops.rs | 128 ++++++++++++++ sway-core/src/build_config.rs | 10 ++ .../src/main.sw | 50 +++--- test/src/ir_generation/mod.rs | 8 +- test/src/ir_generation/tests/asm_block.sw | 3 +- test/src/ir_generation/tests/fn_call.sw | 6 +- .../tests/fn_call_noargs_nolocals.sw | 2 - .../ir_generation/tests/fn_call_nolocals.sw | 3 +- .../tests/predicate_data_mult_args.sw | 2 - .../tests/predicate_data_single_arg.sw | 2 - 16 files changed, 496 insertions(+), 158 deletions(-) rename sway-core/src/asm_generation/fuel/optimizations/{optimizations.rs => const_indexed_aggregates.rs} (78%) create mode 100644 
sway-core/src/asm_generation/fuel/optimizations/reachability.rs create mode 100644 sway-core/src/asm_generation/fuel/optimizations/symbolic_interpretation.rs diff --git a/sway-core/src/asm_generation/fuel/optimizations/optimizations.rs b/sway-core/src/asm_generation/fuel/optimizations/const_indexed_aggregates.rs similarity index 78% rename from sway-core/src/asm_generation/fuel/optimizations/optimizations.rs rename to sway-core/src/asm_generation/fuel/optimizations/const_indexed_aggregates.rs index f0b2e3af9fb..4b939fa5b1b 100644 --- a/sway-core/src/asm_generation/fuel/optimizations/optimizations.rs +++ b/sway-core/src/asm_generation/fuel/optimizations/const_indexed_aggregates.rs @@ -1,19 +1,11 @@ -use std::collections::{BTreeSet, HashMap}; - -use either::Either; -use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hash::FxHashMap; use crate::{ asm_generation::fuel::compiler_constants, - asm_lang::{ - ConstantRegister, ControlFlowOp, Label, VirtualImmediate12, VirtualOp, VirtualRegister, - }, + asm_lang::{ConstantRegister, ControlFlowOp, VirtualImmediate12, VirtualOp, VirtualRegister}, }; -use super::super::{ - abstract_instruction_set::AbstractInstructionSet, analyses::liveness_analysis, - data_section::DataSection, -}; +use super::super::{abstract_instruction_set::AbstractInstructionSet, data_section::DataSection}; impl AbstractInstructionSet { // Aggregates that are const index accessed from a base address @@ -290,105 +282,4 @@ impl AbstractInstructionSet { self } - - pub(crate) fn dce(mut self) -> AbstractInstructionSet { - let liveness = liveness_analysis(&self.ops, false); - let ops = &self.ops; - - let mut cur_live = BTreeSet::default(); - let mut dead_indices = FxHashSet::default(); - for (rev_ix, op) in ops.iter().rev().enumerate() { - let ix = ops.len() - rev_ix - 1; - - let op_use = op.use_registers(); - let mut op_def = op.def_registers(); - op_def.append(&mut op.def_const_registers()); - - if let Either::Right(ControlFlowOp::Jump(_) | 
ControlFlowOp::JumpIfNotZero(..)) = - op.opcode - { - // Block boundary. Start afresh. - cur_live.clone_from(liveness.get(ix).expect("Incorrect liveness info")); - // Add use(op) to cur_live. - for u in op_use { - cur_live.insert(u.clone()); - } - continue; - } - - let dead = op_def.iter().all(|def| !cur_live.contains(def)) - && match &op.opcode { - Either::Left(op) => !op.has_side_effect(), - Either::Right(_) => false, - }; - // Remove def(op) from cur_live. - for def in &op_def { - cur_live.remove(def); - } - if dead { - dead_indices.insert(ix); - } else { - // Add use(op) to cur_live - for u in op_use { - cur_live.insert(u.clone()); - } - } - } - - // Actually delete the instructions. - let mut new_ops: Vec<_> = std::mem::take(&mut self.ops) - .into_iter() - .enumerate() - .filter_map(|(idx, op)| { - if !dead_indices.contains(&idx) { - Some(op) - } else { - None - } - }) - .collect(); - std::mem::swap(&mut self.ops, &mut new_ops); - - self - } - - // Remove unreachable instructions. - pub(crate) fn simplify_cfg(mut self) -> AbstractInstructionSet { - let ops = &self.ops; - - if ops.is_empty() { - return self; - } - - // Keep track of a map between jump labels and op indices. Useful to compute op successors. 
- let mut label_to_index: HashMap = HashMap::default(); - for (idx, op) in ops.iter().enumerate() { - if let Either::Right(ControlFlowOp::Label(op_label)) = op.opcode { - label_to_index.insert(op_label, idx); - } - } - - let mut reachables = vec![false; ops.len()]; - let mut worklist = vec![0]; - while let Some(op_idx) = worklist.pop() { - assert!(!reachables[op_idx]); - reachables[op_idx] = true; - let op = &ops[op_idx]; - for s in &op.successors(op_idx, ops, &label_to_index) { - if !reachables[*s] { - worklist.push(*s); - } - } - } - - let reachable_ops = self - .ops - .into_iter() - .enumerate() - .filter_map(|(idx, op)| if reachables[idx] { Some(op) } else { None }) - .collect(); - self.ops = reachable_ops; - - self - } } diff --git a/sway-core/src/asm_generation/fuel/optimizations/misc.rs b/sway-core/src/asm_generation/fuel/optimizations/misc.rs index 729009770c9..fb826a057d2 100644 --- a/sway-core/src/asm_generation/fuel/optimizations/misc.rs +++ b/sway-core/src/asm_generation/fuel/optimizations/misc.rs @@ -96,6 +96,10 @@ impl AbstractInstructionSet { // Keep the `match` for adding more ops in the future. 
let remove = match &op.opcode { Either::Left(VirtualOp::NOOP) => true, + Either::Left(VirtualOp::MOVE(a, b)) => a == b, + Either::Left(VirtualOp::CFEI(_, imm)) | Either::Left(VirtualOp::CFSI(_, imm)) => { + imm.value() == 0 + } _ => false, }; diff --git a/sway-core/src/asm_generation/fuel/optimizations/mod.rs b/sway-core/src/asm_generation/fuel/optimizations/mod.rs index 363eaaec5f3..d2cd18aa989 100644 --- a/sway-core/src/asm_generation/fuel/optimizations/mod.rs +++ b/sway-core/src/asm_generation/fuel/optimizations/mod.rs @@ -1,5 +1,7 @@ +mod const_indexed_aggregates; mod misc; -mod optimizations; +mod reachability; +mod symbolic_interpretation; mod verify; use super::abstract_instruction_set::AbstractInstructionSet; @@ -8,6 +10,9 @@ use crate::OptLevel; use super::data_section::DataSection; +/// Maximum number of optimization rounds to perform in release build. +const MAX_OPT_ROUNDS: usize = 10; + impl AbstractInstructionSet { pub(crate) fn optimize( mut self, @@ -18,6 +23,7 @@ impl AbstractInstructionSet { // On debug builds do a single pass through the simple optimizations OptLevel::Opt0 => self .const_indexing_aggregates_function(data_section) + .constant_register_propagation() .dce() .simplify_cfg() .remove_sequential_jumps() @@ -25,12 +31,14 @@ impl AbstractInstructionSet { .remove_redundant_ops(), // On release builds we can do more iterations OptLevel::Opt1 => { - for _ in 0..10 { - // limit the number of iterations + for _ in 0..MAX_OPT_ROUNDS { let old = self.clone(); + // run two rounds, so that if an optimization depends on another + // it will be applied at least once + self = self.optimize(data_section, OptLevel::Opt0); self = self.optimize(data_section, OptLevel::Opt0); if self.ops.len() == old.ops.len() { - // No improvement made, we're done here + // Not changed at all, we're done break; } else if old.ops.len() < self.ops.len() { // Never accept worse results diff --git a/sway-core/src/asm_generation/fuel/optimizations/reachability.rs 
b/sway-core/src/asm_generation/fuel/optimizations/reachability.rs new file mode 100644 index 00000000000..65b4231e712 --- /dev/null +++ b/sway-core/src/asm_generation/fuel/optimizations/reachability.rs @@ -0,0 +1,111 @@ +use std::collections::{BTreeSet, HashMap}; + +use either::Either; +use rustc_hash::FxHashSet; + +use crate::asm_lang::{ControlFlowOp, Label}; + +use super::super::{abstract_instruction_set::AbstractInstructionSet, analyses::liveness_analysis}; + +impl AbstractInstructionSet { + pub(crate) fn dce(mut self) -> AbstractInstructionSet { + let liveness = liveness_analysis(&self.ops, false); + let ops = &self.ops; + + let mut cur_live = BTreeSet::default(); + let mut dead_indices = FxHashSet::default(); + for (rev_ix, op) in ops.iter().rev().enumerate() { + let ix = ops.len() - rev_ix - 1; + + let op_use = op.use_registers(); + let mut op_def = op.def_registers(); + op_def.append(&mut op.def_const_registers()); + + if let Either::Right(ControlFlowOp::Jump(_) | ControlFlowOp::JumpIfNotZero(..)) = + op.opcode + { + // Block boundary. Start afresh. + cur_live.clone_from(liveness.get(ix).expect("Incorrect liveness info")); + // Add use(op) to cur_live. + for u in op_use { + cur_live.insert(u.clone()); + } + continue; + } + + let dead = op_def.iter().all(|def| !cur_live.contains(def)) + && match &op.opcode { + Either::Left(op) => !op.has_side_effect(), + Either::Right(_) => false, + }; + // Remove def(op) from cur_live. + for def in &op_def { + cur_live.remove(def); + } + if dead { + dead_indices.insert(ix); + } else { + // Add use(op) to cur_live + for u in op_use { + cur_live.insert(u.clone()); + } + } + } + + // Actually delete the instructions. + let mut new_ops: Vec<_> = std::mem::take(&mut self.ops) + .into_iter() + .enumerate() + .filter_map(|(idx, op)| { + if !dead_indices.contains(&idx) { + Some(op) + } else { + None + } + }) + .collect(); + std::mem::swap(&mut self.ops, &mut new_ops); + + self + } + + // Remove unreachable instructions. 
+ pub(crate) fn simplify_cfg(mut self) -> AbstractInstructionSet { + let ops = &self.ops; + + if ops.is_empty() { + return self; + } + + // Keep track of a map between jump labels and op indices. Useful to compute op successors. + let mut label_to_index: HashMap = HashMap::default(); + for (idx, op) in ops.iter().enumerate() { + if let Either::Right(ControlFlowOp::Label(op_label)) = op.opcode { + label_to_index.insert(op_label, idx); + } + } + + let mut reachables = vec![false; ops.len()]; + let mut worklist = vec![0]; + while let Some(op_idx) = worklist.pop() { + assert!(!reachables[op_idx]); + reachables[op_idx] = true; + let op = &ops[op_idx]; + for s in &op.successors(op_idx, ops, &label_to_index) { + if !reachables[*s] { + worklist.push(*s); + } + } + } + + let reachable_ops = self + .ops + .into_iter() + .enumerate() + .filter_map(|(idx, op)| if reachables[idx] { Some(op) } else { None }) + .collect(); + self.ops = reachable_ops; + + self + } +} diff --git a/sway-core/src/asm_generation/fuel/optimizations/symbolic_interpretation.rs b/sway-core/src/asm_generation/fuel/optimizations/symbolic_interpretation.rs new file mode 100644 index 00000000000..b8153fb7f41 --- /dev/null +++ b/sway-core/src/asm_generation/fuel/optimizations/symbolic_interpretation.rs @@ -0,0 +1,167 @@ +//! Symbolic fuel-vm interpreter. + +use either::Either; +use rustc_hash::{FxHashMap, FxHashSet}; + +use crate::asm_lang::{ConstantRegister, ControlFlowOp, Label, Op, VirtualOp, VirtualRegister}; + +use super::super::abstract_instruction_set::AbstractInstructionSet; + +#[derive(Clone, Debug, PartialEq, Eq)] +enum KnownRegValue { + Const(u64), + Eq(VirtualRegister), +} + +/// What knowledge is lost after an op we don't know how to interpret? 
+#[derive(Clone, Debug)] +enum ResetKnown { + /// Reset all known values + Full, + /// Reset non-virtual registers in addition to defs + NonVirtual, + /// Only the `def_registers` and `def_const_registers` are reset + Defs, +} +impl ResetKnown { + fn apply(&self, op: &Op, known_values: &mut FxHashMap) { + match self { + ResetKnown::Full => { + known_values.clear(); + } + ResetKnown::NonVirtual => { + Self::Defs.apply(op, known_values); + known_values.retain(|k, _| { + if let VirtualRegister::Virtual(_) = k { + true + } else { + false + } + }); + } + ResetKnown::Defs => { + for d in op.def_registers() { + known_values.remove(d); + known_values.retain(|_, v| KnownRegValue::Eq(d.clone()) != *v); + } + for d in op.def_const_registers() { + known_values.remove(d); + known_values.retain(|_, v| KnownRegValue::Eq(d.clone()) != *v); + } + } + } + } +} + +impl AbstractInstructionSet { + /// Remove redundant temporary variable registers. + pub(crate) fn constant_register_propagation(mut self) -> AbstractInstructionSet { + if self.ops.is_empty() { + return self; + } + + // The set of labels that are jump targets + // todo: build proper control flow graph instead + let jump_target_labels: FxHashSet