diff --git a/Cargo.lock b/Cargo.lock index 82850f980..c538aaf2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,7 +77,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -88,7 +88,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -318,7 +318,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -337,7 +337,7 @@ dependencies = [ "libc", "once_cell", "unicode-width", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -480,6 +480,17 @@ dependencies = [ "objc2", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -523,7 +534,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -895,17 +906,20 @@ dependencies = [ "object 0.38.1", "perf-event", "perfetto-recorder", + "phnt", "rayon", "sharded-offset-map", "sharded-vec-writer 0.4.0", "smallvec", "symbolic-common", "symbolic-demangle", + "target-lexicon", "tempfile", "thread_local", "tracing", "tracing-subscriber", "uuid", + "windows-sys 0.61.2", "winnow", "zerocopy", "zstd", @@ -1065,6 +1079,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nt-string" +version = 
"0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64f73b19d9405e886b53b9dee286e7fbb622a5276a7fd143c2d8e4dac3a0c6c" +dependencies = [ + "displaydoc", + "widestring", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1275,7 +1299,7 @@ dependencies = [ "objc2-foundation", "objc2-ui-kit", "serde", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1347,7 +1371,19 @@ dependencies = [ "nix", "prost", "rand", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "phnt" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a88b858d3d129dd19de9bd1e4318b20753f2c92bea944fc0fb2728e444bf5f44" +dependencies = [ + "nt-string", + "quote", + "syn", + "windows-sys 0.59.0", ] [[package]] @@ -1642,7 +1678,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1882,6 +1918,12 @@ dependencies = [ "syn", ] +[[package]] +name = "target-lexicon" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" + [[package]] name = "tempfile" version = "3.25.0" @@ -1892,7 +1934,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -2250,6 +2292,12 @@ dependencies = [ "winsafe", ] +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + [[package]] name = "wild-linker" version = "0.8.0" @@ -2302,6 +2350,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -2311,6 +2368,70 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "winnow" version = "0.7.15" diff --git a/Cargo.toml b/Cargo.toml index 7370e4983..22158eab9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,10 +63,12 @@ memmap2 = "0.9.0" mimalloc = { version = "0.1", default-features = false } object = { git = "https://github.com/gimli-rs/object", rev = "ae958b9f2c8a3a56fe19bb4b380144a32eebb770", default-features = false, features = [ "elf", + "coff", "read_core", "std", "unaligned", "archive", + "pe", ] } os_info = "3.0.0" paste = "1.0.15" @@ -97,9 +99,12 @@ tracing-subscriber = { version = "0.3.16", default-features = false, features = ] } uuid = { version = "1.0.0", features = ["v4"] } wait-timeout = "0.2.0" +walkdir = "2" wait4 = "0.1.3" which = "8.0.0" +windows-sys = "0.61.2" winnow = { version = "0.7.13", features = ["simd"] } +phnt = "0.1.2" zerocopy = { version = "0.8.27", features = ["derive"] } zstd = "0.13.0" diff --git a/justfile b/justfile new file mode 100644 index 000000000..12e7d1113 --- /dev/null +++ b/justfile @@ -0,0 +1,16 @@ + +# list all available just targets +default: + @just --list + + +# Dump the symbol table of kernel32.lib using llvm-objdump +dump-kernel32: + @llvm-objdump -a -t "C:/Program Files (x86)/Windows Kits/10/Lib/10.0.22621.0/um/x64/kernel32.Lib" > kernel32.dump + +# Compile and link a minimal PE test using clang with wild as the linker +test-pe: + cargo build -p wild-linker --bin wild + mkdir -p target/testing + clang -B./target/debug/ -fuse-ld=wild -target x86_64-pc-windows-msvc -nostdlib -e entry test_pe/test.c -o target/testing/test.exe + ./target/testing/test.exe || echo $? 
\ No newline at end of file diff --git a/libwild/Cargo.toml b/libwild/Cargo.toml index c3ebbba2a..272c32b55 100644 --- a/libwild/Cargo.toml +++ b/libwild/Cargo.toml @@ -45,6 +45,7 @@ sharded-vec-writer = { workspace = true } smallvec = { workspace = true } symbolic-common = { workspace = true } symbolic-demangle = { workspace = true } +target-lexicon = "0.13" thread_local = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } @@ -56,6 +57,20 @@ zstd = { workspace = true } [target.'cfg(all(target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64")))'.dependencies] perf-event = { workspace = true } +[target.'cfg(target_os = "windows")'.dependencies] +phnt = { workspace = true } + +[target.'cfg(target_os = "windows")'.dependencies.windows-sys] +workspace = true +features = [ + "Win32_System_Threading", + "Win32_System_Console", + "Win32_System_Pipes", + "Win32_Security", + "Win32_Storage_FileSystem", + "Win32_System_IO", +] + [dev-dependencies] ar = { workspace = true } tempfile = { workspace = true } diff --git a/libwild/src/arch.rs b/libwild/src/arch.rs index 6edd35f36..bd9eea0c4 100644 --- a/libwild/src/arch.rs +++ b/libwild/src/arch.rs @@ -4,6 +4,8 @@ use object::elf::EM_AARCH64; use object::elf::EM_LOONGARCH; use object::elf::EM_RISCV; use object::elf::EM_X86_64; +use object::pe::IMAGE_FILE_MACHINE_AMD64; +use object::pe::IMAGE_FILE_MACHINE_ARM64; use std::fmt::Display; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -14,13 +16,36 @@ pub(crate) enum Architecture { LoongArch64, } +impl Default for Architecture { + fn default() -> Self { + Architecture::DEFAULT + } +} + +impl Architecture { + pub const DEFAULT: Self = const { + #[cfg(target_arch = "x86_64")] + { + Architecture::X86_64 + } + #[cfg(target_arch = "aarch64")] + { + Architecture::AArch64 + } + #[cfg(target_arch = "riscv64")] + { + Architecture::RISCV64 + } + }; +} + impl TryFrom for Architecture { type Error = crate::error::Error; fn try_from(arch: 
u16) -> Result { match arch { - EM_X86_64 => Ok(Self::X86_64), - EM_AARCH64 => Ok(Self::AArch64), + EM_X86_64 | IMAGE_FILE_MACHINE_AMD64 => Ok(Self::X86_64), + EM_AARCH64 | IMAGE_FILE_MACHINE_ARM64 => Ok(Self::AArch64), EM_RISCV => Ok(Self::RISCV64), EM_LOONGARCH => Ok(Self::LoongArch64), _ => bail!("Unsupported architecture: 0x{:x}", arch), diff --git a/libwild/src/archive.rs b/libwild/src/archive.rs index 1db4fac6b..732337b3b 100644 --- a/libwild/src/archive.rs +++ b/libwild/src/archive.rs @@ -3,8 +3,10 @@ //! dependency in our tests so that we can verify consistency. use crate::error::Result; +#[cfg(unix)] use std::ffi::OsStr; use std::ops::Range; +#[cfg(unix)] use std::os::unix::ffi::OsStrExt as _; use std::path::Path; @@ -94,7 +96,17 @@ impl<'data> Identifier<'data> { } pub(crate) fn as_path(&self) -> &'data std::path::Path { - Path::new(OsStr::from_bytes(self.as_slice())) + #[cfg(unix)] + { + Path::new(OsStr::from_bytes(self.as_slice())) + } + #[cfg(not(unix))] + { + // Archive member names are essentially always UTF-8/ASCII. + let s = std::str::from_utf8(self.as_slice()) + .expect("archive member name is not valid UTF-8"); + Path::new(s) + } } } diff --git a/libwild/src/args.rs b/libwild/src/args.rs index 82b37977f..5858ab37a 100644 --- a/libwild/src/args.rs +++ b/libwild/src/args.rs @@ -1,2971 +1,1675 @@ -//! A handwritten parser for our arguments. -//! -//! We don't currently use a 3rd party library like clap for a few reasons. Firstly, we need to -//! support flags like `--push-state` and `--pop-state`. These need to push and pop a state stack -//! when they're parsed. Some of the other flags then need to manipulate the state of the top of the -//! stack. Positional arguments like input files and libraries to link, then need to have the -//! current state of the stack attached to that file. -//! -//! Secondly, long arguments need to also be accepted with a single '-' in addition to the more -//! common double-dash. -//! -//! 
Basically, we need to be able to parse arguments in the same way as the other linkers on the -//! platform that we're targeting. - -use crate::alignment::Alignment; -use crate::arch::Architecture; -use crate::bail; -use crate::ensure; -use crate::error::Context as _; -use crate::error::Result; -use crate::input_data::FileId; -use crate::linker_script::maybe_forced_sysroot; -use crate::save_dir::SaveDir; -use crate::timing_phase; -use hashbrown::HashMap; -use hashbrown::HashSet; -use indexmap::IndexSet; -use itertools::Itertools; -use jobserver::Acquired; -use jobserver::Client; -use object::elf::GNU_PROPERTY_X86_ISA_1_BASELINE; -use object::elf::GNU_PROPERTY_X86_ISA_1_V2; -use object::elf::GNU_PROPERTY_X86_ISA_1_V3; -use object::elf::GNU_PROPERTY_X86_ISA_1_V4; -use rayon::ThreadPoolBuilder; -use std::ffi::CString; -use std::fmt::Display; -use std::mem::take; -use std::num::NonZero; -use std::num::NonZeroU32; -use std::num::NonZeroU64; -use std::num::NonZeroUsize; -use std::path::Path; -use std::path::PathBuf; -use std::str::FromStr; -use std::sync::Arc; -use std::sync::atomic::AtomicI64; - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum VersionMode { - /// Don't print version - None, - /// Print version and continue linking (-v) - Verbose, - /// Print version and exit immediately (--version) - ExitAfterPrint, -} - -#[derive(Debug)] -pub(crate) enum DefsymValue { - /// A numeric value (address) - Value(u64), - /// Reference to another symbol with an optional offset - SymbolWithOffset(String, i64), -} - -#[derive(Debug)] -pub struct Args { - pub(crate) unrecognized_options: Vec, - - pub(crate) arch: Architecture, - pub(crate) lib_search_path: Vec>, - pub(crate) inputs: Vec, - pub(crate) output: Arc, - pub(crate) dynamic_linker: Option>, - pub num_threads: Option, - pub(crate) strip: Strip, - pub(crate) prepopulate_maps: bool, - pub(crate) sym_info: Option, - pub(crate) merge_sections: bool, - pub(crate) debug_fuel: Option, - pub(crate) validate_output: 
bool, - pub(crate) version_script_path: Option, - pub(crate) debug_address: Option, - pub(crate) write_layout: bool, - pub(crate) should_write_eh_frame_hdr: bool, - pub(crate) write_trace: bool, - pub(crate) wrap: Vec, - pub(crate) rpath: Option, - pub(crate) soname: Option, - pub(crate) files_per_group: Option, - pub(crate) exclude_libs: ExcludeLibs, - pub(crate) gc_sections: bool, - pub(crate) should_fork: bool, - pub(crate) mmap_output_file: bool, - pub(crate) build_id: BuildIdOption, - pub(crate) file_write_mode: Option, - pub(crate) no_undefined: bool, - pub(crate) allow_shlib_undefined: bool, - pub(crate) needs_origin_handling: bool, - pub(crate) needs_nodelete_handling: bool, - pub(crate) copy_relocations: CopyRelocations, - pub(crate) sysroot: Option>, - pub(crate) undefined: Vec, - pub(crate) relro: bool, - pub(crate) entry: Option, - pub(crate) export_all_dynamic_symbols: bool, - pub(crate) export_list: Vec, - pub(crate) export_list_path: Option, - pub(crate) auxiliary: Vec, - pub(crate) enable_new_dtags: bool, - pub(crate) plugin_path: Option, - pub(crate) plugin_args: Vec, - - /// Symbol definitions from `--defsym` options. Each entry is (symbol_name, value_or_symbol). - pub(crate) defsym: Vec<(String, DefsymValue)>, - - /// Section start addresses from `--section-start` options. Maps section name to address. - pub(crate) section_start: HashMap, - - /// If set, GC stats will be written to the specified filename. - pub(crate) write_gc_stats: Option, - - /// If set, and we're writing GC stats, then ignore any input files that contain any of the - /// specified substrings. - pub(crate) gc_stats_ignore: Vec, - - /// If `Some`, then we'll time how long each phase takes. We'll also measure the specified - /// counters, if any. 
- pub(crate) time_phase_options: Option>, - - pub(crate) verbose_gc_stats: bool, - - pub(crate) save_dir: SaveDir, - pub(crate) dependency_file: Option, - pub(crate) print_allocations: Option, - pub(crate) execstack: bool, - pub(crate) verify_allocation_consistency: bool, - pub(crate) version_mode: VersionMode, - pub(crate) demangle: bool, - pub(crate) got_plt_syms: bool, - pub(crate) b_symbolic: BSymbolicKind, - pub(crate) relax: bool, - pub(crate) should_write_linker_identity: bool, - pub(crate) hash_style: HashStyle, - pub(crate) unresolved_symbols: UnresolvedSymbols, - pub(crate) error_unresolved_symbols: bool, - pub(crate) allow_multiple_definitions: bool, - pub(crate) z_interpose: bool, - pub(crate) z_isa: Option, - pub(crate) z_stack_size: Option, - pub(crate) max_page_size: Option, - - pub(crate) relocation_model: RelocationModel, - pub(crate) should_output_executable: bool, - - /// The number of actually available threads (considering jobserver) - pub(crate) available_threads: NonZeroUsize, - - pub(crate) numeric_experiments: Vec>, - - rpath_set: IndexSet, - - jobserver_client: Option, -} - -#[derive(Debug)] -pub(crate) enum Strip { - Nothing, - Debug, - All, - Retain(HashSet>), -} - -#[derive(Debug, Clone, Copy)] -pub enum CounterKind { - Cycles, - Instructions, - CacheMisses, - BranchMisses, - PageFaults, - PageFaultsMinor, - PageFaultsMajor, - L1dRead, - L1dMiss, -} - -#[derive(Debug, Clone, Copy)] -pub(crate) enum CopyRelocations { - Allowed, - Disallowed(CopyRelocationsDisabledReason), -} - -/// Represents a command-line argument that specifies the number of threads to use, -/// triggering activation of the thread pool. 
-pub struct ActivatedArgs { - pub args: Args, - _jobserver_tokens: Vec, -} - -#[derive(Debug)] -pub(crate) enum BuildIdOption { - None, - Fast, - Hex(Vec), - Uuid, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum HashStyle { - Gnu, - Sysv, - Both, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum ExcludeLibs { - None, - All, - Some(HashSet>), -} - -impl ExcludeLibs { - pub(crate) fn should_exclude(&self, lib_path: &[u8]) -> bool { - match self { - ExcludeLibs::None => false, - ExcludeLibs::All => true, - ExcludeLibs::Some(libs) => { - let lib_path_str = String::from_utf8_lossy(lib_path); - let lib_name = lib_path_str.rsplit('/').next().unwrap_or(&lib_path_str); - - libs.contains(lib_name) - } - } - } -} - -impl HashStyle { - pub(crate) const fn includes_gnu(self) -> bool { - matches!(self, HashStyle::Gnu | HashStyle::Both) - } - - pub(crate) const fn includes_sysv(self) -> bool { - matches!(self, HashStyle::Sysv | HashStyle::Both) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum RelocationModel { - NonRelocatable, - Relocatable, -} - -#[derive(Debug, Copy, Clone)] -pub(crate) enum Experiment { - /// How much parallelism to allow when splitting string-merge sections. - MergeStringSplitParallelism = 0, - - /// Number of bytes of string-merge sections before we'll break to a new group. - MergeStringMinGroupBytes = 1, - - GroupsPerThread = 2, - - MinGroups = 3, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(crate) enum FileWriteMode { - /// The existing output file, if any, will be unlinked (deleted) and a new file with the same - /// name put in its place. Any hard links to the file will not be affected. - UnlinkAndReplace, - - /// The existing output file, if any, will be edited in-place. Any hard links to the file will - /// update accordingly. If the file is locked due to currently being executed, then our write - /// will fail. 
- UpdateInPlace, - - /// As for `UpdateInPlace`, but if we get an error opening the file for write, fallback to - /// unlinking and replacing. - UpdateInPlaceWithFallback, -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub struct Modifiers { - /// Whether shared objects should only be linked if they're referenced. - pub(crate) as_needed: bool, - - /// Whether we're currently allowed to link against shared libraries. - pub(crate) allow_shared: bool, - - /// Whether object files in archives should be linked even if they do not contain symbols that - /// are referenced. - pub(crate) whole_archive: bool, - - /// Whether archive semantics should be applied even for regular objects. - pub(crate) archive_semantics: bool, - - /// Whether the file is known to be a temporary file that will be deleted when the linker - /// exits, e.g. an output file from a linker plugin. This doesn't affect linking, but is - /// stored in the layout file if written so that linker-diff knows not to error if the file - /// is missing. - pub(crate) temporary: bool, -} - -#[derive(Debug, Eq, PartialEq)] -pub(crate) struct Input { - pub(crate) spec: InputSpec, - /// A directory to search first. Only present when the input came from a linker script, in - /// which case this is the directory containing the linker script. - pub(crate) search_first: Option, - pub(crate) modifiers: Modifiers, -} - -#[derive(Debug, Eq, PartialEq)] -pub(crate) enum InputSpec { - /// Path (possibly just a filename) to the file. - File(Box), - /// Name of the library, without prefix and suffix. - Lib(Box), - /// Name of the library, including prefix and suffix. - Search(Box), -} - -#[derive(Debug, Eq, PartialEq)] -pub(crate) enum BSymbolicKind { - None, - All, - Functions, - NonWeakFunctions, - NonWeak, -} - -#[derive(Debug, Eq, PartialEq)] -pub(crate) enum UnresolvedSymbols { - /// Report all unresolved symbols. - ReportAll, - - /// Ignore unresolved symbols in shared libraries. 
- IgnoreInSharedLibs, - - /// Ignore unresolved symbols in object files. - IgnoreInObjectFiles, - - /// Ignore all unresolved symbols. - IgnoreAll, -} - -pub const WILD_UNSUPPORTED_ENV: &str = "WILD_UNSUPPORTED"; -pub const VALIDATE_ENV: &str = "WILD_VALIDATE_OUTPUT"; -pub const WRITE_LAYOUT_ENV: &str = "WILD_WRITE_LAYOUT"; -pub const WRITE_TRACE_ENV: &str = "WILD_WRITE_TRACE"; -pub const REFERENCE_LINKER_ENV: &str = "WILD_REFERENCE_LINKER"; -pub(crate) const FILES_PER_GROUP_ENV: &str = "WILD_FILES_PER_GROUP"; - -/// Set this environment variable if you get a failure during writing due to too much or too little -/// space being allocated to some section. When set, each time we allocate during layout, we'll -/// check that what we're doing is consistent with writing and fail in a more easy to debug way. i.e -/// we'll report the particular combination of value flags, resolution flags etc that triggered the -/// inconsistency. -pub(crate) const WRITE_VERIFY_ALLOCATIONS_ENV: &str = "WILD_VERIFY_ALLOCATIONS"; - -// These flags don't currently affect our behaviour. TODO: Assess whether we should error or warn if -// these are given. This is tricky though. On the one hand we want to be a drop-in replacement for -// other linkers. On the other, we should perhaps somehow let the user know that we don't support a -// feature. -const SILENTLY_IGNORED_FLAGS: &[&str] = &[ - // Just like other modern linkers, we don't need groups in order to resolve cycles. - "start-group", - "end-group", - // TODO: This is supposed to suppress built-in search paths, but I don't think we have any - // built-in search paths. Perhaps we should? - "nostdlib", - // TODO - "no-undefined-version", - "fatal-warnings", - "color-diagnostics", - "undefined-version", - "sort-common", - "stats", -]; -const SILENTLY_IGNORED_SHORT_FLAGS: &[&str] = &[ - "(", - ")", - // On Illumos, the Clang driver inserts a meaningless -C flag before calling any non-GNU ld - // linker. 
- #[cfg(target_os = "illumos")] - "C", -]; - -const IGNORED_FLAGS: &[&str] = &[ - "gdb-index", - "fix-cortex-a53-835769", - "fix-cortex-a53-843419", - "discard-all", - "use-android-relr-tags", - "x", // alias for --discard-all -]; - -// These flags map to the default behavior of the linker. -const DEFAULT_FLAGS: &[&str] = &[ - "no-call-graph-profile-sort", - "no-copy-dt-needed-entries", - "no-add-needed", - "discard-locals", - "no-fatal-warnings", - "no-use-android-relr-tags", -]; -const DEFAULT_SHORT_FLAGS: &[&str] = &[ - "X", // alias for --discard-locals - "EL", // little endian -]; - -impl Default for Args { - fn default() -> Self { - Args { - arch: default_target_arch(), - unrecognized_options: Vec::new(), - - lib_search_path: Vec::new(), - inputs: Vec::new(), - output: Arc::from(Path::new("a.out")), - should_output_executable: true, - dynamic_linker: None, - time_phase_options: None, - num_threads: None, - strip: Strip::Nothing, - // For now, we default to --gc-sections. This is different to other linkers, but other - // than being different, there doesn't seem to be any downside to doing - // this. We don't currently do any less work if we're not GCing sections, - // but do end up writing more, so --no-gc-sections will almost always be as - // slow or slower than --gc-sections. For that reason, the latter is - // probably a good default. 
- gc_sections: true, - prepopulate_maps: false, - sym_info: None, - merge_sections: true, - copy_relocations: CopyRelocations::Allowed, - debug_fuel: None, - validate_output: std::env::var(VALIDATE_ENV).is_ok_and(|v| v == "1"), - write_layout: std::env::var(WRITE_LAYOUT_ENV).is_ok_and(|v| v == "1"), - write_trace: std::env::var(WRITE_TRACE_ENV).is_ok_and(|v| v == "1"), - verify_allocation_consistency: std::env::var(WRITE_VERIFY_ALLOCATIONS_ENV) - .is_ok_and(|v| v == "1"), - print_allocations: std::env::var("WILD_PRINT_ALLOCATIONS") - .ok() - .and_then(|s| s.parse().ok()) - .map(FileId::from_encoded), - relocation_model: RelocationModel::NonRelocatable, - version_script_path: None, - debug_address: None, - should_write_eh_frame_hdr: false, - write_gc_stats: None, - wrap: Vec::new(), - gc_stats_ignore: Vec::new(), - verbose_gc_stats: false, - rpath: None, - soname: None, - enable_new_dtags: true, - execstack: false, - should_fork: true, - mmap_output_file: true, - needs_origin_handling: false, - needs_nodelete_handling: false, - should_write_linker_identity: true, - file_write_mode: None, - build_id: BuildIdOption::None, - files_per_group: None, - exclude_libs: ExcludeLibs::None, - no_undefined: false, - allow_shlib_undefined: false, - version_mode: VersionMode::None, - sysroot: None, - save_dir: Default::default(), - dependency_file: None, - demangle: true, - undefined: Vec::new(), - relro: true, - entry: None, - b_symbolic: BSymbolicKind::None, - export_all_dynamic_symbols: false, - export_list: Vec::new(), - export_list_path: None, - defsym: Vec::new(), - section_start: HashMap::new(), - got_plt_syms: false, - relax: true, - hash_style: HashStyle::Both, - jobserver_client: None, - available_threads: NonZeroUsize::new(1).unwrap(), - unresolved_symbols: UnresolvedSymbols::ReportAll, - error_unresolved_symbols: true, - allow_multiple_definitions: false, - z_interpose: false, - z_stack_size: None, - z_isa: None, - max_page_size: None, - auxiliary: Vec::new(), - 
numeric_experiments: Vec::new(), - rpath_set: Default::default(), - plugin_path: None, - plugin_args: Vec::new(), - } - } -} - -// Parse the supplied input arguments, which should not include the program name. -pub(crate) fn parse I, S: AsRef, I: Iterator>(input: F) -> Result { - use crate::input_data::MAX_FILES_PER_GROUP; - - // SAFETY: Should be called early before other descriptors are opened and - // so we open it before the arguments are parsed (can open a file). - let jobserver_client = unsafe { Client::from_env() }; - - let files_per_group = std::env::var(FILES_PER_GROUP_ENV) - .ok() - .map(|s| s.parse()) - .transpose()?; - - if let Some(x) = files_per_group { - ensure!( - x <= MAX_FILES_PER_GROUP, - "{FILES_PER_GROUP_ENV}={x} but maximum is {MAX_FILES_PER_GROUP}" - ); - } - - let mut args = Args { - files_per_group, - jobserver_client, - ..Default::default() - }; - - args.save_dir = SaveDir::new(&input)?; - - let mut input = input(); - - let mut modifier_stack = vec![Modifiers::default()]; - - if std::env::var(REFERENCE_LINKER_ENV).is_ok() { - args.write_layout = true; - args.write_trace = true; - } - - let arg_parser = setup_argument_parser(); - while let Some(arg) = input.next() { - let arg = arg.as_ref(); - - arg_parser.handle_argument(&mut args, &mut modifier_stack, arg, &mut input)?; - } - - // Copy relocations are only permitted when building executables. 
- if !args.should_output_executable { - args.copy_relocations = - CopyRelocations::Disallowed(CopyRelocationsDisabledReason::SharedObject); - } - - if !args.rpath_set.is_empty() { - args.rpath = Some(take(&mut args.rpath_set).into_iter().join(":")); - } - - if !args.unrecognized_options.is_empty() { - let options_list = args.unrecognized_options.join(", "); - bail!("unrecognized option(s): {}", options_list); - } - - if !args.auxiliary.is_empty() && args.should_output_executable { - bail!("-f may not be used without -shared"); - } - - Ok(args) -} - -const fn default_target_arch() -> Architecture { - // We default to targeting the architecture that we're running on. We don't support running on - // architectures that we can't target. - #[cfg(target_arch = "x86_64")] - { - Architecture::X86_64 - } - #[cfg(target_arch = "aarch64")] - { - Architecture::AArch64 - } - #[cfg(target_arch = "riscv64")] - { - Architecture::RISCV64 - } - #[cfg(target_arch = "loongarch64")] - { - Architecture::LoongArch64 - } -} - -pub(crate) fn read_args_from_file(path: &Path) -> Result> { - let contents = std::fs::read_to_string(path) - .with_context(|| format!("Failed to read arguments from file `{}`", path.display()))?; - arguments_from_string(&contents) -} - -impl Args { - pub fn parse I, S: AsRef, I: Iterator>(input: F) -> Result { - timing_phase!("Parse args"); - parse(input) - } - - /// Uses 1 debug fuel, returning how much fuel remains. Debug fuel is intended to be used when - /// debugging certain kinds of bugs, so this function isn't normally referenced. To use it, the - /// caller should take a different branch depending on whether the value is still positive. You - /// can then do a binary search. - pub(crate) fn use_debug_fuel(&self) -> i64 { - let Some(fuel) = self.debug_fuel.as_ref() else { - return i64::MAX; - }; - fuel.fetch_sub(1, std::sync::atomic::Ordering::AcqRel) - 1 - } - - /// Returns whether there was sufficient fuel. 
If the last bit of fuel was used, then calls - /// `last_cb`. - #[allow(unused)] - pub(crate) fn use_debug_fuel_on_last(&self, last_cb: impl FnOnce()) -> bool { - match self.use_debug_fuel() { - 1.. => true, - 0 => { - last_cb(); - true - } - _ => false, - } - } - - pub(crate) fn trace_span_for_file( - &self, - file_id: FileId, - ) -> Option { - let should_trace = self.print_allocations == Some(file_id); - should_trace.then(|| tracing::trace_span!(crate::debug_trace::TRACE_SPAN_NAME).entered()) - } - - pub fn should_fork(&self) -> bool { - self.should_fork - } - - pub(crate) fn loadable_segment_alignment(&self) -> Alignment { - if let Some(max_page_size) = self.max_page_size { - return max_page_size; - } - - match self.arch { - Architecture::X86_64 => Alignment { exponent: 12 }, - Architecture::AArch64 => Alignment { exponent: 16 }, - Architecture::RISCV64 => Alignment { exponent: 12 }, - Architecture::LoongArch64 => Alignment { exponent: 16 }, - } - } - - /// Adds a linker script to our outputs. Note, this is only called for scripts specified via - /// flags like -T. Where a linker script is just listed as an argument, this won't be called. - fn add_script(&mut self, path: &str) { - self.inputs.push(Input { - spec: InputSpec::File(Box::from(Path::new(path))), - search_first: None, - modifiers: Modifiers::default(), - }); - } - - /// Sets up the thread pool, using the explicit number of threads if specified, - /// or falling back to the jobserver protocol if available. - /// - /// - pub fn activate_thread_pool(mut self) -> Result { - timing_phase!("Activate thread pool"); - - let mut tokens = Vec::new(); - self.available_threads = self.num_threads.unwrap_or_else(|| { - if let Some(client) = &self.jobserver_client { - while let Ok(Some(acquired)) = client.try_acquire() { - tokens.push(acquired); - } - tracing::trace!(count = tokens.len(), "Acquired jobserver tokens"); - // Our parent "holds" one jobserver token, add it. 
- NonZeroUsize::new((tokens.len() + 1).max(1)).unwrap() - } else { - std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap()) - } - }); - - // The pool might be already initialized, suppress the error intentionally. - let _ = ThreadPoolBuilder::new() - .num_threads(self.available_threads.get()) - .build_global(); - - Ok(ActivatedArgs { - args: self, - _jobserver_tokens: tokens, - }) - } - - pub(crate) fn numeric_experiment(&self, exp: Experiment, default: u64) -> u64 { - self.numeric_experiments - .get(exp as usize) - .copied() - .flatten() - .unwrap_or(default) - } - - pub(crate) fn strip_all(&self) -> bool { - matches!(self.strip, Strip::All) - } - - pub(crate) fn strip_debug(&self) -> bool { - matches!(self.strip, Strip::All | Strip::Debug) - } -} - -fn parse_number(s: &str) -> Result { - crate::parsing::parse_number(s).map_err(|_| crate::error!("Invalid number: {}", s)) -} - -fn parse_defsym_expression(s: &str) -> DefsymValue { - use crate::parsing::ParsedSymbolExpression; - use crate::parsing::parse_symbol_expression; - - match parse_symbol_expression(s) { - ParsedSymbolExpression::Absolute(value) => DefsymValue::Value(value), - ParsedSymbolExpression::SymbolWithOffset(sym, offset) => { - DefsymValue::SymbolWithOffset(sym.to_owned(), offset) - } - } -} - -impl Default for Modifiers { - fn default() -> Self { - Self { - as_needed: false, - allow_shared: true, - whole_archive: false, - archive_semantics: false, - temporary: false, - } - } -} - -/// Parses arguments from a string, handling quoting, escapes etc. -/// All arguments must be surrounded by a white space. 
-fn arguments_from_string(input: &str) -> Result> { - const QUOTES: [char; 2] = ['\'', '"']; - - let mut out = Vec::new(); - let mut chars = input.chars(); - let mut heap = None; - let mut quote = None; - let mut expect_whitespace = false; - - loop { - let Some(mut ch) = chars.next() else { - if let Some(quote) = quote.take() { - bail!("Missing closing '{quote}'"); - } - if let Some(arg) = heap.take() { - out.push(arg); - } - break; - }; - - ensure!( - !expect_whitespace || ch.is_whitespace(), - "Expected white space after quoted argument" - ); - expect_whitespace = false; - - if QUOTES.contains(&ch) { - if let Some(qchr) = quote { - if qchr == ch { - // close the argument - if let Some(arg) = heap.take() { - out.push(arg); - } - quote = None; - expect_whitespace = true; - } else { - // accept the other quoting character as normal char - heap.get_or_insert(String::new()).push(ch); - } - } else { - // beginning of a new argument - ensure!(heap.is_none(), "Missing opening quote '{ch}'"); - quote = Some(ch); - } - } else if ch.is_whitespace() { - if quote.is_none() { - if let Some(arg) = heap.take() { - out.push(arg); - } - } else { - heap.get_or_insert(String::new()).push(ch); - } - } else { - if ch == '\\' { - ch = chars.next().context("Invalid escape")?; - } - heap.get_or_insert(String::new()).push(ch); - } - } - - Ok(out) -} - -fn warn_unsupported(opt: &str) -> Result { - match std::env::var(WILD_UNSUPPORTED_ENV) - .unwrap_or_default() - .as_str() - { - "warn" | "" => crate::error::warning(&format!("{opt} is not yet supported")), - "ignore" => {} - "error" => bail!("{opt} is not yet supported"), - other => bail!("Unsupported value for {WILD_UNSUPPORTED_ENV}={other}"), - } - Ok(()) -} - -struct ArgumentParser { - options: HashMap<&'static str, OptionHandler>, - short_options: HashMap<&'static str, OptionHandler>, // Short option lookup - prefix_options: HashMap<&'static str, PrefixOptionHandler>, // For options like -L, -l, etc. 
-} - -#[derive(Clone)] -struct OptionHandler { - help_text: &'static str, - handler: OptionHandlerFn, - short_names: Vec<&'static str>, -} - -struct PrefixOptionHandler { - help_text: &'static str, - handler: fn(&mut Args, &mut Vec, &str) -> Result<()>, - sub_options: HashMap<&'static str, SubOption>, -} - -#[allow(clippy::enum_variant_names)] -#[derive(Clone, Copy)] -enum OptionHandlerFn { - NoParam(fn(&mut Args, &mut Vec) -> Result<()>), - WithParam(fn(&mut Args, &mut Vec, &str) -> Result<()>), - OptionalParam(fn(&mut Args, &mut Vec, Option<&str>) -> Result<()>), -} - -impl OptionHandlerFn { - fn help_suffix_long(&self) -> &'static str { - match self { - OptionHandlerFn::NoParam(_) => "", - OptionHandlerFn::WithParam(_) => "=", - OptionHandlerFn::OptionalParam(_) => "[=]", - } - } - - fn help_suffix_short(&self) -> &'static str { - match self { - OptionHandlerFn::NoParam(_) => "", - OptionHandlerFn::WithParam(_) => " ", - OptionHandlerFn::OptionalParam(_) => " []", - } - } -} - -struct OptionDeclaration<'a, T> { - parser: &'a mut ArgumentParser, - long_names: Vec<&'static str>, - short_names: Vec<&'static str>, - prefixes: Vec<&'static str>, - sub_options: HashMap<&'static str, SubOption>, - help_text: &'static str, - _phantom: std::marker::PhantomData, -} - -struct NoParam; -struct WithParam; -struct WithOptionalParam; - -#[derive(Clone, Copy)] -enum SubOptionHandler { - /// Handler without value parameter (exact match) - NoValue(fn(&mut Args, &mut Vec) -> Result<()>), - /// Handler with value parameter (prefix match) - WithValue(fn(&mut Args, &mut Vec, &str) -> Result<()>), -} - -#[derive(Clone, Copy)] -struct SubOption { - help: &'static str, - handler: SubOptionHandler, -} - -impl SubOption { - fn with_value(&self) -> bool { - matches!(self.handler, SubOptionHandler::WithValue(_)) - } -} - -impl Default for ArgumentParser { - fn default() -> Self { - Self::new() - } -} - -impl ArgumentParser { - #[must_use] - fn new() -> Self { - Self { - options: 
HashMap::new(), - short_options: HashMap::new(), - prefix_options: HashMap::new(), - } - } - - fn declare(&mut self) -> OptionDeclaration<'_, NoParam> { - OptionDeclaration { - parser: self, - long_names: Vec::new(), - short_names: Vec::new(), - prefixes: Vec::new(), - sub_options: HashMap::new(), - help_text: "", - _phantom: std::marker::PhantomData, - } - } - - fn declare_with_param(&mut self) -> OptionDeclaration<'_, WithParam> { - OptionDeclaration { - parser: self, - long_names: Vec::new(), - short_names: Vec::new(), - prefixes: Vec::new(), - sub_options: HashMap::new(), - help_text: "", - _phantom: std::marker::PhantomData, - } - } - - fn declare_with_optional_param(&mut self) -> OptionDeclaration<'_, WithOptionalParam> { - OptionDeclaration { - parser: self, - long_names: Vec::new(), - short_names: Vec::new(), - prefixes: Vec::new(), - sub_options: HashMap::new(), - help_text: "", - _phantom: std::marker::PhantomData, - } - } - - fn handle_argument, I: Iterator>( - &self, - args: &mut Args, - modifier_stack: &mut Vec, - arg: &str, - input: &mut I, - ) -> Result<()> { - // TODO @lapla-cogito standardize the interface. @file doesn't use a leading hyphen. 
- // Handle `@file`option (recursively) - merging in the options contained in the file - if let Some(path) = arg.strip_prefix('@') { - let file_args = read_args_from_file(Path::new(path))?; - let mut file_arg_iter = file_args.iter(); - while let Some(file_arg) = file_arg_iter.next() { - self.handle_argument(args, modifier_stack, file_arg, &mut file_arg_iter)?; - } - return Ok(()); - } - - if let Some(stripped) = strip_option(arg) { - // Check for option with '=' syntax - if let Some(eq_pos) = stripped.find('=') { - let option_name = &stripped[..eq_pos]; - let value = &stripped[eq_pos + 1..]; - - if let Some(handler) = self.options.get(option_name) { - match &handler.handler { - OptionHandlerFn::WithParam(f) => f(args, modifier_stack, value)?, - OptionHandlerFn::OptionalParam(f) => f(args, modifier_stack, Some(value))?, - OptionHandlerFn::NoParam(_) => return Ok(()), - } - return Ok(()); - } - } else { - if stripped == "build-id" - && let Some(handler) = self.options.get(stripped) - && let OptionHandlerFn::WithParam(f) = &handler.handler - { - f(args, modifier_stack, "fast")?; - return Ok(()); - } - - if let Some(handler) = self.options.get(stripped) { - match &handler.handler { - OptionHandlerFn::NoParam(f) => f(args, modifier_stack)?, - OptionHandlerFn::WithParam(f) => { - let next_arg = - input.next().context(format!("Missing argument to {arg}"))?; - f(args, modifier_stack, next_arg.as_ref())?; - } - OptionHandlerFn::OptionalParam(f) => { - f(args, modifier_stack, None)?; - } - } - return Ok(()); - } - } - } - - if arg.starts_with('-') && !arg.starts_with("--") && arg.len() > 1 { - let option_name = &arg[1..]; - if let Some(handler) = self.short_options.get(option_name) { - match &handler.handler { - OptionHandlerFn::NoParam(f) => f(args, modifier_stack)?, - OptionHandlerFn::WithParam(f) => { - let next_arg = - input.next().context(format!("Missing argument to {arg}"))?; - f(args, modifier_stack, next_arg.as_ref())?; - } - OptionHandlerFn::OptionalParam(f) => { - 
f(args, modifier_stack, None)?; - } - } - return Ok(()); - } - } - - // Prefix options. These should be handled after processing long and short options, - // because some options (like `-hashstyle=gnu`) can be misinterpreted as prefix options. - for (prefix, handler) in &self.prefix_options { - if let Some(rest) = arg.strip_prefix(&format!("-{prefix}")) { - let value = if rest.is_empty() { - let next_arg = input - .next() - .context(format!("Missing argument to -{prefix}"))?; - next_arg.as_ref().to_owned() - } else { - rest.to_owned() - }; - - if let Some((key, param_value)) = value.split_once('=') { - // Value has '=', look up key with trailing '=' - if let Some(sub) = handler.sub_options.get(format!("{key}=").as_str()) { - match sub.handler { - SubOptionHandler::NoValue(_) => { - (handler.handler)(args, modifier_stack, &value)?; - } - SubOptionHandler::WithValue(f) => f(args, modifier_stack, param_value)?, - } - } else { - // Fall back to the main handler - (handler.handler)(args, modifier_stack, &value)?; - } - } else { - // No '=' in value, look up exact match - if let Some(sub) = handler.sub_options.get(value.as_str()) { - match sub.handler { - SubOptionHandler::NoValue(f) => f(args, modifier_stack)?, - SubOptionHandler::WithValue(_) => { - bail!("Option -{prefix} {value} requires a value"); - } - } - } else { - // Fall back to the main handler - (handler.handler)(args, modifier_stack, &value)?; - } - } - return Ok(()); - } - } - - if arg.starts_with('-') { - if let Some(stripped) = strip_option(arg) - && IGNORED_FLAGS.contains(&stripped) - { - warn_unsupported(arg)?; - return Ok(()); - } - - args.unrecognized_options.push(arg.to_owned()); - return Ok(()); - } - - args.save_dir.handle_file(arg); - args.inputs.push(Input { - spec: InputSpec::File(Box::from(Path::new(arg))), - search_first: None, - modifiers: *modifier_stack.last().unwrap(), - }); - - Ok(()) - } - - #[must_use] - fn generate_help(&self) -> String { - let mut help = String::new(); - 
help.push_str("USAGE:\n wild [OPTIONS] [FILES...]\n\nOPTIONS:\n"); - - let mut prefix_options: Vec<_> = self.prefix_options.iter().collect(); - prefix_options.sort_by_key(|(prefix, _)| *prefix); - - // TODO: This is ad-hoc - help.push_str(&format!( - " {:<31} Read options from a file\n", - format!("@"), - )); - - let mut help_to_options: HashMap<&str, Vec> = HashMap::new(); - let mut processed_short_options: HashSet<&str> = HashSet::new(); - - // Collect all long options and their associated short options - for (long_name, handler) in &self.options { - if !handler.help_text.is_empty() { - let long_suffix = handler.handler.help_suffix_long(); - let mut option_names = vec![format!("--{long_name}{long_suffix}")]; - - // Add associated short options - let short_suffix = handler.handler.help_suffix_short(); - for short_char in &handler.short_names { - option_names.push(format!("-{short_char}{short_suffix}")); - } - - help_to_options - .entry(handler.help_text) - .or_default() - .extend(option_names); - } - - // Mark short options of help-less handlers as processed - for short_name in &handler.short_names { - processed_short_options.insert(short_name); - } - } - - for (prefix, handler) in prefix_options { - if !processed_short_options.contains(prefix) && !handler.help_text.is_empty() { - help.push_str(&format!( - " -{:<30} {}\n", - format!("{prefix} "), - handler.help_text - )); - - // Add sub-options if they exist - let mut sub_options: Vec<_> = handler.sub_options.iter().collect(); - sub_options.sort_by_key(|(name, _)| *name); - - for (sub_name, sub) in sub_options { - let display_name = if sub.with_value() && sub_name.ends_with('=') { - // sub_name ends with '=' (e.g., "max-page-size="), so add - format!("{sub_name}") - } else { - sub_name.to_string() - }; - help.push_str(&format!( - " -{prefix} {display_name:<30} {sub_help}\n", - sub_help = sub.help - )); - } - } - } - - // Add short-only options - for (short_char, handler) in &self.short_options { - if 
!processed_short_options.contains(short_char) && !handler.help_text.is_empty() { - let short_suffix = handler.handler.help_suffix_short(); - help_to_options - .entry(handler.help_text) - .or_default() - .push(format!("-{short_char}{short_suffix}")); - } - } - - let mut sorted_help_groups: Vec<_> = help_to_options.into_iter().collect(); - sorted_help_groups.sort_by_key(|(_, option_names)| { - option_names.iter().min().unwrap_or(&String::new()).clone() - }); - - for (help_text, mut option_names) in sorted_help_groups { - option_names.sort_by(|a, b| { - let a_is_short = a.len() == 2 && a.starts_with('-'); - let b_is_short = b.len() == 2 && b.starts_with('-'); - match (a_is_short, b_is_short) { - (true, false) => std::cmp::Ordering::Less, // short options first - (false, true) => std::cmp::Ordering::Greater, // long options after - _ => a.cmp(b), // same type, alphabetical - } - }); - - let option_names_str = option_names.join(", "); - help.push_str(&format!(" {option_names_str:<30} {help_text}\n")); - } - - help - } -} - -impl<'a, T> OptionDeclaration<'a, T> { - #[must_use] - fn long(mut self, name: &'static str) -> Self { - self.long_names.push(name); - self - } - - #[must_use] - fn short(mut self, option: &'static str) -> Self { - self.short_names.push(option); - self - } - - #[must_use] - fn help(mut self, text: &'static str) -> Self { - self.help_text = text; - self - } - - fn prefix(mut self, prefix: &'static str) -> Self { - self.prefixes.push(prefix); - self - } - - #[must_use] - fn sub_option( - mut self, - name: &'static str, - help: &'static str, - handler: fn(&mut Args, &mut Vec) -> Result<()>, - ) -> Self { - self.sub_options.insert( - name, - SubOption { - help, - handler: SubOptionHandler::NoValue(handler), - }, - ); - self - } - - #[must_use] - fn sub_option_with_value( - mut self, - name: &'static str, - help: &'static str, - handler: fn(&mut Args, &mut Vec, &str) -> Result<()>, - ) -> Self { - self.sub_options.insert( - name, - SubOption { - help, - 
handler: SubOptionHandler::WithValue(handler), - }, - ); - self - } -} - -impl<'a> OptionDeclaration<'a, NoParam> { - fn execute(self, handler: fn(&mut Args, &mut Vec) -> Result<()>) { - let option_handler = OptionHandler { - help_text: self.help_text, - handler: OptionHandlerFn::NoParam(handler), - short_names: self.short_names.clone(), - }; - - for name in self.long_names { - self.parser.options.insert(name, option_handler.clone()); - } - - for option in self.short_names { - self.parser - .short_options - .insert(option, option_handler.clone()); - } - } -} - -impl<'a> OptionDeclaration<'a, WithParam> { - fn execute(self, handler: fn(&mut Args, &mut Vec, &str) -> Result<()>) { - let mut short_names = self.short_names.clone(); - short_names.extend_from_slice(&self.prefixes); - - let option_handler = OptionHandler { - help_text: self.help_text, - handler: OptionHandlerFn::WithParam(handler), - short_names, - }; - - for name in self.long_names { - self.parser.options.insert(name, option_handler.clone()); - } - - for option in self.short_names { - self.parser - .short_options - .insert(option, option_handler.clone()); - } - - for prefix in self.prefixes { - let prefix_handler = PrefixOptionHandler { - help_text: self.help_text, - sub_options: self.sub_options.clone(), - handler, - }; - - self.parser.prefix_options.insert(prefix, prefix_handler); - } - } -} - -impl<'a> OptionDeclaration<'a, WithOptionalParam> { - fn execute(self, handler: fn(&mut Args, &mut Vec, Option<&str>) -> Result<()>) { - let option_handler = OptionHandler { - help_text: self.help_text, - handler: OptionHandlerFn::OptionalParam(handler), - short_names: self.short_names.clone(), - }; - - for name in self.long_names { - self.parser.options.insert(name, option_handler.clone()); - } - - for option in self.short_names { - self.parser - .short_options - .insert(option, option_handler.clone()); - } - } -} - -fn strip_option(arg: &str) -> Option<&str> { - arg.strip_prefix("--").or(arg.strip_prefix('-')) 
-} - -fn setup_argument_parser() -> ArgumentParser { - let mut parser = ArgumentParser::new(); - - parser - .declare_with_param() - .prefix("L") - .help("Add directory to library search path") - .execute(|args, _modifier_stack, value| { - let handle_sysroot = |path| { - args.sysroot - .as_ref() - .and_then(|sysroot| maybe_forced_sysroot(path, sysroot)) - .unwrap_or_else(|| Box::from(path)) - }; - - let dir = handle_sysroot(Path::new(value)); - args.save_dir.handle_file(value); - args.lib_search_path.push(dir); - Ok(()) - }); - - parser - .declare_with_param() - .prefix("l") - .help("Link with library") - .sub_option_with_value( - ":filename", - "Link with specific file", - |args, modifier_stack, value| { - let stripped = value.strip_prefix(':').unwrap_or(value); - let spec = InputSpec::File(Box::from(Path::new(stripped))); - args.inputs.push(Input { - spec, - search_first: None, - modifiers: *modifier_stack.last().unwrap(), - }); - Ok(()) - }, - ) - .sub_option_with_value( - "libname", - "Link with library libname.so or libname.a", - |args, modifier_stack, value| { - let spec = InputSpec::Lib(Box::from(value)); - args.inputs.push(Input { - spec, - search_first: None, - modifiers: *modifier_stack.last().unwrap(), - }); - Ok(()) - }, - ) - .execute(|args, modifier_stack, value| { - let spec = if let Some(stripped) = value.strip_prefix(':') { - InputSpec::Search(Box::from(stripped)) - } else { - InputSpec::Lib(Box::from(value)) - }; - args.inputs.push(Input { - spec, - search_first: None, - modifiers: *modifier_stack.last().unwrap(), - }); - Ok(()) - }); - - parser - .declare_with_param() - .prefix("u") - .help("Force resolution of the symbol") - .execute(|args, _modifier_stack, value| { - args.undefined.push(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .prefix("m") - .help("Set target architecture") - .sub_option("elf_x86_64", "x86-64 ELF target", |args, _| { - args.arch = Architecture::X86_64; - Ok(()) - }) - .sub_option( - 
"elf_x86_64_sol2", - "x86-64 ELF target (Solaris)", - |args, _| { - if args.dynamic_linker.is_none() { - args.dynamic_linker = Some(Path::new("/lib/amd64/ld.so.1").into()); - } - args.arch = Architecture::X86_64; - Ok(()) - }, - ) - .sub_option("aarch64elf", "AArch64 ELF target", |args, _| { - args.arch = Architecture::AArch64; - Ok(()) - }) - .sub_option("aarch64linux", "AArch64 ELF target (Linux)", |args, _| { - args.arch = Architecture::AArch64; - Ok(()) - }) - .sub_option("elf64lriscv", "RISC-V 64-bit ELF target", |args, _| { - args.arch = Architecture::RISCV64; - Ok(()) - }) - .sub_option( - "elf64loongarch", - "LoongArch 64-bit ELF target", - |args, _| { - args.arch = Architecture::LoongArch64; - Ok(()) - }, - ) - .execute(|_args, _modifier_stack, value| { - bail!("-m {value} is not yet supported"); - }); - - parser - .declare_with_param() - .prefix("z") - .help("Linker option") - .sub_option("now", "Resolve all symbols immediately", |_, _| Ok(())) - .sub_option( - "origin", - "Mark object as requiring immediate $ORIGIN", - |args, _| { - args.needs_origin_handling = true; - Ok(()) - }, - ) - .sub_option("relro", "Enable RELRO program header", |args, _| { - args.relro = true; - Ok(()) - }) - .sub_option("norelro", "Disable RELRO program header", |args, _| { - args.relro = false; - Ok(()) - }) - .sub_option("notext", "Do not report DT_TEXTREL as an error", |_, _| { - Ok(()) - }) - .sub_option("nostart-stop-gc", "Disable start/stop symbol GC", |_, _| { - Ok(()) - }) - .sub_option( - "execstack", - "Mark object as requiring an executable stack", - |args, _| { - args.execstack = true; - Ok(()) - }, - ) - .sub_option( - "noexecstack", - "Mark object as not requiring an executable stack", - |args, _| { - args.execstack = false; - Ok(()) - }, - ) - .sub_option("nocopyreloc", "Disable copy relocations", |args, _| { - args.copy_relocations = - CopyRelocations::Disallowed(CopyRelocationsDisabledReason::Flag); - Ok(()) - }) - .sub_option( - "nodelete", - "Mark shared 
object as non-deletable", - |args, _| { - args.needs_nodelete_handling = true; - Ok(()) - }, - ) - .sub_option( - "defs", - "Report unresolved symbol references in object files", - |args, _| { - args.no_undefined = true; - Ok(()) - }, - ) - .sub_option( - "undefs", - "Do not report unresolved symbol references in object files", - |args, _| { - args.no_undefined = false; - Ok(()) - }, - ) - .sub_option("muldefs", "Allow multiple definitions", |args, _| { - args.allow_multiple_definitions = true; - Ok(()) - }) - .sub_option("lazy", "Use lazy binding (default)", |_, _| Ok(())) - .sub_option( - "interpose", - "Mark object to interpose all DSOs but executable", - |args, _| { - args.z_interpose = true; - Ok(()) - }, - ) - .sub_option_with_value( - "stack-size=", - "Set size of stack segment", - |args, _, value| { - let size: u64 = parse_number(value)?; - args.z_stack_size = NonZero::new(size); - - Ok(()) - }, - ) - .sub_option( - "x86-64-baseline", - "Mark x86-64-baseline ISA as needed", - |args, _| { - args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_BASELINE); - Ok(()) - }, - ) - .sub_option("x86-64-v2", "Mark x86-64-v2 ISA as needed", |args, _| { - args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V2); - Ok(()) - }) - .sub_option("x86-64-v3", "Mark x86-64-v3 ISA as needed", |args, _| { - args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V3); - Ok(()) - }) - .sub_option("x86-64-v4", "Mark x86-64-v4 ISA as needed", |args, _| { - args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V4); - Ok(()) - }) - .sub_option_with_value( - "max-page-size=", - "Set maximum page size for load segments", - |args, _, value| { - let size: u64 = parse_number(value)?; - if !size.is_power_of_two() { - bail!("Invalid alignment {size:#x}"); - } - args.max_page_size = Some(Alignment { - exponent: size.trailing_zeros() as u8, - }); - - Ok(()) - }, - ) - .execute(|_args, _modifier_stack, value| { - warn_unsupported(&("-z ".to_owned() + value))?; - Ok(()) - }); - - parser - .declare_with_param() - 
.prefix("R") - .help("Add runtime library search path") - .execute(|args, _modifier_stack, value| { - if Path::new(value).is_file() { - args.unrecognized_options - .push(format!("-R,{value}(filename)")); - } else { - args.rpath_set.insert(value.to_string()); - } - Ok(()) - }); - - parser - .declare_with_param() - .prefix("O") - .execute(|_args, _modifier_stack, _value| - // We don't use opt-level for now. - Ok(())); - - parser - .declare() - .long("static") - .long("Bstatic") - .help("Disallow linking of shared libraries") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().allow_shared = false; - Ok(()) - }); - - parser - .declare() - .long("Bdynamic") - .help("Allow linking of shared libraries") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().allow_shared = true; - Ok(()) - }); - - parser - .declare_with_param() - .long("output") - .short("o") - .help("Set the output filename") - .execute(|args, _modifier_stack, value| { - args.output = Arc::from(Path::new(value)); - Ok(()) - }); - - parser - .declare() - .long("strip-all") - .short("s") - .help("Strip all symbols") - .execute(|args, _modifier_stack| { - args.strip = Strip::All; - Ok(()) - }); - - parser - .declare() - .long("strip-debug") - .short("S") - .help("Strip debug symbols") - .execute(|args, _modifier_stack| { - args.strip = Strip::Debug; - Ok(()) - }); - - parser - .declare() - .long("gc-sections") - .help("Enable removal of unused sections") - .execute(|args, _modifier_stack| { - args.gc_sections = true; - Ok(()) - }); - - parser - .declare() - .long("no-gc-sections") - .help("Disable removal of unused sections") - .execute(|args, _modifier_stack| { - args.gc_sections = false; - Ok(()) - }); - - parser - .declare() - .long("shared") - .long("Bshareable") - .help("Create a shared library") - .execute(|args, _modifier_stack| { - args.should_output_executable = false; - Ok(()) - }); - - parser - .declare() - .long("pie") - .long("pic-executable") - 
.help("Create a position-independent executable") - .execute(|args, _modifier_stack| { - args.relocation_model = RelocationModel::Relocatable; - args.should_output_executable = true; - Ok(()) - }); - - parser - .declare() - .long("no-pie") - .help("Do not create a position-dependent executable (default)") - .execute(|args, _modifier_stack| { - args.relocation_model = RelocationModel::NonRelocatable; - args.should_output_executable = true; - Ok(()) - }); - - parser - .declare_with_param() - .long("pack-dyn-relocs") - .help("Specify dynamic relocation packing format") - .execute(|_args, _modifier_stack, value| { - if value != "none" { - warn_unsupported(&format!("--pack-dyn-relocs={value}"))?; - } - Ok(()) - }); - - parser - .declare() - .long("help") - .help("Show this help message") - .execute(|_args, _modifier_stack| { - use std::io::Write as _; - let parser = setup_argument_parser(); - let mut stdout = std::io::stdout().lock(); - writeln!(stdout, "{}", parser.generate_help())?; - - // The following listing is something autoconf detection relies on. 
- writeln!(stdout, "wild: supported targets:elf64 -x86-64 elf64-littleaarch64 elf64-littleriscv elf64-loongarch")?; - writeln!(stdout, "wild: supported emulations: elf_x86_64 aarch64elf elf64lriscv elf64loongarch")?; - - std::process::exit(0); - }); - - parser - .declare() - .long("version") - .help("Show version information and exit") - .execute(|args, _modifier_stack| { - args.version_mode = VersionMode::ExitAfterPrint; - Ok(()) - }); - - parser - .declare() - .short("v") - .help("Print version and continue linking") - .execute(|args, _modifier_stack| { - args.version_mode = VersionMode::Verbose; - Ok(()) - }); - - parser - .declare() - .long("demangle") - .help("Enable symbol demangling") - .execute(|args, _modifier_stack| { - args.demangle = true; - Ok(()) - }); - - parser - .declare() - .long("no-demangle") - .help("Disable symbol demangling") - .execute(|args, _modifier_stack| { - args.demangle = false; - Ok(()) - }); - - parser - .declare_with_optional_param() - .long("time") - .help("Show timing information") - .execute(|args, _modifier_stack, value| { - match value { - Some(v) => args.time_phase_options = Some(parse_time_phase_options(v)?), - None => args.time_phase_options = Some(Vec::new()), - } - Ok(()) - }); - - parser - .declare_with_param() - .long("dynamic-linker") - .help("Set dynamic linker path") - .execute(|args, _modifier_stack, value| { - args.dynamic_linker = Some(Box::from(Path::new(value))); - Ok(()) - }); - - parser - .declare() - .long("no-dynamic-linker") - .help("Omit the load-time dynamic linker request") - .execute(|args, _modifier_stack| { - args.dynamic_linker = None; - Ok(()) - }); - - parser - .declare() - .long("mmap-output-file") - .help("Write output file using mmap (default)") - .execute(|args, _modifier_stack| { - args.mmap_output_file = true; - Ok(()) - }); - - parser - .declare() - .long("no-mmap-output-file") - .help("Write output file without mmap") - .execute(|args, _modifier_stack| { - args.mmap_output_file = false; - 
Ok(()) - }); - - parser - .declare_with_param() - .long("entry") - .short("e") - .help("Set the entry point") - .execute(|args, _modifier_stack, value| { - args.entry = Some(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_optional_param() - .long("threads") - .help("Use multiple threads for linking") - .execute(|args, _modifier_stack, value| { - match value { - Some(v) => { - args.num_threads = Some(NonZeroUsize::try_from(v.parse::()?)?); - } - None => { - args.num_threads = None; // Default behaviour - } - } - Ok(()) - }); - - parser - .declare() - .long("no-threads") - .help("Use a single thread") - .execute(|args, _modifier_stack| { - args.num_threads = Some(NonZeroUsize::new(1).unwrap()); - Ok(()) - }); - - parser - .declare_with_param() - .long("wild-experiments") - .help("List of numbers. Used to tweak internal parameters. '_' keeps default value.") - .execute(|args, _modifier_stack, value| { - args.numeric_experiments = value - .split(',') - .map(|p| { - if p == "_" { - Ok(None) - } else { - Ok(Some(p.parse()?)) - } - }) - .collect::>>>()?; - Ok(()) - }); - - parser - .declare() - .long("as-needed") - .help("Set DT_NEEDED if used") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().as_needed = true; - Ok(()) - }); - - parser - .declare() - .long("no-as-needed") - .help("Always set DT_NEEDED") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().as_needed = false; - Ok(()) - }); - - parser - .declare() - .long("whole-archive") - .help("Include all objects from archives") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().whole_archive = true; - Ok(()) - }); - - parser - .declare() - .long("no-whole-archive") - .help("Disable --whole-archive") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().whole_archive = false; - Ok(()) - }); - - parser - .declare() - .long("push-state") - .help("Save current linker flags") - .execute(|_args, modifier_stack| { - 
modifier_stack.push(*modifier_stack.last().unwrap()); - Ok(()) - }); - - parser - .declare() - .long("pop-state") - .help("Restore previous linker flags") - .execute(|_args, modifier_stack| { - modifier_stack.pop(); - if modifier_stack.is_empty() { - bail!("Mismatched --pop-state"); - } - Ok(()) - }); - - parser - .declare() - .long("eh-frame-hdr") - .help("Create .eh_frame_hdr section") - .execute(|args, _modifier_stack| { - args.should_write_eh_frame_hdr = true; - Ok(()) - }); - - parser - .declare() - .long("no-eh-frame-hdr") - .help("Don't create .eh_frame_hdr section") - .execute(|args, _modifier_stack| { - args.should_write_eh_frame_hdr = false; - Ok(()) - }); - - parser - .declare() - .long("export-dynamic") - .short("E") - .help("Export all dynamic symbols") - .execute(|args, _modifier_stack| { - args.export_all_dynamic_symbols = true; - Ok(()) - }); - - parser - .declare() - .long("no-export-dynamic") - .help("Do not export dynamic symbols") - .execute(|args, _modifier_stack| { - args.export_all_dynamic_symbols = false; - Ok(()) - }); - - parser - .declare_with_param() - .long("soname") - .prefix("h") - .help("Set shared object name") - .execute(|args, _modifier_stack, value| { - args.soname = Some(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("rpath") - .help("Add directory to runtime library search path") - .execute(|args, _modifier_stack, value| { - args.rpath_set.insert(value.to_string()); - Ok(()) - }); - - parser - .declare() - .long("no-string-merge") - .help("Disable section merging") - .execute(|args, _modifier_stack| { - args.merge_sections = false; - Ok(()) - }); - - parser - .declare() - .long("no-undefined") - .help("Do not allow unresolved symbols in object files") - .execute(|args, _modifier_stack| { - args.no_undefined = true; - Ok(()) - }); - - parser - .declare() - .long("allow-multiple-definition") - .help("Allow multiple definitions of symbols") - .execute(|args, _modifier_stack| { - 
args.allow_multiple_definitions = true; - Ok(()) - }); - - parser - .declare() - .long("relax") - .help("Enable target-specific optimization (instruction relaxation)") - .execute(|args, _modifier_stack| { - args.relax = true; - Ok(()) - }); - - parser - .declare() - .long("no-relax") - .help("Disable relaxation") - .execute(|args, _modifier_stack| { - args.relax = false; - Ok(()) - }); - - parser - .declare() - .long("validate-output") - .execute(|args, _modifier_stack| { - args.validate_output = true; - Ok(()) - }); - - parser - .declare() - .long("write-layout") - .execute(|args, _modifier_stack| { - args.write_layout = true; - Ok(()) - }); - - parser - .declare() - .long("write-trace") - .execute(|args, _modifier_stack| { - args.write_trace = true; - Ok(()) - }); - - parser - .declare() - .long("got-plt-syms") - .help("Write symbol table entries that point to the GOT/PLT entry for symbols") - .execute(|args, _modifier_stack| { - args.got_plt_syms = true; - Ok(()) - }); - - parser - .declare() - .long("Bsymbolic") - .help("Bind global references locally") - .execute(|args, _modifier_stack| { - args.b_symbolic = BSymbolicKind::All; - Ok(()) - }); - - parser - .declare() - .long("Bsymbolic-functions") - .help("Bind global function references locally") - .execute(|args, _modifier_stack| { - args.b_symbolic = BSymbolicKind::Functions; - Ok(()) - }); - - parser - .declare() - .long("Bsymbolic-non-weak-functions") - .help("Bind non-weak global function references locally") - .execute(|args, _modifier_stack| { - args.b_symbolic = BSymbolicKind::NonWeakFunctions; - Ok(()) - }); - - parser - .declare() - .long("Bsymbolic-non-weak") - .help("Bind non-weak global references locally") - .execute(|args, _modifier_stack| { - args.b_symbolic = BSymbolicKind::NonWeak; - Ok(()) - }); - - parser - .declare() - .long("Bno-symbolic") - .help("Do not bind global symbol references locally") - .execute(|args, _modifier_stack| { - args.b_symbolic = BSymbolicKind::None; - Ok(()) - }); - 
- parser - .declare_with_param() - .long("thread-count") - .help("Set the number of threads to use") - .execute(|args, _modifier_stack, value| { - args.num_threads = Some(NonZeroUsize::try_from(value.parse::()?)?); - Ok(()) - }); - - parser - .declare_with_param() - .long("exclude-libs") - .help("Exclude libraries") - .execute(|args, _modifier_stack, value| { - for lib in value.split([',', ':']) { - if lib.is_empty() { - continue; - } - - if lib == "ALL" { - args.exclude_libs = ExcludeLibs::All; - return Ok(()); - } - - match &mut args.exclude_libs { - ExcludeLibs::All => {} - ExcludeLibs::None => { - let mut set = HashSet::new(); - set.insert(Box::from(lib)); - args.exclude_libs = ExcludeLibs::Some(set); - } - ExcludeLibs::Some(set) => { - set.insert(Box::from(lib)); - } - } - } - - Ok(()) - }); - - parser - .declare_with_param() - .long("version-script") - .help("Use version script") - .execute(|args, _modifier_stack, value| { - args.save_dir.handle_file(value); - args.version_script_path = Some(PathBuf::from(value)); - Ok(()) - }); - - parser - .declare_with_param() - .long("script") - .prefix("T") - .help("Use linker script") - .execute(|args, _modifier_stack, value| { - args.save_dir.handle_file(value); - args.add_script(value); - Ok(()) - }); - - parser - .declare_with_param() - .long("export-dynamic-symbol") - .help("Export dynamic symbol") - .execute(|args, _modifier_stack, value| { - args.export_list.push(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("export-dynamic-symbol-list") - .help("Export dynamic symbol list") - .execute(|args, _modifier_stack, value| { - args.export_list_path = Some(PathBuf::from(value)); - Ok(()) - }); - - parser - .declare_with_param() - .long("dynamic-list") - .help("Read the dynamic symbol list from a file") - .execute(|args, _modifier_stack, value| { - args.b_symbolic = BSymbolicKind::All; - args.export_list_path = Some(PathBuf::from(value)); - Ok(()) - }); - - parser - .declare_with_param() - 
.long("write-gc-stats") - .help("Write GC statistics") - .execute(|args, _modifier_stack, value| { - args.write_gc_stats = Some(PathBuf::from(value)); - Ok(()) - }); - - parser - .declare_with_param() - .long("gc-stats-ignore") - .help("Ignore files in GC stats") - .execute(|args, _modifier_stack, value| { - args.gc_stats_ignore.push(value.to_owned()); - Ok(()) - }); - - parser - .declare() - .long("no-identity-comment") - .help("Don't write the linker name and version in .comment") - .execute(|args, _modifier_stack| { - args.should_write_linker_identity = false; - Ok(()) - }); - - parser - .declare_with_param() - .long("debug-address") - .help("Set debug address") - .execute(|args, _modifier_stack, value| { - args.debug_address = Some(parse_number(value).context("Invalid --debug-address")?); - Ok(()) - }); - - parser - .declare_with_param() - .long("debug-fuel") - .execute(|args, _modifier_stack, value| { - args.debug_fuel = Some(AtomicI64::new(value.parse()?)); - args.num_threads = Some(NonZeroUsize::new(1).unwrap()); - Ok(()) - }); - - parser - .declare_with_param() - .long("unresolved-symbols") - .help("Specify how to handle unresolved symbols") - .execute(|args, _modifier_stack, value| { - args.unresolved_symbols = match value { - "report-all" => UnresolvedSymbols::ReportAll, - "ignore-in-shared-libs" => UnresolvedSymbols::IgnoreInSharedLibs, - "ignore-in-object-files" => UnresolvedSymbols::IgnoreInObjectFiles, - "ignore-all" => UnresolvedSymbols::IgnoreAll, - _ => bail!("Invalid unresolved-symbols value {value}"), - }; - Ok(()) - }); - - parser - .declare_with_param() - .long("undefined") - .help("Force resolution of the symbol") - .execute(|args, _modifier_stack, value| { - args.undefined.push(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("wrap") - .help("Use a wrapper function") - .execute(|args, _modifier_stack, value| { - args.wrap.push(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("defsym") - 
.help("Define a symbol alias: --defsym=symbol=value") - .execute(|args, _modifier_stack, value| { - let parts: Vec<&str> = value.splitn(2, '=').collect(); - if parts.len() != 2 { - bail!("Invalid --defsym format. Expected: --defsym=symbol=value"); - } - let symbol_name = parts[0].to_owned(); - let value_str = parts[1]; - - let defsym_value = parse_defsym_expression(value_str); - - args.defsym.push((symbol_name, defsym_value)); - Ok(()) - }); - - parser - .declare_with_param() - .long("section-start") - .help("Set start address for a section: --section-start=.section=address") - .execute(|args, _modifier_stack, value| { - let parts: Vec<&str> = value.splitn(2, '=').collect(); - if parts.len() != 2 { - bail!("Invalid --section-start format. Expected: --section-start=.section=address"); - } - - let section_name = parts[0].to_owned(); - let address = parse_number(parts[1]).with_context(|| { - format!( - "Invalid address `{}` in --section-start={}", - parts[1], value - ) - })?; - args.section_start.insert(section_name, address); - - Ok(()) - }); - - parser - .declare_with_param() - .long("hash-style") - .help("Set hash style") - .execute(|args, _modifier_stack, value| { - args.hash_style = match value { - "gnu" => HashStyle::Gnu, - "sysv" => HashStyle::Sysv, - "both" => HashStyle::Both, - _ => bail!("Unknown hash-style `{value}`"), - }; - Ok(()) - }); - - parser - .declare() - .long("enable-new-dtags") - .help("Use DT_RUNPATH and DT_FLAGS/DT_FLAGS_1 (default)") - .execute(|args, _modifier_stack| { - args.enable_new_dtags = true; - Ok(()) - }); - - parser - .declare() - .long("disable-new-dtags") - .help("Use DT_RPATH and individual dynamic entries instead of DT_FLAGS") - .execute(|args, _modifier_stack| { - args.enable_new_dtags = false; - Ok(()) - }); - - parser - .declare_with_param() - .long("retain-symbols-file") - .help( - "Filter symtab to contain only symbols listed in the supplied file. 
\ - One symbol per line.", - ) - .execute(|args, _modifier_stack, value| { - // The performance this flag is not especially optimised. For one, we copy each string - // to the heap. We also do two lookups in the hashset for each symbol. This is a pretty - // obscure flag that we don't expect to be used much, so at this stage, it doesn't seem - // worthwhile to optimise it. - let contents = std::fs::read_to_string(value) - .with_context(|| format!("Failed to read `{value}`"))?; - args.strip = Strip::Retain( - contents - .lines() - .filter_map(|l| { - if l.is_empty() { - None - } else { - Some(l.as_bytes().to_owned()) - } - }) - .collect(), - ); - Ok(()) - }); - - parser - .declare_with_param() - .long("build-id") - .help("Generate build ID") - .execute(|args, _modifier_stack, value| { - args.build_id = match value { - "none" => BuildIdOption::None, - "fast" | "md5" | "sha1" => BuildIdOption::Fast, - "uuid" => BuildIdOption::Uuid, - s if s.starts_with("0x") || s.starts_with("0X") => { - let hex_string = &s[2..]; - let decoded_bytes = hex::decode(hex_string) - .with_context(|| format!("Invalid Hex Build Id `0x{hex_string}`"))?; - BuildIdOption::Hex(decoded_bytes) - } - s => bail!( - "Invalid build-id value `{s}` valid values are `none`, `fast`, `md5`, `sha1` and `uuid`" - ), - }; - Ok(()) - }); - - parser - .declare_with_param() - .long("icf") - .help("Enable identical code folding (merge duplicate functions)") - .execute(|_args, _modifier_stack, value| { - match value { - "none" => {} - other => warn_unsupported(&format!("--icf={other}"))?, - } - Ok(()) - }); - - parser - .declare_with_param() - .long("sysroot") - .help("Set system root") - .execute(|args, _modifier_stack, value| { - args.save_dir.handle_file(value); - let sysroot = std::fs::canonicalize(value).unwrap_or_else(|_| PathBuf::from(value)); - args.sysroot = Some(Box::from(sysroot.as_path())); - for path in &mut args.lib_search_path { - if let Some(new_path) = maybe_forced_sysroot(path, &sysroot) { - *path 
= new_path; - } - } - Ok(()) - }); - - parser - .declare_with_param() - .long("auxiliary") - .short("f") - .help("Set DT_AUXILIARY to a given value") - .execute(|args, _modifier_stack, value| { - args.auxiliary.push(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("plugin-opt") - .help("Pass options to the plugin") - .execute(|args, _modifier_stack, value| { - args.plugin_args - .push(CString::new(value).context("Invalid --plugin-opt argument")?); - Ok(()) - }); - - parser - .declare_with_param() - .long("dependency-file") - .help("Write dependency rules") - .execute(|args, _modifier_stack, value| { - args.dependency_file = Some(PathBuf::from(value)); - Ok(()) - }); - - parser - .declare_with_param() - .long("plugin") - .help("Load plugin") - .execute(|args, _modifier_stack, value| { - args.plugin_path = Some(value.to_owned()); - Ok(()) - }); - - parser - .declare_with_param() - .long("rpath-link") - .help("Add runtime library search path") - .execute(|_args, _modifier_stack, _value| { - // TODO - Ok(()) - }); - - parser - .declare_with_param() - .long("sym-info") - .help("Show symbol information. 
Accepts symbol name or ID.") - .execute(|args, _modifier_stack, value| { - args.sym_info = Some(value.to_owned()); - Ok(()) - }); - - parser - .declare() - .long("start-lib") - .help("Start library group") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().archive_semantics = true; - Ok(()) - }); - - parser - .declare() - .long("end-lib") - .help("End library group") - .execute(|_args, modifier_stack| { - modifier_stack.last_mut().unwrap().archive_semantics = false; - Ok(()) - }); - - parser - .declare() - .long("no-fork") - .help("Do not fork while linking") - .execute(|args, _modifier_stack| { - args.should_fork = false; - Ok(()) - }); - - parser - .declare() - .long("update-in-place") - .help("Update file in place") - .execute(|args, _modifier_stack| { - args.file_write_mode = Some(FileWriteMode::UpdateInPlace); - Ok(()) - }); - - parser - .declare() - .long("no-update-in-place") - .help("Delete and recreate the file") - .execute(|args, _modifier_stack| { - args.file_write_mode = Some(FileWriteMode::UnlinkAndReplace); - Ok(()) - }); - - parser - .declare() - .long("EB") - .help("Big-endian (not supported)") - .execute(|_args, _modifier_stack| { - bail!("Big-endian target is not supported"); - }); - - parser - .declare() - .long("prepopulate-maps") - .help("Prepopulate maps") - .execute(|args, _modifier_stack| { - args.prepopulate_maps = true; - Ok(()) - }); - - parser - .declare() - .long("verbose-gc-stats") - .help("Show GC statistics") - .execute(|args, _modifier_stack| { - args.verbose_gc_stats = true; - Ok(()) - }); - - parser - .declare() - .long("allow-shlib-undefined") - .help("Allow undefined symbol references in shared libraries") - .execute(|args, _modifier_stack| { - args.allow_shlib_undefined = true; - Ok(()) - }); - - parser - .declare() - .long("no-allow-shlib-undefined") - .help("Disallow undefined symbol references in shared libraries") - .execute(|args, _modifier_stack| { - args.allow_shlib_undefined = false; - Ok(()) - 
}); - - parser - .declare() - .long("error-unresolved-symbols") - .help("Treat unresolved symbols as errors") - .execute(|args, _modifier_stack| { - args.error_unresolved_symbols = true; - Ok(()) - }); - - parser - .declare() - .long("warn-unresolved-symbols") - .help("Treat unresolved symbols as warnings") - .execute(|args, _modifier_stack| { - args.error_unresolved_symbols = false; - Ok(()) - }); - - add_silently_ignored_flags(&mut parser); - add_default_flags(&mut parser); - - parser -} - -fn add_silently_ignored_flags(parser: &mut ArgumentParser) { - for flag in SILENTLY_IGNORED_FLAGS { - let mut declaration = parser.declare(); - declaration = declaration.long(flag); - declaration.execute(|_args, _modifier_stack| Ok(())); - } - for flag in SILENTLY_IGNORED_SHORT_FLAGS { - let mut declaration = parser.declare(); - declaration = declaration.short(flag); - declaration.execute(|_args, _modifier_stack| Ok(())); - } -} - -fn add_default_flags(parser: &mut ArgumentParser) { - for flag in DEFAULT_FLAGS { - let mut declaration = parser.declare(); - declaration = declaration.long(flag); - declaration.execute(|_args, _modifier_stack| Ok(())); - } - for flag in DEFAULT_SHORT_FLAGS { - let mut declaration = parser.declare(); - declaration = declaration.short(flag); - declaration.execute(|_args, _modifier_stack| Ok(())); - } -} - -fn parse_time_phase_options(input: &str) -> Result> { - input.split(',').map(|s| s.parse()).collect() -} - -impl FromStr for CounterKind { - type Err = crate::error::Error; - - fn from_str(s: &str) -> Result { - Ok(match s { - "cycles" => CounterKind::Cycles, - "instructions" => CounterKind::Instructions, - "cache-misses" => CounterKind::CacheMisses, - "branch-misses" => CounterKind::BranchMisses, - "page-faults" => CounterKind::PageFaults, - "page-faults-minor" => CounterKind::PageFaultsMinor, - "page-faults-major" => CounterKind::PageFaultsMajor, - "l1d-read" => CounterKind::L1dRead, - "l1d-miss" => CounterKind::L1dMiss, - other => 
bail!("Unsupported performance counter `{other}`"), - }) - } -} - -#[derive(Debug, Clone, Copy)] -pub(crate) enum CopyRelocationsDisabledReason { - Flag, - SharedObject, -} - -impl Display for CopyRelocationsDisabledReason { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // Reason should make sense after the word "because". - let reason = match self { - CopyRelocationsDisabledReason::Flag => "the flag -z nocopyreloc was supplied", - CopyRelocationsDisabledReason::SharedObject => "output is a shared object", - }; - - Display::fmt(&reason, f) - } -} - -#[cfg(test)] -mod tests { - use super::SILENTLY_IGNORED_FLAGS; - use super::VersionMode; - use crate::Args; - use crate::args::InputSpec; - use itertools::Itertools; - use std::fs::File; - use std::io::BufWriter; - use std::io::Write; - use std::num::NonZeroUsize; - use std::path::Path; - use std::path::PathBuf; - use std::str::FromStr; - use tempfile::NamedTempFile; - - const INPUT1: &[&str] = &[ - "-pie", - "-z", - "relro", - "-zrelro", - "-hash-style=gnu", - "--hash-style=gnu", - "-build-id", - "--build-id", - "--eh-frame-hdr", - "-m", - "elf_x86_64", - "-dynamic-linker", - "/lib64/ld-linux-x86-64.so.2", - "-o", - "/build/target/debug/deps/c1-a212b73b12b6d123", - "/lib/x86_64-linux-gnu/Scrt1.o", - "/lib/x86_64-linux-gnu/crti.o", - "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/crtbeginS.o", - "-L/build/target/debug/deps", - "-L/tool/lib/rustlib/x86_64/lib", - "-L/tool/lib/rustlib/x86_64/lib", - "-L/usr/bin/../lib/gcc/x86_64-linux-gnu/12", - "-L/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../lib64", - "-L/lib/x86_64-linux-gnu", - "-L/lib/../lib64", - "-L/usr/lib/x86_64-linux-gnu", - "-L/usr/lib/../lib64", - "-L", - "/lib", - "-L/usr/lib", - "/tmp/rustcDcR20O/symbols.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.1.rcgu.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.2.rcgu.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.3.rcgu.o", - 
"/build/target/debug/deps/c1-a212b73b12b6d123.4.rcgu.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.5.rcgu.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.6.rcgu.o", - "/build/target/debug/deps/c1-a212b73b12b6d123.7.rcgu.o", - "--as-needed", - "-as-needed", - "-Bstatic", - "/tool/lib/rustlib/x86_64/lib/libstd-6498d8891e016dca.rlib", - "/tool/lib/rustlib/x86_64/lib/libpanic_unwind-3debdee1a9058d84.rlib", - "/tool/lib/rustlib/x86_64/lib/libobject-8339c5bd5cbc92bf.rlib", - "/tool/lib/rustlib/x86_64/lib/libmemchr-160ebcebb54c11ba.rlib", - "/tool/lib/rustlib/x86_64/lib/libaddr2line-95c75789f1b65e37.rlib", - "/tool/lib/rustlib/x86_64/lib/libgimli-7e8094f2d6258832.rlib", - "/tool/lib/rustlib/x86_64/lib/librustc_demangle-bac9783ef1b45db0.rlib", - "/tool/lib/rustlib/x86_64/lib/libstd_detect-a1cd87df2f2d8e76.rlib", - "/tool/lib/rustlib/x86_64/lib/libhashbrown-7fd06d468d7dba16.rlib", - "/tool/lib/rustlib/x86_64/lib/librustc_std_workspace_alloc-5ac19487656e05bf.rlib", - "/tool/lib/rustlib/x86_64/lib/libminiz_oxide-c7c35d32cf825c11.rlib", - "/tool/lib/rustlib/x86_64/lib/libadler-c523f1571362e70b.rlib", - "/tool/lib/rustlib/x86_64/lib/libunwind-85f17c92b770a911.rlib", - "/tool/lib/rustlib/x86_64/lib/libcfg_if-598d3ba148dadcea.rlib", - "/tool/lib/rustlib/x86_64/lib/liblibc-a58ec2dab545caa4.rlib", - "/tool/lib/rustlib/x86_64/lib/liballoc-f9dda8cca149f0fc.rlib", - "/tool/lib/rustlib/x86_64/lib/librustc_std_workspace_core-7ba4c315dd7a3503.rlib", - "/tool/lib/rustlib/x86_64/lib/libcore-5ac2993e19124966.rlib", - "/tool/lib/rustlib/x86_64/lib/libcompiler_builtins-df2fb7f50dec519a.rlib", - "-Bdynamic", - "-lgcc_s", - "-lutil", - "-lrt", - "-lpthread", - "-lm", - "-ldl", - "-lc", - "--eh-frame-hdr", - "-z", - "noexecstack", - "-znoexecstack", - "--gc-sections", - "-z", - "relro", - "-z", - "now", - "-z", - "lazy", - "-soname=fpp", - "-soname", - "bar", - "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/crtendS.o", - "/lib/x86_64-linux-gnu/crtn.o", - "--version-script", - "a.ver", - 
"--no-threads", - "--no-add-needed", - "--no-copy-dt-needed-entries", - "--discard-locals", - "--use-android-relr-tags", - "--pack-dyn-relocs=relr", - "-X", - "-EL", - "-O", - "1", - "-O3", - "-v", - "--sysroot=/usr/aarch64-linux-gnu", - "--demangle", - "--no-demangle", - "-l:lib85caec4suo0pxg06jm2ma7b0o.so", - "-rpath", - "foo/", - "-rpath=bar/", - "-Rbaz", - "-R", - "somewhere", - // Adding the same rpath multiple times should not create duplicates - "-rpath", - "foo/", - "-x", - "--discard-all", - "--dependency-file=deps.d", - ]; - - const FILE_OPTIONS: &[&str] = &["-pie"]; - - const INLINE_OPTIONS: &[&str] = &["-L", "/lib"]; - - fn write_options_to_file(file: &File, options: &[&str]) { - let mut writer = BufWriter::new(file); - for option in options { - writeln!(writer, "{option}").expect("Failed to write to temporary file"); - } - } - - #[track_caller] - fn assert_contains(c: &[Box], v: &str) { - assert!(c.iter().any(|p| p.as_ref() == Path::new(v))); - } - - fn input1_assertions(args: &Args) { - assert_eq!( - args.inputs - .iter() - .filter_map(|i| match &i.spec { - InputSpec::File(_) | InputSpec::Search(_) => None, - InputSpec::Lib(lib_name) => Some(lib_name.as_ref()), - }) - .collect_vec(), - &["gcc_s", "util", "rt", "pthread", "m", "dl", "c"] - ); - assert_contains(&args.lib_search_path, "/lib"); - assert_contains(&args.lib_search_path, "/usr/lib"); - assert!(!args.inputs.iter().any(|i| match &i.spec { - InputSpec::File(f) => f.as_ref() == Path::new("/usr/bin/ld"), - InputSpec::Lib(_) | InputSpec::Search(_) => false, - })); - assert_eq!( - args.version_script_path, - Some(PathBuf::from_str("a.ver").unwrap()) - ); - assert_eq!(args.soname, Some("bar".to_owned())); - assert_eq!(args.num_threads, Some(NonZeroUsize::new(1).unwrap())); - assert_eq!(args.version_mode, VersionMode::Verbose); - assert_eq!( - args.sysroot, - Some(Box::from(Path::new("/usr/aarch64-linux-gnu"))) - ); - assert!(args.inputs.iter().any(|i| match &i.spec { - InputSpec::File(_) | 
InputSpec::Lib(_) => false, - InputSpec::Search(lib) => lib.as_ref() == "lib85caec4suo0pxg06jm2ma7b0o.so", - })); - assert_eq!(args.rpath.as_deref(), Some("foo/:bar/:baz:somewhere")); - assert_eq!( - args.dependency_file, - Some(PathBuf::from_str("deps.d").unwrap()) - ); - } - - fn inline_and_file_options_assertions(args: &Args) { - assert_contains(&args.lib_search_path, "/lib"); - } - - #[test] - fn test_parse_inline_only_options() { - let args = super::parse(|| INPUT1.iter()).unwrap(); - input1_assertions(&args); - } - - #[test] - fn test_parse_file_only_options() { - // Create a temporary file containing the same options (one per line) as INPUT1 - let file = NamedTempFile::new().expect("Could not create temp file"); - write_options_to_file(file.as_file(), INPUT1); - - // pass the name of the file where options are as the only inline option "@filename" - let inline_options = [format!("@{}", file.path().to_str().unwrap())]; - let args = super::parse(|| inline_options.iter()).unwrap(); - input1_assertions(&args); - } - - #[test] - fn test_parse_mixed_file_and_inline_options() { - // Create a temporary file containing some options - let file = NamedTempFile::new().expect("Could not create temp file"); - write_options_to_file(file.as_file(), FILE_OPTIONS); - - // create an inline option referring to "@filename" - let file_option = format!("@{}", file.path().to_str().unwrap()); - // start with the set of inline options - let mut inline_options = INLINE_OPTIONS.to_vec(); - // and extend with the "@filename" option - inline_options.push(&file_option); - - // confirm that this works and the resulting set of options is correct - let args = super::parse(|| inline_options.iter()).unwrap(); - inline_and_file_options_assertions(&args); - } - - #[test] - fn test_parse_overlapping_file_and_inline_options() { - // Create a set of file options that has a duplicate of an inline option - let mut file_options = FILE_OPTIONS.to_vec(); - file_options.append(&mut 
INLINE_OPTIONS.to_vec()); - // and save them to a file - let file = NamedTempFile::new().expect("Could not create temp file"); - write_options_to_file(file.as_file(), &file_options); - - // pass the name of the file where options are, as an inline option "@filename" - let file_option = format!("@{}", file.path().to_str().unwrap()); - // start with the set of inline options - let mut inline_options = INLINE_OPTIONS.to_vec(); - // and extend with the "@filename" option - inline_options.push(&file_option); - - // confirm that this works and the resulting set of options is correct - let args = super::parse(|| inline_options.iter()).unwrap(); - inline_and_file_options_assertions(&args); - } - - #[test] - fn test_parse_recursive_file_option() { - // Create a temporary file containing a @file option - let file1 = NamedTempFile::new().expect("Could not create temp file"); - let file2 = NamedTempFile::new().expect("Could not create temp file"); - let file_option = format!("@{}", file2.path().to_str().unwrap()); - write_options_to_file(file1.as_file(), &[&file_option]); - write_options_to_file(file2.as_file(), INPUT1); - - // pass the name of the file where options are, as an inline option "@filename" - let inline_options = [format!("@{}", file1.path().to_str().unwrap())]; - - // confirm that this works and the resulting set of options is correct - let args = super::parse(|| inline_options.iter()) - .expect("Recursive @file options should parse correctly but be ignored"); - input1_assertions(&args); - } - - #[test] - fn test_arguments_from_string() { - use super::arguments_from_string; - - assert!(arguments_from_string("").unwrap().is_empty()); - assert!(arguments_from_string("''").unwrap().is_empty()); - assert!(arguments_from_string("\"\"").unwrap().is_empty()); - assert_eq!( - arguments_from_string(r#""foo" "bar""#).unwrap(), - ["foo", "bar"] - ); - assert_eq!( - arguments_from_string(r#""foo\"" "\"b\"ar""#).unwrap(), - ["foo\"", "\"b\"ar"] - ); - assert_eq!( - 
arguments_from_string(" foo bar ").unwrap(), - ["foo", "bar"] - ); - assert!(arguments_from_string("'foo''bar'").is_err()); - assert_eq!( - arguments_from_string("'foo' 'bar' baz").unwrap(), - ["foo", "bar", "baz"] - ); - assert_eq!(arguments_from_string("foo\nbar").unwrap(), ["foo", "bar"]); - assert_eq!( - arguments_from_string(r#"'foo' "bar" baz"#).unwrap(), - ["foo", "bar", "baz"] - ); - assert_eq!(arguments_from_string("'foo bar'").unwrap(), ["foo bar"]); - assert_eq!( - arguments_from_string("'foo \" bar'").unwrap(), - ["foo \" bar"] - ); - assert!(arguments_from_string("foo\\").is_err()); - assert!(arguments_from_string("'foo").is_err()); - assert!(arguments_from_string("foo\"").is_err()); - } - - #[test] - fn test_ignored_flags() { - for flag in SILENTLY_IGNORED_FLAGS { - assert!(!flag.starts_with('-')); - } - } -} +//! A handwritten parser for our arguments. +//! +//! We don't currently use a 3rd party library like clap for a few reasons. Firstly, we need to +//! support flags like `--push-state` and `--pop-state`. These need to push and pop a state stack +//! when they're parsed. Some of the other flags then need to manipulate the state of the top of the +//! stack. Positional arguments like input files and libraries to link, then need to have the +//! current state of the stack attached to that file. +//! +//! Secondly, long arguments need to also be accepted with a single '-' in addition to the more +//! common double-dash. +//! +//! Basically, we need to be able to parse arguments in the same way as the other linkers on the +//! platform that we're targeting. 
+ +pub(crate) mod consts; +pub(crate) mod linux; +pub(crate) mod windows; + +pub(crate) use consts::*; + +use crate::alignment::Alignment; +use crate::arch::Architecture; +use crate::bail; +use crate::error::Context as _; +use crate::error::Result; +use crate::input_data::FileId; +use crate::save_dir::SaveDir; +use crate::target_os::Os; +use hashbrown::HashMap; +use hashbrown::HashSet; +use jobserver::Client; +use std::fmt::Display; +use std::num::NonZeroUsize; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::atomic::AtomicI64; +use target_lexicon::Triple; + +// ── Shared type definitions (format-agnostic) ──────────────────────────────── + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum VersionMode { + /// Don't print version + None, + /// Print version and continue linking (-v) + Verbose, + /// Print version and exit immediately (--version) + ExitAfterPrint, +} + +#[derive(Debug)] +pub(crate) enum DefsymValue { + /// A numeric value (address) + Value(u64), + /// Reference to another symbol with an optional offset + SymbolWithOffset(String, i64), +} + +#[derive(Debug)] +pub(crate) enum Strip { + Nothing, + Debug, + All, + Retain(HashSet>), +} + +#[derive(Debug, Clone, Copy)] +pub enum CounterKind { + Cycles, + Instructions, + CacheMisses, + BranchMisses, + PageFaults, + PageFaultsMinor, + PageFaultsMajor, + L1dRead, + L1dMiss, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum CopyRelocations { + Allowed, + Disallowed(CopyRelocationsDisabledReason), +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum CopyRelocationsDisabledReason { + Flag, + SharedObject, +} + +impl Display for CopyRelocationsDisabledReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let reason = match self { + CopyRelocationsDisabledReason::Flag => "the flag -z nocopyreloc was supplied", + CopyRelocationsDisabledReason::SharedObject => "output is a shared object", + }; + Display::fmt(&reason, f) + } +} + 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum RelocationModel { + NonRelocatable, + Relocatable, +} + +#[derive(Debug, Copy, Clone)] +pub(crate) enum Experiment { + /// How much parallelism to allow when splitting string-merge sections. + MergeStringSplitParallelism = 0, + + /// Number of bytes of string-merge sections before we'll break to a new group. + MergeStringMinGroupBytes = 1, + + GroupsPerThread = 2, + + MinGroups = 3, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FileWriteMode { + /// The existing output file, if any, will be unlinked (deleted) and a new file with the same + /// name put in its place. Any hard links to the file will not be affected. + UnlinkAndReplace, + + /// The existing output file, if any, will be edited in-place. Any hard links to the file will + /// update accordingly. If the file is locked due to currently being executed, then our write + /// will fail. + UpdateInPlace, + + /// As for `UpdateInPlace`, but if we get an error opening the file for write, fallback to + /// unlinking and replacing. + UpdateInPlaceWithFallback, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub(crate) enum BSymbolicKind { + None, + All, + Functions, + NonWeakFunctions, + NonWeak, +} + +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum UnresolvedSymbols { + /// Report all unresolved symbols. + ReportAll, + + /// Ignore unresolved symbols in shared libraries. + IgnoreInSharedLibs, + + /// Ignore unresolved symbols in object files. + IgnoreInObjectFiles, + + /// Ignore all unresolved symbols. 
+ IgnoreAll, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum ExcludeLibs { + None, + All, + Some(HashSet>), +} + +impl ExcludeLibs { + pub(crate) fn should_exclude(&self, lib_path: &[u8]) -> bool { + match self { + ExcludeLibs::None => false, + ExcludeLibs::All => true, + ExcludeLibs::Some(libs) => { + let lib_path_str = String::from_utf8_lossy(lib_path); + let lib_name = lib_path_str.rsplit('/').next().unwrap_or(&lib_path_str); + + libs.contains(lib_name) + } + } + } +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub struct Modifiers { + /// Whether shared objects should only be linked if they're referenced. + pub(crate) as_needed: bool, + + /// Whether we're currently allowed to link against shared libraries. + pub(crate) allow_shared: bool, + + /// Whether object files in archives should be linked even if they do not contain symbols that + /// are referenced. + pub(crate) whole_archive: bool, + + /// Whether archive semantics should be applied even for regular objects. + pub(crate) archive_semantics: bool, + + /// Whether the file is known to be a temporary file that will be deleted when the linker + /// exits, e.g. an output file from a linker plugin. This doesn't affect linking, but is + /// stored in the layout file if written so that linker-diff knows not to error if the file + /// is missing. + pub(crate) temporary: bool, +} + +impl Default for Modifiers { + fn default() -> Self { + Self { + as_needed: false, + allow_shared: true, + whole_archive: false, + archive_semantics: false, + temporary: false, + } + } +} + +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct Input { + pub(crate) spec: InputSpec, + /// A directory to search first. Only present when the input came from a linker script, in + /// which case this is the directory containing the linker script. 
+ pub(crate) search_first: Option, + pub(crate) modifiers: Modifiers, +} + +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum InputSpec { + /// Path (possibly just a filename) to the file. + File(Box), + /// Name of the library, without prefix and suffix. + Lib(Box), + /// Name of the library, including prefix and suffix. + Search(Box), +} + +// ── End shared type definitions ────────────────────────────────────────────── + +// ── Argument parser infrastructure ─────────────────────────────────────────── + +pub(crate) struct ArgumentParser { + options: HashMap<&'static str, OptionHandler>, + short_options: HashMap<&'static str, OptionHandler>, + prefix_options: HashMap<&'static str, PrefixOptionHandler>, + case_insensitive: bool, + has_option_prefix: fn(&str) -> bool, + strip_option: for<'a> fn(&'a str) -> Option<&'a str>, + find_separator: fn(&str) -> Option, +} + +struct OptionHandler { + help_text: &'static str, + handler: OptionHandlerFn, + short_names: Vec<&'static str>, +} + +impl Clone for OptionHandler { + fn clone(&self) -> Self { + Self { + help_text: self.help_text, + handler: self.handler, + short_names: self.short_names.clone(), + } + } +} + +struct PrefixOptionHandler { + help_text: &'static str, + handler: fn(&mut Args, &mut Vec, &str) -> Result<()>, + sub_options: HashMap<&'static str, SubOption>, +} + +#[allow(clippy::enum_variant_names)] +enum OptionHandlerFn { + NoParam(fn(&mut Args, &mut Vec) -> Result<()>), + WithParam(fn(&mut Args, &mut Vec, &str) -> Result<()>), + OptionalParam(fn(&mut Args, &mut Vec, Option<&str>) -> Result<()>), +} + +impl Clone for OptionHandlerFn { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for OptionHandlerFn {} + +impl OptionHandlerFn { + fn help_suffix_long(&self) -> &'static str { + match self { + OptionHandlerFn::NoParam(_) => "", + OptionHandlerFn::WithParam(_) => "=", + OptionHandlerFn::OptionalParam(_) => "[=]", + } + } + + fn help_suffix_short(&self) -> &'static str { + match self { + 
OptionHandlerFn::NoParam(_) => "", + OptionHandlerFn::WithParam(_) => " ", + OptionHandlerFn::OptionalParam(_) => " []", + } + } +} + +pub(crate) struct OptionDeclaration<'a, T, S> { + parser: &'a mut ArgumentParser, + long_names: Vec<&'static str>, + short_names: Vec<&'static str>, + prefixes: Vec<&'static str>, + sub_options: HashMap<&'static str, SubOption>, + help_text: &'static str, + _phantom: std::marker::PhantomData, +} + +pub struct NoParam; +pub struct WithParam; +pub struct WithOptionalParam; + +enum SubOptionHandler { + /// Handler without value parameter (exact match) + NoValue(fn(&mut Args, &mut Vec) -> Result<()>), + /// Handler with value parameter (prefix match) + WithValue(fn(&mut Args, &mut Vec, &str) -> Result<()>), +} + +impl Clone for SubOptionHandler { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for SubOptionHandler {} + +struct SubOption { + help: &'static str, + handler: SubOptionHandler, +} + +impl Clone for SubOption { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for SubOption {} + +impl SubOption { + fn with_value(&self) -> bool { + matches!(self.handler, SubOptionHandler::WithValue(_)) + } +} + +impl Default for ArgumentParser { + fn default() -> Self { + Self::new() + } +} + +impl ArgumentParser { + #[must_use] + pub fn new() -> Self { + Self { + options: HashMap::new(), + short_options: HashMap::new(), + prefix_options: HashMap::new(), + case_insensitive: false, + has_option_prefix: |arg| arg.starts_with('-'), + strip_option: |arg| arg.strip_prefix("--").or(arg.strip_prefix('-')), + find_separator: |stripped| stripped.find('='), + } + } + + #[must_use] + pub fn new_case_insensitive() -> Self { + Self { + options: HashMap::new(), + short_options: HashMap::new(), + prefix_options: HashMap::new(), + case_insensitive: true, + has_option_prefix: |arg| arg.starts_with('/') || arg.starts_with('-'), + strip_option: |arg| arg.strip_prefix('/').or(arg.strip_prefix('-')), + find_separator: |stripped| stripped.find(':'), + 
} + } + + pub fn declare(&mut self) -> OptionDeclaration<'_, T, NoParam> { + OptionDeclaration { + parser: self, + long_names: Vec::new(), + short_names: Vec::new(), + prefixes: Vec::new(), + sub_options: HashMap::new(), + help_text: "", + _phantom: std::marker::PhantomData, + } + } + + pub fn declare_with_param(&mut self) -> OptionDeclaration<'_, T, WithParam> { + OptionDeclaration { + parser: self, + long_names: Vec::new(), + short_names: Vec::new(), + prefixes: Vec::new(), + sub_options: HashMap::new(), + help_text: "", + _phantom: std::marker::PhantomData, + } + } + + pub fn declare_with_optional_param(&mut self) -> OptionDeclaration<'_, T, WithOptionalParam> { + OptionDeclaration { + parser: self, + long_names: Vec::new(), + short_names: Vec::new(), + prefixes: Vec::new(), + sub_options: HashMap::new(), + help_text: "", + _phantom: std::marker::PhantomData, + } + } + + fn get_option_handler(&self, option_name: &str) -> Option<&OptionHandler> { + if self.case_insensitive { + if let Some(handler) = self.options.get(option_name) { + return Some(handler); + } + for (key, handler) in &self.options { + if key.eq_ignore_ascii_case(option_name) { + return Some(handler); + } + } + None + } else { + self.options.get(option_name) + } + } + + pub(crate) fn handle_argument, I: Iterator>( + &self, + args: &mut Args, + modifier_stack: &mut Vec, + arg: &str, + input: &mut I, + ) -> Result<()> { + // TODO @lapla-cogito standardize the interface. @file doesn't use a leading hyphen. 
+ // Handle `@file`option (recursively) - merging in the options contained in the file + if let Some(path) = arg.strip_prefix('@') { + let file_args = read_args_from_file(Path::new(path))?; + let mut file_arg_iter = file_args.iter(); + while let Some(file_arg) = file_arg_iter.next() { + self.handle_argument(args, modifier_stack, file_arg, &mut file_arg_iter)?; + } + return Ok(()); + } + + if let Some(stripped) = (self.strip_option)(arg) { + // Check for option with separator syntax + if let Some(eq_pos) = (self.find_separator)(stripped) { + let option_name = &stripped[..eq_pos]; + let value = &stripped[eq_pos + 1..]; + + if let Some(handler) = self.get_option_handler(option_name) { + match &handler.handler { + OptionHandlerFn::WithParam(f) => f(args, modifier_stack, value)?, + OptionHandlerFn::OptionalParam(f) => f(args, modifier_stack, Some(value))?, + OptionHandlerFn::NoParam(_) => return Ok(()), + } + return Ok(()); + } + } else { + if stripped == "build-id" + && let Some(handler) = self.get_option_handler(stripped) + && let OptionHandlerFn::WithParam(f) = &handler.handler + { + f(args, modifier_stack, "fast")?; + return Ok(()); + } + + if let Some(handler) = self.get_option_handler(stripped) { + match &handler.handler { + OptionHandlerFn::NoParam(f) => f(args, modifier_stack)?, + OptionHandlerFn::WithParam(f) => { + let next_arg = + input.next().context(format!("Missing argument to {arg}"))?; + f(args, modifier_stack, next_arg.as_ref())?; + } + OptionHandlerFn::OptionalParam(f) => { + f(args, modifier_stack, None)?; + } + } + return Ok(()); + } + } + } + + if arg.starts_with('-') && !arg.starts_with("--") && arg.len() > 1 { + let option_name = &arg[1..]; + if let Some(handler) = self.short_options.get(option_name) { + match &handler.handler { + OptionHandlerFn::NoParam(f) => f(args, modifier_stack)?, + OptionHandlerFn::WithParam(f) => { + let next_arg = + input.next().context(format!("Missing argument to {arg}"))?; + f(args, modifier_stack, next_arg.as_ref())?; 
+ } + OptionHandlerFn::OptionalParam(f) => { + f(args, modifier_stack, None)?; + } + } + return Ok(()); + } + } + + // Prefix options. These should be handled after processing long and short options, + // because some options (like `-hashstyle=gnu`) can be misinterpreted as prefix options. + for (prefix, handler) in &self.prefix_options { + if let Some(rest) = arg.strip_prefix(&format!("-{prefix}")) { + let value = if rest.is_empty() { + let next_arg = input + .next() + .context(format!("Missing argument to -{prefix}"))?; + next_arg.as_ref().to_owned() + } else { + rest.to_owned() + }; + + if let Some((key, param_value)) = value.split_once('=') { + // Value has '=', look up key with trailing '=' + if let Some(sub) = handler.sub_options.get(format!("{key}=").as_str()) { + match sub.handler { + SubOptionHandler::NoValue(_) => { + (handler.handler)(args, modifier_stack, &value)?; + } + SubOptionHandler::WithValue(f) => f(args, modifier_stack, param_value)?, + } + } else { + // Fall back to the main handler + (handler.handler)(args, modifier_stack, &value)?; + } + } else { + // No '=' in value, look up exact match + if let Some(sub) = handler.sub_options.get(value.as_str()) { + match sub.handler { + SubOptionHandler::NoValue(f) => f(args, modifier_stack)?, + SubOptionHandler::WithValue(_) => { + bail!("Option -{prefix} {value} requires a value"); + } + } + } else { + // Fall back to the main handler + (handler.handler)(args, modifier_stack, &value)?; + } + } + return Ok(()); + } + } + + if (self.has_option_prefix)(arg) { + if let Some(stripped) = (self.strip_option)(arg) + && IGNORED_FLAGS.contains(&stripped) + { + warn_unsupported(arg)?; + return Ok(()); + } + + args.unrecognized_options.push(arg.to_owned()); + return Ok(()); + } + + args.save_dir.handle_file(arg); + args.inputs.push(Input { + spec: InputSpec::File(Box::from(Path::new(arg))), + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + + Ok(()) + } + + #[must_use] + fn 
generate_help(&self) -> String { + let mut help = String::new(); + help.push_str("USAGE:\n wild [OPTIONS] [FILES...]\n\nOPTIONS:\n"); + + let mut prefix_options: Vec<_> = self.prefix_options.iter().collect(); + prefix_options.sort_by_key(|(prefix, _)| *prefix); + + // TODO: This is ad-hoc + help.push_str(&format!( + " {:<31} Read options from a file\n", + format!("@"), + )); + + let mut help_to_options: HashMap<&str, Vec> = HashMap::new(); + let mut processed_short_options: HashSet<&str> = HashSet::new(); + + // Collect all long options and their associated short options + for (long_name, handler) in &self.options { + if !handler.help_text.is_empty() { + let long_suffix = handler.handler.help_suffix_long(); + let mut option_names = vec![format!("--{long_name}{long_suffix}")]; + + // Add associated short options + let short_suffix = handler.handler.help_suffix_short(); + for short_char in &handler.short_names { + option_names.push(format!("-{short_char}{short_suffix}")); + } + + help_to_options + .entry(handler.help_text) + .or_default() + .extend(option_names); + } + + // Mark short options of help-less handlers as processed + for short_name in &handler.short_names { + processed_short_options.insert(short_name); + } + } + + for (prefix, handler) in prefix_options { + if !processed_short_options.contains(prefix) && !handler.help_text.is_empty() { + help.push_str(&format!( + " -{:<30} {}\n", + format!("{prefix} "), + handler.help_text + )); + + // Add sub-options if they exist + let mut sub_options: Vec<_> = handler.sub_options.iter().collect(); + sub_options.sort_by_key(|(name, _)| *name); + + for (sub_name, sub) in sub_options { + let display_name = if sub.with_value() && sub_name.ends_with('=') { + // sub_name ends with '=' (e.g., "max-page-size="), so add + format!("{sub_name}") + } else { + sub_name.to_string() + }; + help.push_str(&format!( + " -{prefix} {display_name:<30} {sub_help}\n", + sub_help = sub.help + )); + } + } + } + + // Add short-only options + 
for (short_char, handler) in &self.short_options { + if !processed_short_options.contains(short_char) && !handler.help_text.is_empty() { + let short_suffix = handler.handler.help_suffix_short(); + help_to_options + .entry(handler.help_text) + .or_default() + .push(format!("-{short_char}{short_suffix}")); + } + } + + let mut sorted_help_groups: Vec<_> = help_to_options.into_iter().collect(); + sorted_help_groups.sort_by_key(|(_, option_names)| { + option_names.iter().min().unwrap_or(&String::new()).clone() + }); + + for (help_text, mut option_names) in sorted_help_groups { + option_names.sort_by(|a, b| { + let a_is_short = a.len() == 2 && a.starts_with('-'); + let b_is_short = b.len() == 2 && b.starts_with('-'); + match (a_is_short, b_is_short) { + (true, false) => std::cmp::Ordering::Less, // short options first + (false, true) => std::cmp::Ordering::Greater, // long options after + _ => a.cmp(b), // same type, alphabetical + } + }); + + let option_names_str = option_names.join(", "); + help.push_str(&format!(" {option_names_str:<30} {help_text}\n")); + } + + help + } +} + +impl<'a, T, S> OptionDeclaration<'a, T, S> { + #[must_use] + pub fn long(mut self, name: &'static str) -> Self { + self.long_names.push(name); + self + } + + #[must_use] + pub fn short(mut self, option: &'static str) -> Self { + self.short_names.push(option); + self + } + + #[must_use] + pub fn help(mut self, text: &'static str) -> Self { + self.help_text = text; + self + } + + pub fn prefix(mut self, prefix: &'static str) -> Self { + self.prefixes.push(prefix); + self + } + + #[must_use] + pub fn sub_option( + mut self, + name: &'static str, + help: &'static str, + handler: fn(&mut Args, &mut Vec) -> Result<()>, + ) -> Self { + self.sub_options.insert( + name, + SubOption { + help, + handler: SubOptionHandler::NoValue(handler), + }, + ); + self + } + + #[must_use] + pub fn sub_option_with_value( + mut self, + name: &'static str, + help: &'static str, + handler: fn(&mut Args, &mut Vec, &str) -> 
Result<()>, + ) -> Self { + self.sub_options.insert( + name, + SubOption { + help, + handler: SubOptionHandler::WithValue(handler), + }, + ); + self + } +} + +impl<'a, T> OptionDeclaration<'a, T, NoParam> { + pub fn execute(self, handler: fn(&mut Args, &mut Vec) -> Result<()>) { + let option_handler = OptionHandler { + help_text: self.help_text, + handler: OptionHandlerFn::NoParam(handler), + short_names: self.short_names.clone(), + }; + + for name in self.long_names { + self.parser.options.insert(name, option_handler.clone()); + } + + for option in self.short_names { + self.parser + .short_options + .insert(option, option_handler.clone()); + } + } +} + +impl<'a, T> OptionDeclaration<'a, T, WithParam> { + pub fn execute(self, handler: fn(&mut Args, &mut Vec, &str) -> Result<()>) { + let mut short_names = self.short_names.clone(); + short_names.extend_from_slice(&self.prefixes); + + let option_handler = OptionHandler { + help_text: self.help_text, + handler: OptionHandlerFn::WithParam(handler), + short_names, + }; + + for name in self.long_names { + self.parser.options.insert(name, option_handler.clone()); + } + + for option in self.short_names { + self.parser + .short_options + .insert(option, option_handler.clone()); + } + + for prefix in self.prefixes { + let prefix_handler = PrefixOptionHandler { + help_text: self.help_text, + sub_options: self.sub_options.clone(), + handler, + }; + + self.parser.prefix_options.insert(prefix, prefix_handler); + } + } +} + +impl<'a, T> OptionDeclaration<'a, T, WithOptionalParam> { + pub fn execute( + self, + handler: fn(&mut Args, &mut Vec, Option<&str>) -> Result<()>, + ) { + let option_handler = OptionHandler { + help_text: self.help_text, + handler: OptionHandlerFn::OptionalParam(handler), + short_names: self.short_names.clone(), + }; + + for name in self.long_names { + self.parser.options.insert(name, option_handler.clone()); + } + + for option in self.short_names { + self.parser + .short_options + .insert(option, 
option_handler.clone()); + } + } +} + +// ── End argument parser infrastructure ─────────────────────────────────────── + +pub(crate) fn add_silently_ignored_flags(parser: &mut ArgumentParser) { + fn noop(_args: &mut Args, _modifier_stack: &mut Vec) -> Result<()> { + Ok(()) + } + for flag in SILENTLY_IGNORED_FLAGS { + parser.declare().long(flag).execute(noop); + } + for flag in SILENTLY_IGNORED_SHORT_FLAGS { + parser.declare().short(flag).execute(noop); + } +} + +pub(crate) fn add_default_flags(parser: &mut ArgumentParser) { + fn noop(_args: &mut Args, _modifier_stack: &mut Vec) -> Result<()> { + Ok(()) + } + for flag in DEFAULT_FLAGS { + parser.declare().long(flag).execute(noop); + } + for flag in DEFAULT_SHORT_FLAGS { + parser.declare().short(flag).execute(noop); + } +} + +pub(crate) fn read_args_from_file(path: &Path) -> Result> { + let contents = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read arguments from file `{}`", path.display()))?; + arguments_from_string(&contents) +} + +/// Parses arguments from a string, handling quoting, escapes etc. +/// All arguments must be surrounded by a white space. 
+pub(crate) fn arguments_from_string(input: &str) -> Result> { + const QUOTES: [char; 2] = ['\'', '"']; + + let mut out = Vec::new(); + let mut chars = input.chars(); + let mut heap = None; + let mut quote = None; + let mut expect_whitespace = false; + + loop { + let Some(mut ch) = chars.next() else { + if let Some(quote) = quote.take() { + bail!("Missing closing '{quote}'"); + } + if let Some(arg) = heap.take() { + out.push(arg); + } + break; + }; + + crate::ensure!( + !expect_whitespace || ch.is_whitespace(), + "Expected white space after quoted argument" + ); + expect_whitespace = false; + + if QUOTES.contains(&ch) { + if let Some(qchr) = quote { + if qchr == ch { + // close the argument + if let Some(arg) = heap.take() { + out.push(arg); + } + quote = None; + expect_whitespace = true; + } else { + // accept the other quoting character as normal char + heap.get_or_insert(String::new()).push(ch); + } + } else { + // beginning of a new argument + crate::ensure!(heap.is_none(), "Missing opening quote '{ch}'"); + quote = Some(ch); + } + } else if ch.is_whitespace() { + if quote.is_none() { + if let Some(arg) = heap.take() { + out.push(arg); + } + } else { + heap.get_or_insert(String::new()).push(ch); + } + } else { + if ch == '\\' && (quote.is_some() || !cfg!(target_os = "windows")) { + ch = chars.next().context("Invalid escape")?; + } + heap.get_or_insert(String::new()).push(ch); + } + } + + Ok(out) +} + +pub(super) fn warn_unsupported(opt: &str) -> Result { + match std::env::var(WILD_UNSUPPORTED_ENV) + .unwrap_or_default() + .as_str() + { + "warn" | "" => crate::error::warning(&format!("{opt} is not yet supported")), + "ignore" => {} + "error" => bail!("{opt} is not yet supported"), + other => bail!("Unsupported value for {WILD_UNSUPPORTED_ENV}={other}"), + } + Ok(()) +} + +/// The output binary format. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum OutputFormat { + Elf, + Pe, +} + +impl Default for OutputFormat { + fn default() -> Self { + match Os::DEFAULT { + Os::Linux => OutputFormat::Elf, + Os::Windows => OutputFormat::Pe, + Os::MacOS => todo!("macOS linking not yet supported"), + } + } +} + +/// Result of pre-scanning args for target-determining flags. +#[derive(Debug)] +pub(crate) struct DetectedTarget { + pub format: OutputFormat, + /// Architecture from `--target` triple. `None` if no `--target` was given. + pub arch: Option, +} + +/// Known `-m` emulation values that imply ELF output. +const ELF_EMULATIONS: &[&str] = &[ + "elf_x86_64", + "elf_x86_64_sol2", + "aarch64elf", + "aarch64linux", + "elf64lriscv", + "elf64loongarch", +]; + +/// Map `target_lexicon::Architecture` to Wild's `Architecture`. +fn map_triple_arch(arch: target_lexicon::Architecture) -> Result { + use target_lexicon::Architecture as TL; + match arch { + TL::X86_64 | TL::X86_64h => Ok(Architecture::X86_64), + TL::Aarch64(_) => Ok(Architecture::AArch64), + TL::Riscv64(_) => Ok(Architecture::RISCV64), + TL::LoongArch64 => Ok(Architecture::LoongArch64), + other => bail!("unsupported architecture in target triple: {other}"), + } +} + +/// Map `target_lexicon::BinaryFormat` to `OutputFormat`. +fn map_binary_format(fmt: target_lexicon::BinaryFormat) -> Result { + match fmt { + target_lexicon::BinaryFormat::Elf => Ok(OutputFormat::Elf), + target_lexicon::BinaryFormat::Coff => Ok(OutputFormat::Pe), + other => bail!("unsupported binary format: {other}"), + } +} + +/// Extract the target triple value from a flag, handling all prefix styles. +/// Returns `(Some(value), consumed_next)` if the arg is a target flag. 
+fn extract_target_value<'a>(arg: &'a str, next_arg: Option<&'a str>) -> (Option<&'a str>, bool) { + // Combined forms: --target=VAL, -target=VAL, /TARGET:VAL + if let Some(val) = arg + .strip_prefix("--target=") + .or_else(|| arg.strip_prefix("-target=")) + .or_else(|| arg.strip_prefix("/TARGET:")) + .or_else(|| arg.strip_prefix("/target:")) + { + return (Some(val), false); + } + // Space-separated: --target VAL, -target VAL, /TARGET VAL + if matches!(arg, "--target" | "-target" | "/TARGET" | "/target") { + if let Some(val) = next_arg { + return (Some(val), true); + } + } + (None, false) +} + +/// Pre-scan CLI arguments to determine the output format and architecture. +/// +/// Recognizes: +/// - `--target=` / `-target=` / `/TARGET:` — primary (parsed by target-lexicon) +/// - `-m ` — overrides format to ELF when present +/// +/// Priority: `-m` overrides format from `--target`. Architecture comes from `--target` only. +pub(crate) fn detect_target(args: &[String]) -> Result { + let mut from_triple: Option<(OutputFormat, Architecture)> = None; + let mut m_implies_elf = false; + + let mut i = 0; + while i < args.len() { + let next = if i + 1 < args.len() { + Some(args[i + 1].as_str()) + } else { + None + }; + let (target_val, consumed_next) = extract_target_value(&args[i], next); + + if let Some(val) = target_val { + let triple: Triple = val + .parse() + .map_err(|e| anyhow::anyhow!("invalid target triple '{val}': {e}"))?; + let format = map_binary_format(triple.binary_format)?; + let arch = map_triple_arch(triple.architecture)?; + from_triple = Some((format, arch)); + if consumed_next { + i += 1; + } + } + // Check for -m (implies ELF) + else if args[i] == "-m" || args[i] == "--m" { + if let Some(next_val) = next { + if ELF_EMULATIONS.contains(&next_val) { + m_implies_elf = true; + } + i += 1; + } + } else if let Some(emu) = args[i].strip_prefix("-m") { + if ELF_EMULATIONS.contains(&emu) { + m_implies_elf = true; + } + } + + i += 1; + } + + match (from_triple, 
m_implies_elf) { + (Some((_, arch)), true) => { + // -m overrides format to ELF; arch from triple preserved + Ok(DetectedTarget { + format: OutputFormat::Elf, + arch: Some(arch), + }) + } + (Some((format, arch)), false) => Ok(DetectedTarget { + format, + arch: Some(arch), + }), + (None, true) => Ok(DetectedTarget { + format: OutputFormat::Elf, + arch: None, + }), + (None, false) => Ok(DetectedTarget { + format: OutputFormat::default(), + arch: None, + }), + } +} + +/// Map Wild `Architecture` to the GNU ld `-m` emulation name. +fn arch_to_elf_emulation(arch: Architecture) -> &'static str { + match arch { + Architecture::X86_64 => "elf_x86_64", + Architecture::AArch64 => "aarch64linux", + Architecture::RISCV64 => "elf64lriscv", + Architecture::LoongArch64 => "elf64loongarch", + } +} + +/// Map Wild `Architecture` to the MSVC `/MACHINE:` value. +fn arch_to_machine_value(arch: Architecture) -> &'static str { + match arch { + Architecture::X86_64 => "X64", + Architecture::AArch64 => "ARM64", + Architecture::RISCV64 => "X64", + Architecture::LoongArch64 => "X64", + } +} + +/// Strip `--target`/`-target`/`/TARGET` flags and inject a synthetic `-m` or `/MACHINE:` flag +/// from the detected architecture so the format-specific parser picks it up. +/// +/// The user's explicit `-m` or `/MACHINE:` flags are preserved and will override the injected one +/// since they appear later in the argument list. 
+pub(crate) fn filter_and_inject_target_flags( + args: &[String], + format: OutputFormat, + arch: Option, +) -> Vec { + let mut result = Vec::with_capacity(args.len() + 2); + + // Inject synthetic arch flag at the front (user's explicit flags override later) + if let Some(arch) = arch { + match format { + OutputFormat::Elf => { + result.push("-m".to_string()); + result.push(arch_to_elf_emulation(arch).to_string()); + } + OutputFormat::Pe => { + result.push(format!("/MACHINE:{}", arch_to_machine_value(arch))); + } + } + } + + // Strip --target flags, keep everything else + let mut i = 0; + while i < args.len() { + let arg = &args[i]; + if arg.starts_with("--target=") + || arg.starts_with("-target=") + || arg.starts_with("/TARGET:") + || arg.starts_with("/target:") + { + // Skip this combined arg + } else if matches!(arg.as_str(), "--target" | "-target" | "/TARGET" | "/target") { + i += 1; // skip value too + } else { + result.push(arg.clone()); + } + i += 1; + } + result +} + +/// Format-specific parsed arguments. +pub enum TargetArgs { + Elf(linux::ElfArgs), + #[allow(dead_code)] + Pe(windows::PeArgs), +} + +impl std::fmt::Debug for TargetArgs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TargetArgs::Elf(e) => e.fmt(f), + TargetArgs::Pe(p) => p.fmt(f), + } + } +} + +/// Parsed linker arguments. Common fields are directly accessible. +/// Format-specific fields are accessible via `Deref`/`DerefMut` through `target_args`. +/// +/// `T` defaults to `TargetArgs` (the enum). During parsing, `T` is set to the +/// concrete format type (e.g. `ElfArgs` or `PeArgs`). 
+#[derive(Debug)] +pub struct Args { + // ── Infrastructure ─────────────────────────────────────────────────────── + pub should_fork: bool, + pub(crate) output: Arc, + pub(crate) arch: Architecture, + pub(crate) inputs: Vec, + pub(crate) lib_search_path: Vec>, + pub num_threads: Option, + pub(crate) available_threads: NonZeroUsize, + pub(crate) save_dir: SaveDir, + pub(crate) unrecognized_options: Vec, + pub(crate) files_per_group: Option, + pub(crate) write_layout: bool, + pub(crate) write_trace: bool, + pub(crate) jobserver_client: Option, + + // ── Core linker behavior ───────────────────────────────────────────────── + pub(crate) strip: Strip, + pub(crate) gc_sections: bool, + pub(crate) merge_sections: bool, + pub(crate) relax: bool, + pub(crate) demangle: bool, + pub(crate) no_undefined: bool, + pub(crate) allow_shlib_undefined: bool, + pub(crate) error_unresolved_symbols: bool, + pub(crate) allow_multiple_definitions: bool, + pub(crate) unresolved_symbols: UnresolvedSymbols, + pub(crate) undefined: Vec, + pub(crate) copy_relocations: CopyRelocations, + pub(crate) sysroot: Option>, + pub(crate) dynamic_linker: Option>, + pub(crate) entry: Option, + pub(crate) wrap: Vec, + pub(crate) exclude_libs: ExcludeLibs, + pub(crate) b_symbolic: BSymbolicKind, + pub(crate) export_list: Vec, + pub(crate) defsym: Vec<(String, DefsymValue)>, + pub(crate) section_start: HashMap, + pub(crate) max_page_size: Option, + pub(crate) execstack: bool, + pub(crate) version_mode: VersionMode, + pub(crate) relocation_model: RelocationModel, + pub(crate) should_output_executable: bool, + pub(crate) export_all_dynamic_symbols: bool, + pub(crate) version_script_path: Option, + pub(crate) export_list_path: Option, + + // ── Output/writing ─────────────────────────────────────────────────────── + pub(crate) mmap_output_file: bool, + pub(crate) file_write_mode: Option, + pub(crate) prepopulate_maps: bool, + pub(crate) should_write_linker_identity: bool, + + // ── Debug/diagnostic 
───────────────────────────────────────────────────── + pub(crate) debug_fuel: Option, + pub(crate) validate_output: bool, + pub(crate) sym_info: Option, + pub(crate) debug_address: Option, + pub(crate) print_allocations: Option, + pub(crate) verify_allocation_consistency: bool, + pub(crate) time_phase_options: Option>, + pub(crate) numeric_experiments: Vec>, + pub(crate) write_gc_stats: Option, + pub(crate) gc_stats_ignore: Vec, + pub(crate) verbose_gc_stats: bool, + pub(crate) dependency_file: Option, + + // ── Format-specific ────────────────────────────────────────────────────── + pub target_args: T, +} + +impl Default for Args { + fn default() -> Self { + Args { + // Infrastructure + should_fork: true, + arch: Architecture::DEFAULT, + unrecognized_options: Vec::new(), + lib_search_path: Vec::new(), + inputs: Vec::new(), + output: Arc::from(Path::new("a.out")), + num_threads: None, + write_layout: std::env::var(WRITE_LAYOUT_ENV).is_ok_and(|v| v == "1"), + write_trace: std::env::var(WRITE_TRACE_ENV).is_ok_and(|v| v == "1"), + files_per_group: None, + save_dir: Default::default(), + jobserver_client: None, + available_threads: NonZeroUsize::new(1).unwrap(), + // Core linker behavior + strip: Strip::Nothing, + gc_sections: true, + merge_sections: true, + relax: true, + demangle: true, + no_undefined: false, + allow_shlib_undefined: false, + error_unresolved_symbols: true, + allow_multiple_definitions: false, + unresolved_symbols: UnresolvedSymbols::ReportAll, + undefined: Vec::new(), + copy_relocations: CopyRelocations::Allowed, + sysroot: None, + dynamic_linker: None, + entry: None, + wrap: Vec::new(), + exclude_libs: ExcludeLibs::None, + b_symbolic: BSymbolicKind::None, + export_list: Vec::new(), + defsym: Vec::new(), + section_start: HashMap::new(), + max_page_size: None, + execstack: false, + version_mode: VersionMode::None, + relocation_model: RelocationModel::NonRelocatable, + should_output_executable: true, + export_all_dynamic_symbols: false, + 
version_script_path: None, + export_list_path: None, + // Output/writing + mmap_output_file: true, + file_write_mode: None, + prepopulate_maps: false, + should_write_linker_identity: true, + // Debug/diagnostic + debug_fuel: None, + validate_output: std::env::var(VALIDATE_ENV).is_ok_and(|v| v == "1"), + sym_info: None, + debug_address: None, + print_allocations: std::env::var("WILD_PRINT_ALLOCATIONS") + .ok() + .and_then(|s| s.parse().ok()) + .map(FileId::from_encoded), + verify_allocation_consistency: std::env::var(WRITE_VERIFY_ALLOCATIONS_ENV) + .is_ok_and(|v| v == "1"), + time_phase_options: None, + numeric_experiments: Vec::new(), + write_gc_stats: None, + gc_stats_ignore: Vec::new(), + verbose_gc_stats: false, + dependency_file: None, + // Format-specific + target_args: T::default(), + } + } +} + +impl std::ops::Deref for Args { + type Target = T; + fn deref(&self) -> &T { + &self.target_args + } +} + +impl std::ops::DerefMut for Args { + fn deref_mut(&mut self) -> &mut T { + &mut self.target_args + } +} + +impl Args { + /// Transform the target-specific part while preserving common fields. 
+ pub fn map_target(self, f: impl FnOnce(T) -> U) -> Args { + Args { + // Infrastructure + should_fork: self.should_fork, + output: self.output, + arch: self.arch, + inputs: self.inputs, + lib_search_path: self.lib_search_path, + num_threads: self.num_threads, + available_threads: self.available_threads, + save_dir: self.save_dir, + unrecognized_options: self.unrecognized_options, + files_per_group: self.files_per_group, + write_layout: self.write_layout, + write_trace: self.write_trace, + jobserver_client: self.jobserver_client, + // Core linker behavior + strip: self.strip, + gc_sections: self.gc_sections, + merge_sections: self.merge_sections, + relax: self.relax, + demangle: self.demangle, + no_undefined: self.no_undefined, + allow_shlib_undefined: self.allow_shlib_undefined, + error_unresolved_symbols: self.error_unresolved_symbols, + allow_multiple_definitions: self.allow_multiple_definitions, + unresolved_symbols: self.unresolved_symbols, + undefined: self.undefined, + copy_relocations: self.copy_relocations, + sysroot: self.sysroot, + dynamic_linker: self.dynamic_linker, + entry: self.entry, + wrap: self.wrap, + exclude_libs: self.exclude_libs, + b_symbolic: self.b_symbolic, + export_list: self.export_list, + defsym: self.defsym, + section_start: self.section_start, + max_page_size: self.max_page_size, + execstack: self.execstack, + version_mode: self.version_mode, + relocation_model: self.relocation_model, + should_output_executable: self.should_output_executable, + export_all_dynamic_symbols: self.export_all_dynamic_symbols, + version_script_path: self.version_script_path, + export_list_path: self.export_list_path, + // Output/writing + mmap_output_file: self.mmap_output_file, + file_write_mode: self.file_write_mode, + prepopulate_maps: self.prepopulate_maps, + should_write_linker_identity: self.should_write_linker_identity, + // Debug/diagnostic + debug_fuel: self.debug_fuel, + validate_output: self.validate_output, + sym_info: self.sym_info, + 
debug_address: self.debug_address, + print_allocations: self.print_allocations, + verify_allocation_consistency: self.verify_allocation_consistency, + time_phase_options: self.time_phase_options, + numeric_experiments: self.numeric_experiments, + write_gc_stats: self.write_gc_stats, + gc_stats_ignore: self.gc_stats_ignore, + verbose_gc_stats: self.verbose_gc_stats, + dependency_file: self.dependency_file, + // Format-specific + target_args: f(self.target_args), + } + } + + pub fn map_ref_target(&self, f: impl FnOnce(&T) -> U) -> U { + f(&self.target_args) + } + + /// Uses 1 debug fuel, returning how much fuel remains. Debug fuel is intended to be used when + /// debugging certain kinds of bugs, so this function isn't normally referenced. To use it, the + /// caller should take a different branch depending on whether the value is still positive. You + /// can then do a binary search. + pub(crate) fn use_debug_fuel(&self) -> i64 { + let Some(fuel) = self.debug_fuel.as_ref() else { + return i64::MAX; + }; + fuel.fetch_sub(1, std::sync::atomic::Ordering::AcqRel) - 1 + } + + /// Returns whether there was sufficient fuel. If the last bit of fuel was used, then calls + /// `last_cb`. + #[allow(unused)] + pub(crate) fn use_debug_fuel_on_last(&self, last_cb: impl FnOnce()) -> bool { + match self.use_debug_fuel() { + 1.. 
=> true, + 0 => { + last_cb(); + true + } + _ => false, + } + } + + pub(crate) fn trace_span_for_file( + &self, + file_id: FileId, + ) -> Option { + let should_trace = self.print_allocations == Some(file_id); + should_trace.then(|| tracing::trace_span!(crate::debug_trace::TRACE_SPAN_NAME).entered()) + } + + pub fn should_fork(&self) -> bool { + self.should_fork + } + + pub(crate) fn numeric_experiment(&self, exp: Experiment, default: u64) -> u64 { + self.numeric_experiments + .get(exp as usize) + .copied() + .flatten() + .unwrap_or(default) + } + + pub(crate) fn loadable_segment_alignment(&self) -> Alignment { + if let Some(max_page_size) = self.max_page_size { + return max_page_size; + } + + match self.arch { + Architecture::X86_64 => Alignment { exponent: 12 }, + Architecture::AArch64 => Alignment { exponent: 16 }, + Architecture::RISCV64 => Alignment { exponent: 12 }, + Architecture::LoongArch64 => Alignment { exponent: 16 }, + } + } + + pub(crate) fn strip_all(&self) -> bool { + matches!(self.strip, Strip::All) + } + + pub(crate) fn strip_debug(&self) -> bool { + matches!(self.strip, Strip::All | Strip::Debug) + } +} + +/// Linker args with an activated thread pool. Holds jobserver tokens for the +/// duration of the link to keep the threads available. +pub struct ActivatedArgs { + pub args: Args, + _jobserver_tokens: Vec, +} + +impl ActivatedArgs { + pub fn map_target(self, f: impl FnOnce(T) -> U) -> ActivatedArgs { + ActivatedArgs { + args: self.args.map_target(f), + _jobserver_tokens: self._jobserver_tokens, + } + } +} + +impl Args { + /// Sets up the thread pool, using the explicit number of threads if specified, + /// or falling back to the jobserver protocol if available. 
+ /// + /// + pub fn activate_thread_pool(mut self) -> Result> { + crate::timing_phase!("Activate thread pool"); + + let mut tokens = Vec::new(); + self.available_threads = self.num_threads.unwrap_or_else(|| { + if let Some(client) = &self.jobserver_client { + while let Ok(Some(acquired)) = client.try_acquire() { + tokens.push(acquired); + } + tracing::trace!(count = tokens.len(), "Acquired jobserver tokens"); + // Our parent "holds" one jobserver token, add it. + NonZeroUsize::new((tokens.len() + 1).max(1)).unwrap() + } else { + std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(1).unwrap()) + } + }); + + // The pool might be already initialized, suppress the error intentionally. + let _ = rayon::ThreadPoolBuilder::new() + .num_threads(self.available_threads.get()) + .build_global(); + + Ok(ActivatedArgs { + args: self, + _jobserver_tokens: tokens, + }) + } +} + +impl Args { + /// Parse CLI arguments. Detects target format from `--target=`, `-m`, + /// or host default, then routes to the format-specific parser. + pub fn parse I, S: AsRef, I: Iterator>(input: F) -> Result { + let all_args: Vec = input().map(|s| s.as_ref().to_owned()).collect(); + let detected = detect_target(&all_args)?; + let filtered = filter_and_inject_target_flags(&all_args, detected.format, detected.arch); + + match detected.format { + OutputFormat::Elf => { + let elf_args = linux::parse(|| filtered.iter().map(|s| s.as_str()))?; + Ok(elf_args.map_target(TargetArgs::Elf)) + } + OutputFormat::Pe => { + let pe_args = windows::parse(|| filtered.iter().map(|s| s.as_str()))?; + Ok(pe_args.map_target(TargetArgs::Pe)) + } + } + } +} + +/// Top-level parse function. 
+pub fn parse I, S: AsRef, I: Iterator>( + input: F, +) -> Result> { + Args::parse(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn to_strings(args: &[&str]) -> Vec { + args.iter().map(|s| s.to_string()).collect() + } + + // ---- detect_target tests ---- + + #[test] + fn test_detect_format_from_triple_linux_x86() { + let args = to_strings(&["--target=x86_64-unknown-linux-gnu", "-o", "out"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Elf); + assert_eq!(result.arch, Some(Architecture::X86_64)); + } + + #[test] + fn test_detect_format_from_triple_windows() { + let args = to_strings(&["-target=x86_64-pc-windows-msvc", "/OUT:foo.exe"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Pe); + assert_eq!(result.arch, Some(Architecture::X86_64)); + } + + #[test] + fn test_detect_format_from_slash_target() { + let args = to_strings(&["/TARGET:aarch64-pc-windows-msvc", "foo.obj"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Pe); + assert_eq!(result.arch, Some(Architecture::AArch64)); + } + + #[test] + fn test_detect_format_space_separated() { + let args = to_strings(&["--target", "aarch64-unknown-linux-gnu", "-o", "out"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Elf); + assert_eq!(result.arch, Some(Architecture::AArch64)); + } + + #[test] + fn test_detect_format_from_m_flag() { + let args = to_strings(&["-m", "elf_x86_64", "-o", "out"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Elf); + assert_eq!(result.arch, None); + } + + #[test] + fn test_m_flag_overrides_target_format() { + let args = to_strings(&["--target=x86_64-pc-windows-msvc", "-m", "elf_x86_64"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Elf); + } + + #[test] + fn test_detect_format_default_no_flags() { + let args = to_strings(&["-o", 
"out", "foo.o"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::default()); + assert_eq!(result.arch, None); + } + + #[test] + fn test_detect_format_riscv_triple() { + let args = to_strings(&["--target=riscv64gc-unknown-linux-gnu", "-o", "out"]); + let result = detect_target(&args).unwrap(); + assert_eq!(result.format, OutputFormat::Elf); + assert_eq!(result.arch, Some(Architecture::RISCV64)); + } + + // ---- filter_and_inject_target_flags tests ---- + + #[test] + fn test_filter_strips_target_equals() { + let args = to_strings(&["--target=x86_64-unknown-linux-gnu", "-o", "out", "foo.o"]); + let filtered = + filter_and_inject_target_flags(&args, OutputFormat::Elf, Some(Architecture::X86_64)); + assert_eq!(filtered[0], "-m"); + assert_eq!(filtered[1], "elf_x86_64"); + assert_eq!(filtered[2], "-o"); + assert!(!filtered.iter().any(|a| a.contains("--target"))); + } + + #[test] + fn test_filter_strips_target_space() { + let args = to_strings(&["--target", "aarch64-unknown-linux-gnu", "-o", "out"]); + let filtered = + filter_and_inject_target_flags(&args, OutputFormat::Elf, Some(Architecture::AArch64)); + assert_eq!(filtered[0], "-m"); + assert_eq!(filtered[1], "aarch64linux"); + assert!( + !filtered + .iter() + .any(|a| a == "--target" || a.contains("linux-gnu")) + ); + } + + #[test] + fn test_filter_strips_slash_target() { + let args = to_strings(&["/TARGET:x86_64-pc-windows-msvc", "/OUT:foo.exe", "bar.obj"]); + let filtered = + filter_and_inject_target_flags(&args, OutputFormat::Pe, Some(Architecture::X86_64)); + assert_eq!(filtered[0], "/MACHINE:X64"); + assert_eq!(filtered[1], "/OUT:foo.exe"); + } + + #[test] + fn test_filter_preserves_m_flag() { + let args = to_strings(&[ + "--target=x86_64-unknown-linux-gnu", + "-m", + "aarch64linux", + "-o", + "out", + ]); + let filtered = + filter_and_inject_target_flags(&args, OutputFormat::Elf, Some(Architecture::X86_64)); + assert_eq!(filtered[0], "-m"); + assert_eq!(filtered[1], 
"elf_x86_64"); + assert!(filtered.contains(&"-m".to_string())); + assert!(filtered.contains(&"aarch64linux".to_string())); + } + + #[test] + fn test_filter_no_target_no_inject() { + let args = to_strings(&["-o", "out", "foo.o"]); + let filtered = filter_and_inject_target_flags(&args, OutputFormat::Elf, None); + assert_eq!(filtered, args); + } +} diff --git a/libwild/src/args/consts.rs b/libwild/src/args/consts.rs new file mode 100644 index 000000000..7a6363a4b --- /dev/null +++ b/libwild/src/args/consts.rs @@ -0,0 +1,64 @@ +pub const WILD_UNSUPPORTED_ENV: &str = "WILD_UNSUPPORTED"; +pub const VALIDATE_ENV: &str = "WILD_VALIDATE_OUTPUT"; +pub const WRITE_LAYOUT_ENV: &str = "WILD_WRITE_LAYOUT"; +pub const WRITE_TRACE_ENV: &str = "WILD_WRITE_TRACE"; +pub const REFERENCE_LINKER_ENV: &str = "WILD_REFERENCE_LINKER"; +pub(crate) const FILES_PER_GROUP_ENV: &str = "WILD_FILES_PER_GROUP"; + +/// Set this environment variable if you get a failure during writing due to too much or too little +/// space being allocated to some section. When set, each time we allocate during layout, we'll +/// check that what we're doing is consistent with writing and fail in a more easy to debug way. i.e +/// we'll report the particular combination of value flags, resolution flags etc that triggered the +/// inconsistency. +pub(crate) const WRITE_VERIFY_ALLOCATIONS_ENV: &str = "WILD_VERIFY_ALLOCATIONS"; + +// These flags don't currently affect our behaviour. TODO: Assess whether we should error or warn if +// these are given. This is tricky though. On the one hand we want to be a drop-in replacement for +// other linkers. On the other, we should perhaps somehow let the user know that we don't support a +// feature. +pub(super) const SILENTLY_IGNORED_FLAGS: &[&str] = &[ + // Just like other modern linkers, we don't need groups in order to resolve cycles. 
+ "start-group", + "end-group", + // TODO: This is supposed to suppress built-in search paths, but I don't think we have any + // built-in search paths. Perhaps we should? + "nostdlib", + // TODO + "no-undefined-version", + "fatal-warnings", + "color-diagnostics", + "undefined-version", + "sort-common", + "stats", +]; +pub(super) const SILENTLY_IGNORED_SHORT_FLAGS: &[&str] = &[ + "(", + ")", + // On Illumos, the Clang driver inserts a meaningless -C flag before calling any non-GNU ld + // linker. + #[cfg(target_os = "illumos")] + "C", +]; + +pub(super) const IGNORED_FLAGS: &[&str] = &[ + "gdb-index", + "fix-cortex-a53-835769", + "fix-cortex-a53-843419", + "discard-all", + "use-android-relr-tags", + "x", // alias for --discard-all +]; + +// These flags map to the default behavior of the linker. +pub(super) const DEFAULT_FLAGS: &[&str] = &[ + "no-call-graph-profile-sort", + "no-copy-dt-needed-entries", + "no-add-needed", + "discard-locals", + "no-fatal-warnings", + "no-use-android-relr-tags", +]; +pub(super) const DEFAULT_SHORT_FLAGS: &[&str] = &[ + "X", // alias for --discard-locals + "EL", // little endian +]; diff --git a/libwild/src/args/linux.rs b/libwild/src/args/linux.rs new file mode 100644 index 000000000..66953dcf3 --- /dev/null +++ b/libwild/src/args/linux.rs @@ -0,0 +1,1852 @@ +//! A handwritten parser for our arguments. +//! +//! We don't currently use a 3rd party library like clap for a few reasons. Firstly, we need to +//! support flags like `--push-state` and `--pop-state`. These need to push and pop a state stack +//! when they're parsed. Some of the other flags then need to manipulate the state of the top of the +//! stack. Positional arguments like input files and libraries to link, then need to have the +//! current state of the stack attached to that file. +//! +//! Secondly, long arguments need to also be accepted with a single '-' in addition to the more +//! common double-dash. +//! +//! 
Basically, we need to be able to parse arguments in the same way as the other linkers on the +//! platform that we're targeting. + +use super::ArgumentParser; +use super::BSymbolicKind; +use super::CopyRelocations; +use super::CopyRelocationsDisabledReason; +use super::CounterKind; +use super::DefsymValue; +use super::ExcludeLibs; +use super::FileWriteMode; +use super::Input; +use super::InputSpec; +use super::Modifiers; +use super::RelocationModel; +use super::Strip; +use super::UnresolvedSymbols; +use super::VersionMode; +use super::warn_unsupported; +use crate::alignment::Alignment; +use hashbrown::HashSet; +use crate::arch::Architecture; +use crate::bail; +use crate::ensure; +use crate::error::Context as _; +use crate::error::Result; +use crate::linker_script::maybe_forced_sysroot; +use crate::save_dir::SaveDir; +use crate::timing_phase; +use indexmap::IndexSet; +use itertools::Itertools; +use jobserver::Client; +use object::elf::GNU_PROPERTY_X86_ISA_1_BASELINE; +use object::elf::GNU_PROPERTY_X86_ISA_1_V2; +use object::elf::GNU_PROPERTY_X86_ISA_1_V3; +use object::elf::GNU_PROPERTY_X86_ISA_1_V4; +use std::ffi::CString; +use std::mem::take; +use std::num::NonZero; +use std::num::NonZeroU32; +use std::num::NonZeroU64; +use std::num::NonZeroUsize; +use std::path::Path; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use std::sync::atomic::AtomicI64; + + +/// ELF-specific linker arguments. Common fields (output, arch, strip, gc_sections, etc.) +/// live on `Args`. Access them via direct field access on `Args`, +/// and ELF-specific fields are accessible via `Deref`/`DerefMut`. 
+#[derive(Debug)] +pub struct ElfArgs { + pub(crate) should_write_eh_frame_hdr: bool, + pub(crate) rpath: Option, + pub(crate) soname: Option, + pub(crate) enable_new_dtags: bool, + pub(crate) build_id: BuildIdOption, + pub(crate) needs_origin_handling: bool, + pub(crate) needs_nodelete_handling: bool, + pub(crate) relro: bool, + pub(crate) auxiliary: Vec, + pub(crate) got_plt_syms: bool, + pub(crate) hash_style: HashStyle, + pub(crate) z_interpose: bool, + pub(crate) z_isa: Option, + pub(crate) z_stack_size: Option, + pub(crate) plugin_path: Option, + pub(crate) plugin_args: Vec, + rpath_set: IndexSet, +} + + +#[derive(Debug)] +pub(crate) enum BuildIdOption { + None, + Fast, + Hex(Vec), + Uuid, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum HashStyle { + Gnu, + Sysv, + Both, +} + + +impl HashStyle { + pub(crate) const fn includes_gnu(self) -> bool { + matches!(self, HashStyle::Gnu | HashStyle::Both) + } + + pub(crate) const fn includes_sysv(self) -> bool { + matches!(self, HashStyle::Sysv | HashStyle::Both) + } +} + + +use super::consts::*; + +impl Default for ElfArgs { + fn default() -> Self { + ElfArgs { + should_write_eh_frame_hdr: false, + rpath: None, + soname: None, + enable_new_dtags: true, + build_id: BuildIdOption::None, + needs_origin_handling: false, + needs_nodelete_handling: false, + relro: true, + auxiliary: Vec::new(), + got_plt_syms: false, + hash_style: HashStyle::Both, + z_interpose: false, + z_isa: None, + z_stack_size: None, + plugin_path: None, + plugin_args: Vec::new(), + rpath_set: Default::default(), + } + } +} + +// Parse the supplied input arguments, which should not include the program name. +pub(crate) fn parse I, S: AsRef, I: Iterator>( + input: F, +) -> Result> { + use crate::input_data::MAX_FILES_PER_GROUP; + + // SAFETY: Should be called early before other descriptors are opened and + // so we open it before the arguments are parsed (can open a file). 
+ let jobserver_client = unsafe { Client::from_env() }; + + let files_per_group = std::env::var(FILES_PER_GROUP_ENV) + .ok() + .map(|s| s.parse()) + .transpose()?; + + if let Some(x) = files_per_group { + ensure!( + x <= MAX_FILES_PER_GROUP, + "{FILES_PER_GROUP_ENV}={x} but maximum is {MAX_FILES_PER_GROUP}" + ); + } + + let mut args = super::Args:: { + files_per_group, + jobserver_client, + ..Default::default() + }; + + args.save_dir = SaveDir::new(&input)?; + + let mut input = input(); + + let mut modifier_stack = vec![Modifiers::default()]; + + if std::env::var(REFERENCE_LINKER_ENV).is_ok() { + args.write_layout = true; + args.write_trace = true; + } + + let arg_parser = setup_argument_parser(); + while let Some(arg) = input.next() { + let arg = arg.as_ref(); + + arg_parser.handle_argument(&mut args, &mut modifier_stack, arg, &mut input)?; + } + + // Copy relocations are only permitted when building executables. + if !args.should_output_executable { + args.copy_relocations = + CopyRelocations::Disallowed(CopyRelocationsDisabledReason::SharedObject); + } + + if !args.rpath_set.is_empty() { + args.rpath = Some(take(&mut args.rpath_set).into_iter().join(":")); + } + + if !args.unrecognized_options.is_empty() { + let options_list = args.unrecognized_options.join(", "); + bail!("unrecognized option(s): {}", options_list); + } + + if !args.auxiliary.is_empty() && args.should_output_executable { + bail!("-f may not be used without -shared"); + } + + Ok(args) +} + +impl super::Args { + pub fn parse_elf I, S: AsRef, I: Iterator>( + input: F, + ) -> Result> { + timing_phase!("Parse args"); + parse(input) + } +} + +impl super::Args { + /// Adds a linker script to our outputs. Note, this is only called for scripts specified via + /// flags like -T. Where a linker script is just listed as an argument, this won't be called. 
+ fn add_script(&mut self, path: &str) { + self.inputs.push(Input { + spec: InputSpec::File(Box::from(Path::new(path))), + search_first: None, + modifiers: Modifiers::default(), + }); + } +} + +fn parse_number(s: &str) -> Result { + crate::parsing::parse_number(s).map_err(|_| crate::error!("Invalid number: {}", s)) +} + +fn parse_defsym_expression(s: &str) -> DefsymValue { + use crate::parsing::ParsedSymbolExpression; + use crate::parsing::parse_symbol_expression; + + match parse_symbol_expression(s) { + ParsedSymbolExpression::Absolute(value) => DefsymValue::Value(value), + ParsedSymbolExpression::SymbolWithOffset(sym, offset) => { + DefsymValue::SymbolWithOffset(sym.to_owned(), offset) + } + } +} + + +fn setup_argument_parser() -> ArgumentParser { + let mut parser = ArgumentParser::new(); + + parser + .declare_with_param() + .prefix("L") + .help("Add directory to library search path") + .execute(|args: &mut super::Args, _modifier_stack, value| { + let handle_sysroot = |path| { + args.sysroot + .as_ref() + .and_then(|sysroot| maybe_forced_sysroot(path, sysroot)) + .unwrap_or_else(|| Box::from(path)) + }; + + let dir = handle_sysroot(Path::new(value)); + args.save_dir.handle_file(value); + args.lib_search_path.push(dir); + Ok(()) + }); + + parser + .declare_with_param() + .prefix("l") + .help("Link with library") + .sub_option_with_value( + ":filename", + "Link with specific file", + |args, modifier_stack, value| { + let stripped = value.strip_prefix(':').unwrap_or(value); + let spec = InputSpec::File(Box::from(Path::new(stripped))); + args.inputs.push(Input { + spec, + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + Ok(()) + }, + ) + .sub_option_with_value( + "libname", + "Link with library libname.so or libname.a", + |args, modifier_stack, value| { + let spec = InputSpec::Lib(Box::from(value)); + args.inputs.push(Input { + spec, + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + Ok(()) + }, + ) + .execute(|args: 
&mut super::Args, modifier_stack, value| { + let spec = if let Some(stripped) = value.strip_prefix(':') { + InputSpec::Search(Box::from(stripped)) + } else { + InputSpec::Lib(Box::from(value)) + }; + args.inputs.push(Input { + spec, + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + Ok(()) + }); + + parser + .declare_with_param() + .prefix("u") + .help("Force resolution of the symbol") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.undefined.push(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .prefix("m") + .help("Set target architecture") + .sub_option("elf_x86_64", "x86-64 ELF target", |args, _| { + args.arch = Architecture::X86_64; + Ok(()) + }) + .sub_option( + "elf_x86_64_sol2", + "x86-64 ELF target (Solaris)", + |args, _| { + if args.dynamic_linker.is_none() { + args.dynamic_linker = Some(Path::new("/lib/amd64/ld.so.1").into()); + } + args.arch = Architecture::X86_64; + Ok(()) + }, + ) + .sub_option("aarch64elf", "AArch64 ELF target", |args, _| { + args.arch = Architecture::AArch64; + Ok(()) + }) + .sub_option("aarch64linux", "AArch64 ELF target (Linux)", |args, _| { + args.arch = Architecture::AArch64; + Ok(()) + }) + .sub_option("elf64lriscv", "RISC-V 64-bit ELF target", |args, _| { + args.arch = Architecture::RISCV64; + Ok(()) + }) + .sub_option( + "elf64loongarch", + "LoongArch 64-bit ELF target", + |args, _| { + args.arch = Architecture::LoongArch64; + Ok(()) + }, + ) + .execute(|_args: &mut super::Args, _modifier_stack, value| { + bail!("-m {value} is not yet supported"); + }); + + parser + .declare_with_param() + .prefix("z") + .help("Linker option") + .sub_option("now", "Resolve all symbols immediately", |_, _| Ok(())) + .sub_option( + "origin", + "Mark object as requiring immediate $ORIGIN", + |args, _| { + args.needs_origin_handling = true; + Ok(()) + }, + ) + .sub_option("relro", "Enable RELRO program header", |args, _| { + args.relro = true; + Ok(()) + }) + 
.sub_option("norelro", "Disable RELRO program header", |args, _| { + args.relro = false; + Ok(()) + }) + .sub_option("notext", "Do not report DT_TEXTREL as an error", |_, _| { + Ok(()) + }) + .sub_option("nostart-stop-gc", "Disable start/stop symbol GC", |_, _| { + Ok(()) + }) + .sub_option( + "execstack", + "Mark object as requiring an executable stack", + |args, _| { + args.execstack = true; + Ok(()) + }, + ) + .sub_option( + "noexecstack", + "Mark object as not requiring an executable stack", + |args, _| { + args.execstack = false; + Ok(()) + }, + ) + .sub_option("nocopyreloc", "Disable copy relocations", |args, _| { + args.copy_relocations = + CopyRelocations::Disallowed(CopyRelocationsDisabledReason::Flag); + Ok(()) + }) + .sub_option( + "nodelete", + "Mark shared object as non-deletable", + |args, _| { + args.needs_nodelete_handling = true; + Ok(()) + }, + ) + .sub_option( + "defs", + "Report unresolved symbol references in object files", + |args, _| { + args.no_undefined = true; + Ok(()) + }, + ) + .sub_option( + "undefs", + "Do not report unresolved symbol references in object files", + |args, _| { + args.no_undefined = false; + Ok(()) + }, + ) + .sub_option("muldefs", "Allow multiple definitions", |args, _| { + args.allow_multiple_definitions = true; + Ok(()) + }) + .sub_option("lazy", "Use lazy binding (default)", |_, _| Ok(())) + .sub_option( + "interpose", + "Mark object to interpose all DSOs but executable", + |args, _| { + args.z_interpose = true; + Ok(()) + }, + ) + .sub_option_with_value( + "stack-size=", + "Set size of stack segment", + |args, _, value| { + let size: u64 = parse_number(value)?; + args.z_stack_size = NonZero::new(size); + + Ok(()) + }, + ) + .sub_option( + "x86-64-baseline", + "Mark x86-64-baseline ISA as needed", + |args, _| { + args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_BASELINE); + Ok(()) + }, + ) + .sub_option("x86-64-v2", "Mark x86-64-v2 ISA as needed", |args, _| { + args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V2); + 
Ok(()) + }) + .sub_option("x86-64-v3", "Mark x86-64-v3 ISA as needed", |args, _| { + args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V3); + Ok(()) + }) + .sub_option("x86-64-v4", "Mark x86-64-v4 ISA as needed", |args, _| { + args.z_isa = NonZero::new(GNU_PROPERTY_X86_ISA_1_V4); + Ok(()) + }) + .sub_option_with_value( + "max-page-size=", + "Set maximum page size for load segments", + |args, _, value| { + let size: u64 = parse_number(value)?; + if !size.is_power_of_two() { + bail!("Invalid alignment {size:#x}"); + } + args.max_page_size = Some(Alignment { + exponent: size.trailing_zeros() as u8, + }); + + Ok(()) + }, + ) + .execute(|_args: &mut super::Args, _modifier_stack, value| { + warn_unsupported(&("-z ".to_owned() + value))?; + Ok(()) + }); + + parser + .declare_with_param() + .prefix("R") + .help("Add runtime library search path") + .execute(|args: &mut super::Args, _modifier_stack, value| { + if Path::new(value).is_file() { + args.unrecognized_options + .push(format!("-R,{value}(filename)")); + } else { + args.rpath_set.insert(value.to_string()); + } + Ok(()) + }); + + parser + .declare_with_param() + .prefix("O") + .execute(|_args: &mut super::Args, _modifier_stack, _value| + // We don't use opt-level for now. 
+ Ok(())); + + parser + .declare() + .long("static") + .long("Bstatic") + .help("Disallow linking of shared libraries") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().allow_shared = false; + Ok(()) + }); + + parser + .declare() + .long("Bdynamic") + .help("Allow linking of shared libraries") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().allow_shared = true; + Ok(()) + }); + + parser + .declare_with_param() + .long("output") + .short("o") + .help("Set the output filename") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.output = Arc::from(Path::new(value)); + Ok(()) + }); + + parser + .declare() + .long("strip-all") + .short("s") + .help("Strip all symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.strip = Strip::All; + Ok(()) + }); + + parser + .declare() + .long("strip-debug") + .short("S") + .help("Strip debug symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.strip = Strip::Debug; + Ok(()) + }); + + parser + .declare() + .long("gc-sections") + .help("Enable removal of unused sections") + .execute(|args: &mut super::Args, _modifier_stack| { + args.gc_sections = true; + Ok(()) + }); + + parser + .declare() + .long("no-gc-sections") + .help("Disable removal of unused sections") + .execute(|args: &mut super::Args, _modifier_stack| { + args.gc_sections = false; + Ok(()) + }); + + parser + .declare() + .long("shared") + .long("Bshareable") + .help("Create a shared library") + .execute(|args: &mut super::Args, _modifier_stack| { + args.should_output_executable = false; + Ok(()) + }); + + parser + .declare() + .long("pie") + .long("pic-executable") + .help("Create a position-independent executable") + .execute(|args: &mut super::Args, _modifier_stack| { + args.relocation_model = RelocationModel::Relocatable; + args.should_output_executable = true; + Ok(()) + }); + + parser + .declare() + .long("no-pie") + .help("Do not 
create a position-dependent executable (default)") + .execute(|args: &mut super::Args, _modifier_stack| { + args.relocation_model = RelocationModel::NonRelocatable; + args.should_output_executable = true; + Ok(()) + }); + + parser + .declare_with_param() + .long("pack-dyn-relocs") + .help("Specify dynamic relocation packing format") + .execute(|_args: &mut super::Args, _modifier_stack, value| { + if value != "none" { + warn_unsupported(&format!("--pack-dyn-relocs={value}"))?; + } + Ok(()) + }); + + parser + .declare() + .long("help") + .help("Show this help message") + .execute(|_args: &mut super::Args, _modifier_stack| { + use std::io::Write as _; + let parser = setup_argument_parser(); + let mut stdout = std::io::stdout().lock(); + writeln!(stdout, "{}", parser.generate_help())?; + + // The following listing is something autoconf detection relies on. + writeln!(stdout, "wild: supported targets:elf64 -x86-64 elf64-littleaarch64 elf64-littleriscv elf64-loongarch")?; + writeln!(stdout, "wild: supported emulations: elf_x86_64 aarch64elf elf64lriscv elf64loongarch")?; + + std::process::exit(0); + }); + + parser + .declare() + .long("version") + .help("Show version information and exit") + .execute(|args: &mut super::Args, _modifier_stack| { + args.version_mode = VersionMode::ExitAfterPrint; + Ok(()) + }); + + parser + .declare() + .short("v") + .help("Print version and continue linking") + .execute(|args: &mut super::Args, _modifier_stack| { + args.version_mode = VersionMode::Verbose; + Ok(()) + }); + + parser + .declare() + .long("demangle") + .help("Enable symbol demangling") + .execute(|args: &mut super::Args, _modifier_stack| { + args.demangle = true; + Ok(()) + }); + + parser + .declare() + .long("no-demangle") + .help("Disable symbol demangling") + .execute(|args: &mut super::Args, _modifier_stack| { + args.demangle = false; + Ok(()) + }); + + parser + .declare_with_optional_param() + .long("time") + .help("Show timing information") + .execute(|args: &mut 
super::Args, _modifier_stack, value| { + match value { + Some(v) => args.time_phase_options = Some(parse_time_phase_options(v)?), + None => args.time_phase_options = Some(Vec::new()), + } + Ok(()) + }); + + parser + .declare_with_param() + .long("dynamic-linker") + .help("Set dynamic linker path") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.dynamic_linker = Some(Box::from(Path::new(value))); + Ok(()) + }); + + parser + .declare() + .long("no-dynamic-linker") + .help("Omit the load-time dynamic linker request") + .execute(|args: &mut super::Args, _modifier_stack| { + args.dynamic_linker = None; + Ok(()) + }); + + parser + .declare() + .long("mmap-output-file") + .help("Write output file using mmap (default)") + .execute(|args: &mut super::Args, _modifier_stack| { + args.mmap_output_file = true; + Ok(()) + }); + + parser + .declare() + .long("no-mmap-output-file") + .help("Write output file without mmap") + .execute(|args: &mut super::Args, _modifier_stack| { + args.mmap_output_file = false; + Ok(()) + }); + + parser + .declare_with_param() + .long("entry") + .short("e") + .help("Set the entry point") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.entry = Some(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_optional_param() + .long("threads") + .help("Use multiple threads for linking") + .execute(|args: &mut super::Args, _modifier_stack, value| { + match value { + Some(v) => { + args.num_threads = Some(NonZeroUsize::try_from(v.parse::()?)?); + } + None => { + args.num_threads = None; // Default behaviour + } + } + Ok(()) + }); + + parser + .declare() + .long("no-threads") + .help("Use a single thread") + .execute(|args: &mut super::Args, _modifier_stack| { + args.num_threads = Some(NonZeroUsize::new(1).unwrap()); + Ok(()) + }); + + parser + .declare_with_param() + .long("wild-experiments") + .help("List of numbers. Used to tweak internal parameters. 
'_' keeps default value.") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.numeric_experiments = value + .split(',') + .map(|p| { + if p == "_" { + Ok(None) + } else { + Ok(Some(p.parse()?)) + } + }) + .collect::>>>()?; + Ok(()) + }); + + parser + .declare() + .long("as-needed") + .help("Set DT_NEEDED if used") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().as_needed = true; + Ok(()) + }); + + parser + .declare() + .long("no-as-needed") + .help("Always set DT_NEEDED") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().as_needed = false; + Ok(()) + }); + + parser + .declare() + .long("whole-archive") + .help("Include all objects from archives") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().whole_archive = true; + Ok(()) + }); + + parser + .declare() + .long("no-whole-archive") + .help("Disable --whole-archive") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().whole_archive = false; + Ok(()) + }); + + parser + .declare() + .long("push-state") + .help("Save current linker flags") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.push(*modifier_stack.last().unwrap()); + Ok(()) + }); + + parser + .declare() + .long("pop-state") + .help("Restore previous linker flags") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.pop(); + if modifier_stack.is_empty() { + bail!("Mismatched --pop-state"); + } + Ok(()) + }); + + parser + .declare() + .long("eh-frame-hdr") + .help("Create .eh_frame_hdr section") + .execute(|args: &mut super::Args, _modifier_stack| { + args.should_write_eh_frame_hdr = true; + Ok(()) + }); + + parser + .declare() + .long("no-eh-frame-hdr") + .help("Don't create .eh_frame_hdr section") + .execute(|args: &mut super::Args, _modifier_stack| { + args.should_write_eh_frame_hdr = false; + Ok(()) + }); + + parser + .declare() + 
.long("export-dynamic") + .short("E") + .help("Export all dynamic symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.export_all_dynamic_symbols = true; + Ok(()) + }); + + parser + .declare() + .long("no-export-dynamic") + .help("Do not export dynamic symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.export_all_dynamic_symbols = false; + Ok(()) + }); + + parser + .declare_with_param() + .long("soname") + .prefix("h") + .help("Set shared object name") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.soname = Some(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("rpath") + .help("Add directory to runtime library search path") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.rpath_set.insert(value.to_string()); + Ok(()) + }); + + parser + .declare() + .long("no-string-merge") + .help("Disable section merging") + .execute(|args: &mut super::Args, _modifier_stack| { + args.merge_sections = false; + Ok(()) + }); + + parser + .declare() + .long("no-undefined") + .help("Do not allow unresolved symbols in object files") + .execute(|args: &mut super::Args, _modifier_stack| { + args.no_undefined = true; + Ok(()) + }); + + parser + .declare() + .long("allow-multiple-definition") + .help("Allow multiple definitions of symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.allow_multiple_definitions = true; + Ok(()) + }); + + parser + .declare() + .long("relax") + .help("Enable target-specific optimization (instruction relaxation)") + .execute(|args: &mut super::Args, _modifier_stack| { + args.relax = true; + Ok(()) + }); + + parser + .declare() + .long("no-relax") + .help("Disable relaxation") + .execute(|args: &mut super::Args, _modifier_stack| { + args.relax = false; + Ok(()) + }); + + parser + .declare() + .long("validate-output") + .execute(|args: &mut super::Args, _modifier_stack| { + args.validate_output = true; + Ok(()) + }); + + parser + 
.declare() + .long("write-layout") + .execute(|args: &mut super::Args, _modifier_stack| { + args.write_layout = true; + Ok(()) + }); + + parser + .declare() + .long("write-trace") + .execute(|args: &mut super::Args, _modifier_stack| { + args.write_trace = true; + Ok(()) + }); + + parser + .declare() + .long("got-plt-syms") + .help("Write symbol table entries that point to the GOT/PLT entry for symbols") + .execute(|args: &mut super::Args, _modifier_stack| { + args.got_plt_syms = true; + Ok(()) + }); + + parser + .declare() + .long("Bsymbolic") + .help("Bind global references locally") + .execute(|args: &mut super::Args, _modifier_stack| { + args.b_symbolic = BSymbolicKind::All; + Ok(()) + }); + + parser + .declare() + .long("Bsymbolic-functions") + .help("Bind global function references locally") + .execute(|args: &mut super::Args, _modifier_stack| { + args.b_symbolic = BSymbolicKind::Functions; + Ok(()) + }); + + parser + .declare() + .long("Bsymbolic-non-weak-functions") + .help("Bind non-weak global function references locally") + .execute(|args: &mut super::Args, _modifier_stack| { + args.b_symbolic = BSymbolicKind::NonWeakFunctions; + Ok(()) + }); + + parser + .declare() + .long("Bsymbolic-non-weak") + .help("Bind non-weak global references locally") + .execute(|args: &mut super::Args, _modifier_stack| { + args.b_symbolic = BSymbolicKind::NonWeak; + Ok(()) + }); + + parser + .declare() + .long("Bno-symbolic") + .help("Do not bind global symbol references locally") + .execute(|args: &mut super::Args, _modifier_stack| { + args.b_symbolic = BSymbolicKind::None; + Ok(()) + }); + + parser + .declare_with_param() + .long("thread-count") + .help("Set the number of threads to use") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.num_threads = Some(NonZeroUsize::try_from(value.parse::()?)?); + Ok(()) + }); + + parser + .declare_with_param() + .long("exclude-libs") + .help("Exclude libraries") + .execute(|args: &mut super::Args, _modifier_stack, 
value| { + for lib in value.split([',', ':']) { + if lib.is_empty() { + continue; + } + + if lib == "ALL" { + args.exclude_libs = ExcludeLibs::All; + return Ok(()); + } + + match &mut args.exclude_libs { + ExcludeLibs::All => {} + ExcludeLibs::None => { + let mut set = HashSet::new(); + set.insert(Box::from(lib)); + args.exclude_libs = ExcludeLibs::Some(set); + } + ExcludeLibs::Some(set) => { + set.insert(Box::from(lib)); + } + } + } + + Ok(()) + }); + + parser + .declare_with_param() + .long("version-script") + .help("Use version script") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.save_dir.handle_file(value); + args.version_script_path = Some(PathBuf::from(value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("script") + .prefix("T") + .help("Use linker script") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.save_dir.handle_file(value); + args.add_script(value); + Ok(()) + }); + + parser + .declare_with_param() + .long("export-dynamic-symbol") + .help("Export dynamic symbol") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.export_list.push(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("export-dynamic-symbol-list") + .help("Export dynamic symbol list") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.export_list_path = Some(PathBuf::from(value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("dynamic-list") + .help("Read the dynamic symbol list from a file") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.b_symbolic = BSymbolicKind::All; + args.export_list_path = Some(PathBuf::from(value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("write-gc-stats") + .help("Write GC statistics") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.write_gc_stats = Some(PathBuf::from(value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("gc-stats-ignore") + .help("Ignore 
files in GC stats") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.gc_stats_ignore.push(value.to_owned()); + Ok(()) + }); + + parser + .declare() + .long("no-identity-comment") + .help("Don't write the linker name and version in .comment") + .execute(|args: &mut super::Args, _modifier_stack| { + args.should_write_linker_identity = false; + Ok(()) + }); + + parser + .declare_with_param() + .long("debug-address") + .help("Set debug address") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.debug_address = Some(parse_number(value).context("Invalid --debug-address")?); + Ok(()) + }); + + parser + .declare_with_param() + .long("debug-fuel") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.debug_fuel = Some(AtomicI64::new(value.parse()?)); + args.num_threads = Some(NonZeroUsize::new(1).unwrap()); + Ok(()) + }); + + parser + .declare_with_param() + .long("unresolved-symbols") + .help("Specify how to handle unresolved symbols") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.unresolved_symbols = match value { + "report-all" => UnresolvedSymbols::ReportAll, + "ignore-in-shared-libs" => UnresolvedSymbols::IgnoreInSharedLibs, + "ignore-in-object-files" => UnresolvedSymbols::IgnoreInObjectFiles, + "ignore-all" => UnresolvedSymbols::IgnoreAll, + _ => bail!("Invalid unresolved-symbols value {value}"), + }; + Ok(()) + }); + + parser + .declare_with_param() + .long("undefined") + .help("Force resolution of the symbol") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.undefined.push(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("wrap") + .help("Use a wrapper function") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.wrap.push(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("defsym") + .help("Define a symbol alias: --defsym=symbol=value") + .execute(|args: &mut super::Args, _modifier_stack, value| { + 
let parts: Vec<&str> = value.splitn(2, '=').collect(); + if parts.len() != 2 { + bail!("Invalid --defsym format. Expected: --defsym=symbol=value"); + } + let symbol_name = parts[0].to_owned(); + let value_str = parts[1]; + + let defsym_value = parse_defsym_expression(value_str); + + args.defsym.push((symbol_name, defsym_value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("section-start") + .help("Set start address for a section: --section-start=.section=address") + .execute(|args: &mut super::Args, _modifier_stack, value| { + let parts: Vec<&str> = value.splitn(2, '=').collect(); + if parts.len() != 2 { + bail!("Invalid --section-start format. Expected: --section-start=.section=address"); + } + + let section_name = parts[0].to_owned(); + let address = parse_number(parts[1]).with_context(|| { + format!( + "Invalid address `{}` in --section-start={}", + parts[1], value + ) + })?; + args.section_start.insert(section_name, address); + + Ok(()) + }); + + parser + .declare_with_param() + .long("hash-style") + .help("Set hash style") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.hash_style = match value { + "gnu" => HashStyle::Gnu, + "sysv" => HashStyle::Sysv, + "both" => HashStyle::Both, + _ => bail!("Unknown hash-style `{value}`"), + }; + Ok(()) + }); + + parser + .declare() + .long("enable-new-dtags") + .help("Use DT_RUNPATH and DT_FLAGS/DT_FLAGS_1 (default)") + .execute(|args: &mut super::Args, _modifier_stack| { + args.enable_new_dtags = true; + Ok(()) + }); + + parser + .declare() + .long("disable-new-dtags") + .help("Use DT_RPATH and individual dynamic entries instead of DT_FLAGS") + .execute(|args: &mut super::Args, _modifier_stack| { + args.enable_new_dtags = false; + Ok(()) + }); + + parser + .declare_with_param() + .long("retain-symbols-file") + .help( + "Filter symtab to contain only symbols listed in the supplied file. 
\ + One symbol per line.", + ) + .execute(|args: &mut super::Args, _modifier_stack, value| { + // The performance this flag is not especially optimised. For one, we copy each string + // to the heap. We also do two lookups in the hashset for each symbol. This is a pretty + // obscure flag that we don't expect to be used much, so at this stage, it doesn't seem + // worthwhile to optimise it. + let contents = std::fs::read_to_string(value) + .with_context(|| format!("Failed to read `{value}`"))?; + args.strip = Strip::Retain( + contents + .lines() + .filter_map(|l| { + if l.is_empty() { + None + } else { + Some(l.as_bytes().to_owned()) + } + }) + .collect(), + ); + Ok(()) + }); + + parser + .declare_with_param() + .long("build-id") + .help("Generate build ID") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.build_id = match value { + "none" => BuildIdOption::None, + "fast" | "md5" | "sha1" => BuildIdOption::Fast, + "uuid" => BuildIdOption::Uuid, + s if s.starts_with("0x") || s.starts_with("0X") => { + let hex_string = &s[2..]; + let decoded_bytes = hex::decode(hex_string) + .with_context(|| format!("Invalid Hex Build Id `0x{hex_string}`"))?; + BuildIdOption::Hex(decoded_bytes) + } + s => bail!( + "Invalid build-id value `{s}` valid values are `none`, `fast`, `md5`, `sha1` and `uuid`" + ), + }; + Ok(()) + }); + + parser + .declare_with_param() + .long("icf") + .help("Enable identical code folding (merge duplicate functions)") + .execute(|_args: &mut super::Args, _modifier_stack, value| { + match value { + "none" => {} + other => warn_unsupported(&format!("--icf={other}"))?, + } + Ok(()) + }); + + parser + .declare_with_param() + .long("sysroot") + .help("Set system root") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.save_dir.handle_file(value); + let sysroot = std::fs::canonicalize(value).unwrap_or_else(|_| PathBuf::from(value)); + args.sysroot = Some(Box::from(sysroot.as_path())); + for path in &mut args.lib_search_path { 
+ if let Some(new_path) = maybe_forced_sysroot(path, &sysroot) { + *path = new_path; + } + } + Ok(()) + }); + + parser + .declare_with_param() + .long("auxiliary") + .short("f") + .help("Set DT_AUXILIARY to a given value") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.auxiliary.push(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("plugin-opt") + .help("Pass options to the plugin") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.plugin_args + .push(CString::new(value).context("Invalid --plugin-opt argument")?); + Ok(()) + }); + + parser + .declare_with_param() + .long("dependency-file") + .help("Write dependency rules") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.dependency_file = Some(PathBuf::from(value)); + Ok(()) + }); + + parser + .declare_with_param() + .long("plugin") + .help("Load plugin") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.plugin_path = Some(value.to_owned()); + Ok(()) + }); + + parser + .declare_with_param() + .long("rpath-link") + .help("Add runtime library search path") + .execute(|_args: &mut super::Args, _modifier_stack, _value| { + // TODO + Ok(()) + }); + + parser + .declare_with_param() + .long("sym-info") + .help("Show symbol information. 
Accepts symbol name or ID.") + .execute(|args: &mut super::Args, _modifier_stack, value| { + args.sym_info = Some(value.to_owned()); + Ok(()) + }); + + parser + .declare() + .long("start-lib") + .help("Start library group") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().archive_semantics = true; + Ok(()) + }); + + parser + .declare() + .long("end-lib") + .help("End library group") + .execute(|_args: &mut super::Args, modifier_stack| { + modifier_stack.last_mut().unwrap().archive_semantics = false; + Ok(()) + }); + + parser + .declare() + .long("no-fork") + .help("Do not fork while linking") + .execute(|args: &mut super::Args, _modifier_stack| { + args.should_fork = false; + Ok(()) + }); + + parser + .declare() + .long("update-in-place") + .help("Update file in place") + .execute(|args: &mut super::Args, _modifier_stack| { + args.file_write_mode = Some(FileWriteMode::UpdateInPlace); + Ok(()) + }); + + parser + .declare() + .long("no-update-in-place") + .help("Delete and recreate the file") + .execute(|args: &mut super::Args, _modifier_stack| { + args.file_write_mode = Some(FileWriteMode::UnlinkAndReplace); + Ok(()) + }); + + parser + .declare() + .long("EB") + .help("Big-endian (not supported)") + .execute(|_args: &mut super::Args, _modifier_stack| { + bail!("Big-endian target is not supported"); + }); + + parser + .declare() + .long("prepopulate-maps") + .help("Prepopulate maps") + .execute(|args: &mut super::Args, _modifier_stack| { + args.prepopulate_maps = true; + Ok(()) + }); + + parser + .declare() + .long("verbose-gc-stats") + .help("Show GC statistics") + .execute(|args: &mut super::Args, _modifier_stack| { + args.verbose_gc_stats = true; + Ok(()) + }); + + parser + .declare() + .long("allow-shlib-undefined") + .help("Allow undefined symbol references in shared libraries") + .execute(|args: &mut super::Args, _modifier_stack| { + args.allow_shlib_undefined = true; + Ok(()) + }); + + parser + .declare() + 
.long("no-allow-shlib-undefined") + .help("Disallow undefined symbol references in shared libraries") + .execute(|args: &mut super::Args, _modifier_stack| { + args.allow_shlib_undefined = false; + Ok(()) + }); + + parser + .declare() + .long("error-unresolved-symbols") + .help("Treat unresolved symbols as errors") + .execute(|args: &mut super::Args, _modifier_stack| { + args.error_unresolved_symbols = true; + Ok(()) + }); + + parser + .declare() + .long("warn-unresolved-symbols") + .help("Treat unresolved symbols as warnings") + .execute(|args: &mut super::Args, _modifier_stack| { + args.error_unresolved_symbols = false; + Ok(()) + }); + + add_silently_ignored_flags(&mut parser); + add_default_flags(&mut parser); + + parser +} + +fn add_silently_ignored_flags(parser: &mut ArgumentParser) { + super::add_silently_ignored_flags(parser); +} + +fn add_default_flags(parser: &mut ArgumentParser) { + super::add_default_flags(parser); +} + +fn parse_time_phase_options(input: &str) -> Result> { + input.split(',').map(|s| s.parse()).collect() +} + +impl FromStr for CounterKind { + type Err = crate::error::Error; + + fn from_str(s: &str) -> Result { + Ok(match s { + "cycles" => CounterKind::Cycles, + "instructions" => CounterKind::Instructions, + "cache-misses" => CounterKind::CacheMisses, + "branch-misses" => CounterKind::BranchMisses, + "page-faults" => CounterKind::PageFaults, + "page-faults-minor" => CounterKind::PageFaultsMinor, + "page-faults-major" => CounterKind::PageFaultsMajor, + "l1d-read" => CounterKind::L1dRead, + "l1d-miss" => CounterKind::L1dMiss, + other => bail!("Unsupported performance counter `{other}`"), + }) + } +} + + +#[cfg(test)] +mod tests { + use super::SILENTLY_IGNORED_FLAGS; + use super::VersionMode; + use crate::args::linux::ElfArgs; + use crate::args::InputSpec; + use itertools::Itertools; + use std::fs::File; + use std::io::BufWriter; + use std::io::Write; + use std::num::NonZeroUsize; + use std::path::Path; + use std::path::PathBuf; + use 
std::str::FromStr; + use tempfile::NamedTempFile; + + const INPUT1: &[&str] = &[ + "-pie", + "-z", + "relro", + "-zrelro", + "-hash-style=gnu", + "--hash-style=gnu", + "-build-id", + "--build-id", + "--eh-frame-hdr", + "-m", + "elf_x86_64", + "-dynamic-linker", + "/lib64/ld-linux-x86-64.so.2", + "-o", + "/build/target/debug/deps/c1-a212b73b12b6d123", + "/lib/x86_64-linux-gnu/Scrt1.o", + "/lib/x86_64-linux-gnu/crti.o", + "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/crtbeginS.o", + "-L/build/target/debug/deps", + "-L/tool/lib/rustlib/x86_64/lib", + "-L/tool/lib/rustlib/x86_64/lib", + "-L/usr/bin/../lib/gcc/x86_64-linux-gnu/12", + "-L/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../lib64", + "-L/lib/x86_64-linux-gnu", + "-L/lib/../lib64", + "-L/usr/lib/x86_64-linux-gnu", + "-L/usr/lib/../lib64", + "-L", + "/lib", + "-L/usr/lib", + "/tmp/rustcDcR20O/symbols.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.1.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.2.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.3.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.4.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.5.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.6.rcgu.o", + "/build/target/debug/deps/c1-a212b73b12b6d123.7.rcgu.o", + "--as-needed", + "-as-needed", + "-Bstatic", + "/tool/lib/rustlib/x86_64/lib/libstd-6498d8891e016dca.rlib", + "/tool/lib/rustlib/x86_64/lib/libpanic_unwind-3debdee1a9058d84.rlib", + "/tool/lib/rustlib/x86_64/lib/libobject-8339c5bd5cbc92bf.rlib", + "/tool/lib/rustlib/x86_64/lib/libmemchr-160ebcebb54c11ba.rlib", + "/tool/lib/rustlib/x86_64/lib/libaddr2line-95c75789f1b65e37.rlib", + "/tool/lib/rustlib/x86_64/lib/libgimli-7e8094f2d6258832.rlib", + "/tool/lib/rustlib/x86_64/lib/librustc_demangle-bac9783ef1b45db0.rlib", + "/tool/lib/rustlib/x86_64/lib/libstd_detect-a1cd87df2f2d8e76.rlib", + "/tool/lib/rustlib/x86_64/lib/libhashbrown-7fd06d468d7dba16.rlib", + 
"/tool/lib/rustlib/x86_64/lib/librustc_std_workspace_alloc-5ac19487656e05bf.rlib", + "/tool/lib/rustlib/x86_64/lib/libminiz_oxide-c7c35d32cf825c11.rlib", + "/tool/lib/rustlib/x86_64/lib/libadler-c523f1571362e70b.rlib", + "/tool/lib/rustlib/x86_64/lib/libunwind-85f17c92b770a911.rlib", + "/tool/lib/rustlib/x86_64/lib/libcfg_if-598d3ba148dadcea.rlib", + "/tool/lib/rustlib/x86_64/lib/liblibc-a58ec2dab545caa4.rlib", + "/tool/lib/rustlib/x86_64/lib/liballoc-f9dda8cca149f0fc.rlib", + "/tool/lib/rustlib/x86_64/lib/librustc_std_workspace_core-7ba4c315dd7a3503.rlib", + "/tool/lib/rustlib/x86_64/lib/libcore-5ac2993e19124966.rlib", + "/tool/lib/rustlib/x86_64/lib/libcompiler_builtins-df2fb7f50dec519a.rlib", + "-Bdynamic", + "-lgcc_s", + "-lutil", + "-lrt", + "-lpthread", + "-lm", + "-ldl", + "-lc", + "--eh-frame-hdr", + "-z", + "noexecstack", + "-znoexecstack", + "--gc-sections", + "-z", + "relro", + "-z", + "now", + "-z", + "lazy", + "-soname=fpp", + "-soname", + "bar", + "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/crtendS.o", + "/lib/x86_64-linux-gnu/crtn.o", + "--version-script", + "a.ver", + "--no-threads", + "--no-add-needed", + "--no-copy-dt-needed-entries", + "--discard-locals", + "--use-android-relr-tags", + "--pack-dyn-relocs=relr", + "-X", + "-EL", + "-O", + "1", + "-O3", + "-v", + "--sysroot=/usr/aarch64-linux-gnu", + "--demangle", + "--no-demangle", + "-l:lib85caec4suo0pxg06jm2ma7b0o.so", + "-rpath", + "foo/", + "-rpath=bar/", + "-Rbaz", + "-R", + "somewhere", + // Adding the same rpath multiple times should not create duplicates + "-rpath", + "foo/", + "-x", + "--discard-all", + "--dependency-file=deps.d", + ]; + + const FILE_OPTIONS: &[&str] = &["-pie"]; + + const INLINE_OPTIONS: &[&str] = &["-L", "/lib"]; + + fn write_options_to_file(file: &File, options: &[&str]) { + let mut writer = BufWriter::new(file); + for option in options { + writeln!(writer, "{option}").expect("Failed to write to temporary file"); + } + } + + #[track_caller] + fn assert_contains(c: &[Box], 
v: &str) { + assert!(c.iter().any(|p| p.as_ref() == Path::new(v))); + } + + fn input1_assertions(args: &super::super::Args) { + assert_eq!( + args.inputs + .iter() + .filter_map(|i| match &i.spec { + InputSpec::File(_) | InputSpec::Search(_) => None, + InputSpec::Lib(lib_name) => Some(lib_name.as_ref()), + }) + .collect_vec(), + &["gcc_s", "util", "rt", "pthread", "m", "dl", "c"] + ); + assert_contains(&args.lib_search_path, "/lib"); + assert_contains(&args.lib_search_path, "/usr/lib"); + assert!(!args.inputs.iter().any(|i| match &i.spec { + InputSpec::File(f) => f.as_ref() == Path::new("/usr/bin/ld"), + InputSpec::Lib(_) | InputSpec::Search(_) => false, + })); + assert_eq!( + args.version_script_path, + Some(PathBuf::from_str("a.ver").unwrap()) + ); + assert_eq!(args.soname, Some("bar".to_owned())); + assert_eq!(args.num_threads, Some(NonZeroUsize::new(1).unwrap())); + assert_eq!(args.version_mode, VersionMode::Verbose); + assert_eq!( + args.sysroot, + Some(Box::from(Path::new("/usr/aarch64-linux-gnu"))) + ); + assert!(args.inputs.iter().any(|i| match &i.spec { + InputSpec::File(_) | InputSpec::Lib(_) => false, + InputSpec::Search(lib) => lib.as_ref() == "lib85caec4suo0pxg06jm2ma7b0o.so", + })); + assert_eq!(args.rpath.as_deref(), Some("foo/:bar/:baz:somewhere")); + assert_eq!( + args.dependency_file, + Some(PathBuf::from_str("deps.d").unwrap()) + ); + } + + fn inline_and_file_options_assertions(args: &super::super::Args) { + assert_contains(&args.lib_search_path, "/lib"); + } + + #[test] + fn test_parse_inline_only_options() { + let args = super::parse(|| INPUT1.iter()).unwrap(); + input1_assertions(&args); + } + + #[test] + fn test_parse_file_only_options() { + // Create a temporary file containing the same options (one per line) as INPUT1 + let file = NamedTempFile::new().expect("Could not create temp file"); + write_options_to_file(file.as_file(), INPUT1); + + // pass the name of the file where options are as the only inline option "@filename" + let 
inline_options = [format!("@{}", file.path().to_str().unwrap())]; + let args = super::parse(|| inline_options.iter()).unwrap(); + input1_assertions(&args); + } + + #[test] + fn test_parse_mixed_file_and_inline_options() { + // Create a temporary file containing some options + let file = NamedTempFile::new().expect("Could not create temp file"); + write_options_to_file(file.as_file(), FILE_OPTIONS); + + // create an inline option referring to "@filename" + let file_option = format!("@{}", file.path().to_str().unwrap()); + // start with the set of inline options + let mut inline_options = INLINE_OPTIONS.to_vec(); + // and extend with the "@filename" option + inline_options.push(&file_option); + + // confirm that this works and the resulting set of options is correct + let args = super::parse(|| inline_options.iter()).unwrap(); + inline_and_file_options_assertions(&args); + } + + #[test] + fn test_parse_overlapping_file_and_inline_options() { + // Create a set of file options that has a duplicate of an inline option + let mut file_options = FILE_OPTIONS.to_vec(); + file_options.append(&mut INLINE_OPTIONS.to_vec()); + // and save them to a file + let file = NamedTempFile::new().expect("Could not create temp file"); + write_options_to_file(file.as_file(), &file_options); + + // pass the name of the file where options are, as an inline option "@filename" + let file_option = format!("@{}", file.path().to_str().unwrap()); + // start with the set of inline options + let mut inline_options = INLINE_OPTIONS.to_vec(); + // and extend with the "@filename" option + inline_options.push(&file_option); + + // confirm that this works and the resulting set of options is correct + let args = super::parse(|| inline_options.iter()).unwrap(); + inline_and_file_options_assertions(&args); + } + + #[test] + fn test_parse_recursive_file_option() { + // Create a temporary file containing a @file option + let file1 = NamedTempFile::new().expect("Could not create temp file"); + let file2 = 
NamedTempFile::new().expect("Could not create temp file"); + let file_option = format!("@{}", file2.path().to_str().unwrap()); + write_options_to_file(file1.as_file(), &[&file_option]); + write_options_to_file(file2.as_file(), INPUT1); + + // pass the name of the file where options are, as an inline option "@filename" + let inline_options = [format!("@{}", file1.path().to_str().unwrap())]; + + // confirm that this works and the resulting set of options is correct + let args = super::parse(|| inline_options.iter()) + .expect("Recursive @file options should parse correctly but be ignored"); + input1_assertions(&args); + } + + #[test] + fn test_arguments_from_string() { + use crate::args::arguments_from_string; + + assert!(arguments_from_string("").unwrap().is_empty()); + assert!(arguments_from_string("''").unwrap().is_empty()); + assert!(arguments_from_string("\"\"").unwrap().is_empty()); + assert_eq!( + arguments_from_string(r#""foo" "bar""#).unwrap(), + ["foo", "bar"] + ); + assert_eq!( + arguments_from_string(r#""foo\"" "\"b\"ar""#).unwrap(), + ["foo\"", "\"b\"ar"] + ); + assert_eq!( + arguments_from_string(" foo bar ").unwrap(), + ["foo", "bar"] + ); + assert!(arguments_from_string("'foo''bar'").is_err()); + assert_eq!( + arguments_from_string("'foo' 'bar' baz").unwrap(), + ["foo", "bar", "baz"] + ); + assert_eq!(arguments_from_string("foo\nbar").unwrap(), ["foo", "bar"]); + assert_eq!( + arguments_from_string(r#"'foo' "bar" baz"#).unwrap(), + ["foo", "bar", "baz"] + ); + assert_eq!(arguments_from_string("'foo bar'").unwrap(), ["foo bar"]); + assert_eq!( + arguments_from_string("'foo \" bar'").unwrap(), + ["foo \" bar"] + ); + #[cfg(not(target_os = "windows"))] + assert!(arguments_from_string("foo\\").is_err()); + #[cfg(target_os = "windows")] + assert_eq!(arguments_from_string("foo\\").unwrap(), ["foo\\"]); + assert!(arguments_from_string("'foo").is_err()); + assert!(arguments_from_string("foo\"").is_err()); + } + + #[test] + fn test_ignored_flags() { + for flag 
in SILENTLY_IGNORED_FLAGS { + assert!(!flag.starts_with('-')); + } + } +} diff --git a/libwild/src/args/windows.rs b/libwild/src/args/windows.rs new file mode 100644 index 000000000..2ff483e54 --- /dev/null +++ b/libwild/src/args/windows.rs @@ -0,0 +1,1508 @@ +use super::ArgumentParser; +use super::Input; +use super::InputSpec; +use super::Modifiers; +use super::add_default_flags; +use super::add_silently_ignored_flags; +use super::consts::FILES_PER_GROUP_ENV; +use super::consts::REFERENCE_LINKER_ENV; +use crate::arch::Architecture; +use crate::bail; +use crate::ensure; +use crate::error::Result; +use crate::save_dir::SaveDir; +use jobserver::Client; +use std::num::NonZeroUsize; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WindowsSubsystem { + Console, + Windows, + Native, + Posix, + BootApplication, + EfiApplication, + EfiBootServiceDriver, + EfiRom, + EfiRuntimeDriver, +} + +/// PE/COFF-specific linker arguments. Common fields (output, arch, inputs, etc.) +/// live on `Args`. Access them via direct field access on `Args`, +/// and PE-specific fields are accessible via `Deref`/`DerefMut`. 
+#[derive(Debug)] +pub struct PeArgs { + // Windows-specific fields + pub(crate) base_address: Option, + pub(crate) subsystem: Option, + pub(crate) heap_size: Option, + pub(crate) stack_size: Option, + pub(crate) is_dll: bool, + pub(crate) debug_info: bool, + pub(crate) def_file: Option, + pub(crate) import_lib: Option, + pub(crate) manifest_file: Option, + pub(crate) map_file: Option, + pub(crate) pdb_file: Option, + pub(crate) version: Option, + pub(crate) large_address_aware: bool, + pub(crate) dynamic_base: bool, + pub(crate) nx_compat: bool, + pub(crate) terminal_server_aware: bool, + pub(crate) high_entropy_va: bool, + pub(crate) no_default_libs: Vec, + pub(crate) ignore_all_default_libs: bool, +} + +impl Default for PeArgs { + fn default() -> Self { + Self { + base_address: None, + subsystem: None, + heap_size: None, + stack_size: None, + is_dll: false, + debug_info: false, + def_file: None, + import_lib: None, + manifest_file: None, + map_file: None, + pdb_file: None, + version: None, + large_address_aware: true, + dynamic_base: true, + nx_compat: true, + terminal_server_aware: true, + high_entropy_va: true, + no_default_libs: Vec::new(), + ignore_all_default_libs: false, + } + } +} + + +impl super::Args { + /// Check if a specific library should be ignored due to /NODEFAULTLIB + pub fn should_ignore_default_lib(&self, lib_name: &str) -> bool { + self.ignore_all_default_libs || self.no_default_libs.contains(&lib_name.to_string()) + } + + /// Get the list of specifically ignored default libraries + pub fn ignored_default_libs(&self) -> &[String] { + &self.no_default_libs + } + + /// Check if all default libraries should be ignored + pub fn ignores_all_default_libs(&self) -> bool { + self.ignore_all_default_libs + } +} + +/// Parse Windows linker arguments from the given input iterator. 
+pub(crate) fn parse I, S: AsRef, I: Iterator>( + input: F, +) -> Result> { + use crate::input_data::MAX_FILES_PER_GROUP; + + // SAFETY: Should be called early before other descriptors are opened. + let jobserver_client = unsafe { Client::from_env() }; + + let files_per_group: Option = std::env::var(FILES_PER_GROUP_ENV) + .ok() + .map(|s| s.parse()) + .transpose()?; + + if let Some(x) = files_per_group { + ensure!( + x <= MAX_FILES_PER_GROUP, + "{FILES_PER_GROUP_ENV}={x} but maximum is {MAX_FILES_PER_GROUP}" + ); + } + + let mut args = super::Args:: { + output: Arc::from(Path::new("a.exe")), + should_write_linker_identity: false, + files_per_group, + jobserver_client, + ..Default::default() + }; + + args.save_dir = SaveDir::new(&input)?; + + let mut input = input(); + + let mut modifier_stack = vec![Modifiers::default()]; + + if std::env::var(REFERENCE_LINKER_ENV).is_ok() { + args.write_layout = true; + args.write_trace = true; + } + + let arg_parser = setup_windows_argument_parser(); + while let Some(arg) = input.next() { + let arg = arg.as_ref(); + arg_parser.handle_argument(&mut args, &mut modifier_stack, arg, &mut input)?; + } + + if !args.unrecognized_options.is_empty() { + let options_list = args.unrecognized_options.join(", "); + bail!("unrecognized option(s): {}", options_list); + } + + Ok(args) +} + +pub(crate) fn setup_windows_argument_parser() -> ArgumentParser { + // Helper function for unimplemented options + fn unimplemented_option(option: &str) -> Result<()> { + crate::bail!("Option {} is not yet implemented", option) + } + + let mut parser = ArgumentParser::new_case_insensitive(); + // /ALIGN - Specifies the alignment of each section. + parser + .declare_with_param() + .long("ALIGN") + .help("/ALIGN - Specifies the alignment of each section.") + .execute(|_args: &mut super::Args, _modifier_stack, _value| { + unimplemented_option("/ALIGN") + }); + // /ALLOWBIND - Specifies that a DLL can't be bound. 
+ parser + .declare() + .long("ALLOWBIND") + .help("/ALLOWBIND - Specifies that a DLL can't be bound.") + .execute(|_, _| unimplemented_option("/ALLOWBIND")); + // /ALLOWISOLATION - Specifies behavior for manifest lookup. + parser + .declare() + .long("ALLOWISOLATION") + .help("/ALLOWISOLATION - Specifies behavior for manifest lookup.") + .execute(|_, _| unimplemented_option("/ALLOWISOLATION")); + // /APPCONTAINER - Specifies whether the app must run within an appcontainer process environment. + parser + .declare() + .long("APPCONTAINER") + .help("/APPCONTAINER - Specifies whether the app must run within an appcontainer process environment.") + .execute(|_, _| unimplemented_option("/APPCONTAINER")); + // /ARM64XFUNCTIONPADMINX64 - Specifies the minimum number of bytes of padding between x64 functions in ARM64X images. 17.8 + parser + .declare_with_param() + .long("ARM64XFUNCTIONPADMINX64") + .help("/ARM64XFUNCTIONPADMINX64 - Specifies the minimum number of bytes of padding between x64 functions in ARM64X images. 17.8") + .execute(|_, _, _| unimplemented_option("/ARM64XFUNCTIONPADMINX64")); + // /ASSEMBLYDEBUG - Adds the DebuggableAttribute to a managed image. + parser + .declare() + .long("ASSEMBLYDEBUG") + .help("/ASSEMBLYDEBUG - Adds the DebuggableAttribute to a managed image.") + .execute(|_, _| unimplemented_option("/ASSEMBLYDEBUG")); + // /ASSEMBLYLINKRESOURCE - Creates a link to a managed resource. + parser + .declare_with_param() + .long("ASSEMBLYLINKRESOURCE") + .help("/ASSEMBLYLINKRESOURCE - Creates a link to a managed resource.") + .execute(|_, _, _| unimplemented_option("/ASSEMBLYLINKRESOURCE")); + // /ASSEMBLYMODULE - Specifies that a Microsoft intermediate language (MSIL) module should be imported into the assembly. 
+ parser + .declare_with_param() + .long("ASSEMBLYMODULE") + .help("/ASSEMBLYMODULE - Specifies that a Microsoft intermediate language (MSIL) module should be imported into the assembly.") + .execute(|_, _, _| unimplemented_option("/ASSEMBLYMODULE")); + // /ASSEMBLYRESOURCE - Embeds a managed resource file in an assembly. + parser + .declare_with_param() + .long("ASSEMBLYRESOURCE") + .help("/ASSEMBLYRESOURCE - Embeds a managed resource file in an assembly.") + .execute(|_, _, _| unimplemented_option("/ASSEMBLYRESOURCE")); + // /BASE - Sets a base address for the program. + parser + .declare_with_param() + .long("BASE") + .help("/BASE - Sets a base address for the program.") + .execute(|args, _modifier_stack, value| { + // Parse hexadecimal base address + let base = if value.starts_with("0x") || value.starts_with("0X") { + u64::from_str_radix(&value[2..], 16) + } else { + value.parse::() + }; + + match base { + Ok(addr) => { + args.base_address = Some(addr); + Ok(()) + } + Err(_) => { + crate::bail!("Invalid base address: {}", value); + } + } + }); + // /CETCOMPAT - Marks the binary as CET Shadow Stack compatible. + parser + .declare() + .long("CETCOMPAT") + .help("/CETCOMPAT - Marks the binary as CET Shadow Stack compatible.") + .execute(|_, _| unimplemented_option("/CETCOMPAT")); + // /CGTHREADS - Sets number of cl.exe threads to use for optimization and code generation when link-time code generation is specified. + parser + .declare_with_param() + .long("CGTHREADS") + .help("/CGTHREADS - Sets number of cl.exe threads to use for optimization and code generation when link-time code generation is specified.") + .execute(|args, _modifier_stack, value| { + match value.parse::() { + Ok(threads) => { + if threads > 0 { + args.num_threads = NonZeroUsize::new(threads); + } + Ok(()) + } + Err(_) => { + crate::bail!("Invalid thread count: {}", value); + } + } + }); + // /CLRIMAGETYPE - Sets the type (IJW, pure, or safe) of a CLR image. 
+ parser + .declare_with_param() + .long("CLRIMAGETYPE") + .help("/CLRIMAGETYPE - Sets the type (IJW, pure, or safe) of a CLR image.") + .execute(|_, _, _| unimplemented_option("/CLRIMAGETYPE")); + // /CLRSUPPORTLASTERROR - Preserves the last error code of functions that are called through the P/Invoke mechanism. + parser + .declare() + .long("CLRSUPPORTLASTERROR") + .help("/CLRSUPPORTLASTERROR - Preserves the last error code of functions that are called through the P/Invoke mechanism.") + .execute(|_, _| unimplemented_option("/CLRSUPPORTLASTERROR")); + // /CLRTHREADATTRIBUTE - Specifies the threading attribute to apply to the entry point of your CLR program. + parser + .declare_with_param() + .long("CLRTHREADATTRIBUTE") + .help("/CLRTHREADATTRIBUTE - Specifies the threading attribute to apply to the entry point of your CLR program.") + .execute(|_, _, _| unimplemented_option("/CLRTHREADATTRIBUTE")); + // /CLRUNMANAGEDCODECHECK - Specifies whether the linker applies the SuppressUnmanagedCodeSecurity attribute to linker-generated P/Invoke stubs that call from managed code into native DLLs. + parser + .declare() + .long("CLRUNMANAGEDCODECHECK") + .help("/CLRUNMANAGEDCODECHECK - Specifies whether the linker applies the SuppressUnmanagedCodeSecurity attribute to linker-generated P/Invoke stubs that call from managed code into native DLLs.") + .execute(|_, _| unimplemented_option("/CLRUNMANAGEDCODECHECK")); + // /DEBUG - Creates debugging information. + parser + .declare_with_optional_param() + .long("DEBUG") + .help("/DEBUG - Creates debugging information.") + .sub_option("FULL", "Full debugging information.", |args, _| { + args.debug_info = true; + Ok(()) + }) + .sub_option( + "FASTLINK", + "Produces a PDB with limited debug information.", + |args, _| { + args.debug_info = true; + Ok(()) + }, + ) + .execute(|args, _, _value| { + args.debug_info = true; + Ok(()) + }); + // /DEBUGTYPE - Specifies which data to include in debugging information. 
+ parser + .declare_with_param() + .long("DEBUGTYPE") + .help("/DEBUGTYPE - Specifies which data to include in debugging information.") + .execute(|_, _, _| unimplemented_option("/DEBUGTYPE")); + // /DEF - Passes a module-definition (.def) file to the linker. + parser + .declare_with_param() + .long("DEF") + .help("/DEF - Passes a module-definition (.def) file to the linker.") + .execute(|args, _modifier_stack, value| { + args.def_file = Some(PathBuf::from(value)); + Ok(()) + }); + // /DEFAULTLIB - Searches the specified library when external references are resolved. + parser + .declare_with_optional_param() + .long("DEFAULTLIB") // Add lowercase version for case-insensitive matching + .help("/DEFAULTLIB - Searches the specified library when external references are resolved.") + .execute(|args, _modifier_stack, value| { + if let Some(lib_name) = value { + // Add library to inputs + args.inputs.push(Input { + spec: InputSpec::Lib(lib_name.into()), + search_first: None, + modifiers: Modifiers::default(), + }); + } + Ok(()) + }); + // /DELAY - Controls the delayed loading of DLLs. + parser + .declare_with_optional_param() + .long("DELAY") + .help("/DELAY - Controls the delayed loading of DLLs.") + .execute(|_, _, _| unimplemented_option("/DELAY")); + // /DELAYLOAD - Causes the delayed loading of the specified DLL. + parser + .declare_with_optional_param() + .long("DELAYLOAD") + .help("/DELAYLOAD - Causes the delayed loading of the specified DLL.") + .execute(|_, _, _| unimplemented_option("/DELAYLOAD")); + // /DELAYSIGN - Partially signs an assembly. + parser + .declare_with_optional_param() + .long("DELAYSIGN") + .help("/DELAYSIGN - Partially signs an assembly.") + .execute(|_, _, _| unimplemented_option("/DELAYSIGN")); + // /DEPENDENTLOADFLAG - Sets default flags on dependent DLL loads. 
+ parser + .declare_with_optional_param() + .long("DEPENDENTLOADFLAG") + .help("/DEPENDENTLOADFLAG - Sets default flags on dependent DLL loads.") + .execute(|_, _, _| unimplemented_option("/DEPENDENTLOADFLAG")); + // /DLL - Builds a DLL. + parser + .declare() + .long("DLL") + .help("/DLL - Builds a DLL.") + .execute(|args, _modifier_stack| { + args.is_dll = true; + args.should_output_executable = false; + Ok(()) + }); + // /DRIVER - Creates a kernel mode driver. + parser + .declare_with_param() + .long("DRIVER") + .help("/DRIVER - Creates a kernel mode driver.") + .sub_option( + "UPONLY", + "Runs only on a uniprocessor system.", + |_, _| unimplemented_option("/DRIVER:UPONLY"), + ) + .sub_option( + "WDM", + "Creates a Windows Driver Model driver.", + |_, _| unimplemented_option("/DRIVER:WDM"), + ) + .execute(|_, _, _| unimplemented_option("/DRIVER")); + // /DYNAMICBASE - Specifies whether to generate an executable image that's rebased at load time by using the address space layout randomization (ASLR) feature. + parser + .declare_with_optional_param() + .long("DYNAMICBASE") + .help("/DYNAMICBASE - Specifies whether to generate an executable image that's rebased at load time by using the address space layout randomization (ASLR) feature.") + .execute(|args, _modifier_stack, value| { + match value { + Some("NO") => args.dynamic_base = false, + _ => args.dynamic_base = true, + } + Ok(()) + }); + // /DYNAMICDEOPT - Enable C++ Dynamic Debugging (Preview) and step in anywhere with on-demand function deoptimization. + parser + .declare_with_optional_param() + .long("DYNAMICDEOPT") + .help("/DYNAMICDEOPT - Enable C++ Dynamic Debugging (Preview) and step in anywhere with on-demand function deoptimization.") + .execute(|_, _, _| unimplemented_option("/DYNAMICDEOPT")); + // /ENTRY - Sets the starting address. Also accepts -e (used by clang driver). 
+ parser + .declare_with_param() + .long("ENTRY") + .short("e") + .help("/ENTRY - Sets the starting address.") + .execute(|args, _modifier_stack, value| { + args.entry = Some(value.to_string()); + Ok(()) + }); + // /ERRORREPORT - Deprecated. Error reporting is controlled by Windows Error Reporting (WER) settings. + parser + .declare_with_optional_param() + .long("ERRORREPORT") + .help("/ERRORREPORT - Deprecated. Error reporting is controlled by Windows Error Reporting (WER) settings.") + .execute(|_, _, _| unimplemented_option("/ERRORREPORT")); + // /EXPORT - Exports a function. + parser + .declare_with_param() + .long("EXPORT") + .help("/EXPORT - Exports a function.") + .execute(|_args, _modifier_stack, _value| unimplemented_option("/EXPORT")); + // /FILEALIGN - Aligns sections within the output file on multiples of a specified value. + parser + .declare_with_param() + .long("FILEALIGN") + .help("/FILEALIGN - Aligns sections within the output file on multiples of a specified value.") + .execute(|_args, _modifier_stack, _value| unimplemented_option("/FILEALIGN")); + // /FIXED - Creates a program that can be loaded only at its preferred base address. + parser + .declare_with_optional_param() + .long("FIXED") + .help("/FIXED - Creates a program that can be loaded only at its preferred base address.") + .execute(|_, _, _| unimplemented_option("/FIXED")); + // /FORCE - Forces a link to complete even with unresolved symbols or symbols defined more than once. + parser + .declare_with_optional_param() + .long("FORCE") + .help("/FORCE - Forces a link to complete even with unresolved symbols or symbols defined more than once.") + .execute(|_, _, _| unimplemented_option("/FORCE")); + // /FUNCTIONPADMIN - Creates an image that can be hot patched. 
+ parser + .declare_with_optional_param() + .long("FUNCTIONPADMIN") + .help("/FUNCTIONPADMIN - Creates an image that can be hot patched.") + .execute(|_, _, _| unimplemented_option("/FUNCTIONPADMIN")); + // /GENPROFILE , /FASTGENPROFILE - Both of these options specify generation of a .pgd file by the linker to support profile-guided optimization (PGO). /GENPROFILE and /FASTGENPROFILE use different default parameters. + parser + .declare_with_optional_param() + .long("GENPROFILE") + .help("/GENPROFILE , /FASTGENPROFILE - Both of these options specify generation of a .pgd file by the linker to support profile-guided optimization (PGO). /GENPROFILE and /FASTGENPROFILE use different default parameters.") + .execute(|_, _, _| unimplemented_option("/GENPROFILE")); + // /GUARD - Enables Control Flow Guard protection. + parser + .declare_with_optional_param() + .long("GUARD") + .help("/GUARD - Enables Control Flow Guard protection.") + .execute(|_, _, _| unimplemented_option("/GUARD")); + // /HEAP - Sets the size of the heap, in bytes. + parser + .declare_with_optional_param() + .long("HEAP") + .help("/HEAP - Sets the size of the heap, in bytes.") + .execute(|args, _modifier_stack, value| { + if let Some(heap_value) = value { + // Parse heap size format: size[,reserve] + let heap_size_str = heap_value.split(',').next().unwrap_or(heap_value); + match heap_size_str.parse::() { + Ok(size) => { + args.heap_size = Some(size); + Ok(()) + } + Err(_) => { + crate::bail!("Invalid heap size: {}", heap_value); + } + } + } else { + // Default heap size or just enable heap specification + Ok(()) + } + }); + // /HIGHENTROPYVA - Specifies support for high-entropy 64-bit address space layout randomization (ASLR). 
+ parser + .declare_with_optional_param() + .long("HIGHENTROPYVA") + .help("/HIGHENTROPYVA - Specifies support for high-entropy 64-bit address space layout randomization (ASLR).") + .execute(|args, _modifier_stack, value| { + match value { + Some("NO") => args.high_entropy_va = false, + _ => args.high_entropy_va = true, + } + Ok(()) + }); + // /IDLOUT - Specifies the name of the .idl file and other MIDL output files. + parser + .declare_with_optional_param() + .long("IDLOUT") + .help("/IDLOUT - Specifies the name of the .idl file and other MIDL output files.") + .execute(|_, _, _| unimplemented_option("/IDLOUT")); + // /IGNORE - Suppresses output of specified linker warnings. + parser + .declare_with_optional_param() + .long("IGNORE") + .help("/IGNORE - Suppresses output of specified linker warnings.") + .execute(|_, _, _| unimplemented_option("/IGNORE")); + // /IGNOREIDL - Prevents the processing of attribute information into an .idl file. + parser + .declare_with_optional_param() + .long("IGNOREIDL") + .help("/IGNOREIDL - Prevents the processing of attribute information into an .idl file.") + .execute(|_, _, _| unimplemented_option("/IGNOREIDL")); + // /ILK - Overrides the default incremental database file name. + parser + .declare_with_optional_param() + .long("ILK") + .help("/ILK - Overrides the default incremental database file name.") + .execute(|_, _, _| unimplemented_option("/ILK")); + // /IMPLIB - Overrides the default import library name. + parser + .declare_with_param() + .long("IMPLIB") + .help("/IMPLIB - Overrides the default import library name.") + .execute(|args, _modifier_stack, value| { + args.import_lib = Some(PathBuf::from(value)); + Ok(()) + }); + // /INCLUDE - Forces symbol references. + parser + .declare_with_param() + .long("INCLUDE") + .help("/INCLUDE - Forces symbol references.") + .execute(|_args, _modifier_stack, _value| { + // TODO: Implement symbol forcing + Ok(()) + }); + // /INCREMENTAL - Controls incremental linking. 
+ parser + .declare_with_optional_param() + .long("INCREMENTAL") + .help("/INCREMENTAL - Controls incremental linking.") + .sub_option("NO", "Disable incremental linking.", |_, _| { + unimplemented_option("/INCREMENTAL:NO") + }) + .sub_option("YES", "Enable incremental linking.", |_, _| { + unimplemented_option("/INCREMENTAL:YES") + }) + .execute(|_, _, _| unimplemented_option("/INCREMENTAL")); + // /INFERASANLIBS - Uses inferred sanitizer libraries. + parser + .declare_with_optional_param() + .long("INFERASANLIBS") + .help("/INFERASANLIBS - Uses inferred sanitizer libraries.") + .execute(|_, _, _| unimplemented_option("/INFERASANLIBS")); + // /INTEGRITYCHECK - Specifies that the module requires a signature check at load time. + parser + .declare_with_optional_param() + .long("INTEGRITYCHECK") + .help( + "/INTEGRITYCHECK - Specifies that the module requires a signature check at load time.", + ) + .execute(|_, _, _| unimplemented_option("/INTEGRITYCHECK")); + // /KERNEL - Create a kernel mode binary. + parser + .declare_with_optional_param() + .long("KERNEL") + .help("/KERNEL - Create a kernel mode binary.") + .execute(|_, _, _| unimplemented_option("/KERNEL")); + // /KEYCONTAINER - Specifies a key container to sign an assembly. + parser + .declare_with_optional_param() + .long("KEYCONTAINER") + .help("/KEYCONTAINER - Specifies a key container to sign an assembly.") + .execute(|_, _, _| unimplemented_option("/KEYCONTAINER")); + // /KEYFILE - Specifies a key or key pair to sign an assembly. 
+ parser + .declare_with_optional_param() + .long("KEYFILE") + .help("/KEYFILE - Specifies a key or key pair to sign an assembly.") + .execute(|_, _, _| unimplemented_option("/KEYFILE")); + // /LARGEADDRESSAWARE - Tells the compiler that the application supports addresses larger than 2 gigabytes + parser + .declare_with_optional_param() + .long("LARGEADDRESSAWARE") + .help("/LARGEADDRESSAWARE - Tells the compiler that the application supports addresses larger than 2 gigabytes") + .execute(|args, _modifier_stack, value| { + match value { + Some("NO") => args.large_address_aware = false, + _ => args.large_address_aware = true, + } + Ok(()) + }); + // /LIBPATH - Specifies a path to search before the environmental library path. + parser + .declare_with_param() + .long("LIBPATH") + .help("/LIBPATH - Specifies a path to search before the environmental library path.") + .execute(|args, _modifier_stack, value| { + let path = Path::new(value).into(); + args.lib_search_path.push(path); + Ok(()) + }); + // /LINKREPRO - Specifies a path to generate link repro artifacts in. + parser + .declare_with_optional_param() + .long("LINKREPRO") + .help("/LINKREPRO - Specifies a path to generate link repro artifacts in.") + .execute(|_, _, _| unimplemented_option("/LINKREPRO")); + // /LINKREPROFULLPATHRSP - Generates a response file containing the absolute paths to all the files that the linker took as input. + parser + .declare_with_optional_param() + .long("LINKREPROFULLPATHRSP") + .help("/LINKREPROFULLPATHRSP - Generates a response file containing the absolute paths to all the files that the linker took as input.") + .execute(|_, _, _| unimplemented_option("/LINKREPROFULLPATHRSP")); + // /LINKREPROTARGET - Generates a link repro only when producing the specified target. 16.1 + parser + .declare_with_optional_param() + .long("LINKREPROTARGET") + .help("/LINKREPROTARGET - Generates a link repro only when producing the specified target. 
16.1") + .execute(|_, _, _| unimplemented_option("/LINKREPROTARGET")); + // /LTCG - Specifies link-time code generation. + parser + .declare_with_optional_param() + .long("LTCG") + .help("/LTCG - Specifies link-time code generation.") + .sub_option("NOSTATUS", "Do not display progress.", |_, _| { + unimplemented_option("/LTCG:NOSTATUS") + }) + .sub_option("STATUS", "Display progress.", |_, _| { + unimplemented_option("/LTCG:STATUS") + }) + .sub_option("INCREMENTAL", "Enable incremental LTCG.", |_, _| { + unimplemented_option("/LTCG:INCREMENTAL") + }) + .execute(|_, _, _| unimplemented_option("/LTCG")); + // /MACHINE - Specifies the target platform. + parser + .declare_with_param() + .long("MACHINE") + .help("/MACHINE - Specifies the target platform.") + .sub_option("ARM", "ARM", |args, _| { + args.arch = Architecture::AArch64; + Ok(()) + }) + .sub_option("ARM64", "ARM64", |args, _| { + args.arch = Architecture::AArch64; + Ok(()) + }) + .sub_option("ARM64EC", "ARM64EC", |args, _| { + args.arch = Architecture::AArch64; + Ok(()) + }) + .sub_option("EBC", "EBC", |_args, _| { + // EFI Byte Code - not commonly supported + Ok(()) + }) + .sub_option("X64", "X64", |args, _| { + args.arch = Architecture::X86_64; + Ok(()) + }) + .sub_option("X86", "X86", |args, _| { + args.arch = Architecture::X86_64; // Treat as X86_64 for simplicity + Ok(()) + }) + .execute(|args, _, value| { + // Handle direct architecture specification + match value.to_uppercase().as_str() { + "ARM" | "ARM64" | "ARM64EC" => args.arch = Architecture::AArch64, + "X64" | "X86" => args.arch = Architecture::X86_64, + _ => {} // Ignore unknown architectures + } + Ok(()) + }); + // /MANIFEST - Creates a side-by-side manifest file and optionally embeds it in the binary. 
+ parser + .declare_with_optional_param() + .long("MANIFEST") + .help("/MANIFEST - Creates a side-by-side manifest file and optionally embeds it in the binary.") + .execute(|_, _, _| unimplemented_option("/MANIFEST")); + // /MANIFESTDEPENDENCY - Specifies a section in the manifest file. + parser + .declare_with_optional_param() + .long("MANIFESTDEPENDENCY") + .help("/MANIFESTDEPENDENCY - Specifies a section in the manifest file.") + .execute(|_, _, _| unimplemented_option("/MANIFESTDEPENDENCY")); + // /MANIFESTFILE - Changes the default name of the manifest file. + parser + .declare_with_param() + .long("MANIFESTFILE") + .help("/MANIFESTFILE - Changes the default name of the manifest file.") + .execute(|args, _modifier_stack, value| { + args.manifest_file = Some(PathBuf::from(value)); + Ok(()) + }); + // /MANIFESTINPUT - Specifies a manifest input file for the linker to process and embed in the binary. You can use this option multiple times to specify more than one manifest input file. + parser + .declare_with_optional_param() + .long("MANIFESTINPUT") + .help("/MANIFESTINPUT - Specifies a manifest input file for the linker to process and embed in the binary. You can use this option multiple times to specify more than one manifest input file.") + .execute(|_, _, _| unimplemented_option("/MANIFESTINPUT")); + // /MANIFESTUAC - Specifies whether User Account Control (UAC) information is embedded in the program manifest. + parser + .declare_with_optional_param() + .long("MANIFESTUAC") + .help("/MANIFESTUAC - Specifies whether User Account Control (UAC) information is embedded in the program manifest.") + .execute(|_, _, _| unimplemented_option("/MANIFESTUAC")); + // /MAP - Creates a mapfile. 
+ parser + .declare_with_optional_param() + .long("MAP") + .help("/MAP - Creates a mapfile.") + .execute(|args, _modifier_stack, value| { + match value { + Some(filename) => args.map_file = Some(PathBuf::from(filename)), + None => { + // Default map file name based on output name + let output_stem = args + .output + .file_stem() + .unwrap_or_else(|| std::ffi::OsStr::new("output")); + let mut map_name = output_stem.to_os_string(); + map_name.push(".map"); + args.map_file = Some(PathBuf::from(map_name)); + } + } + Ok(()) + }); + // /MAPINFO - Includes the specified information in the mapfile. + parser + .declare_with_optional_param() + .long("MAPINFO") + .help("/MAPINFO - Includes the specified information in the mapfile.") + .execute(|_, _, _| unimplemented_option("/MAPINFO")); + // /MERGE - Combines sections. + parser + .declare_with_optional_param() + .long("MERGE") + .help("/MERGE - Combines sections.") + .execute(|_, _, _| unimplemented_option("/MERGE")); + // /MIDL - Specifies MIDL command-line options. + parser + .declare_with_optional_param() + .long("MIDL") + .help("/MIDL - Specifies MIDL command-line options.") + .execute(|_, _, _| unimplemented_option("/MIDL")); + // /NATVIS - Adds debugger visualizers from a Natvis file to the program database (PDB). + parser + .declare_with_optional_param() + .long("NATVIS") + .help( + "/NATVIS - Adds debugger visualizers from a Natvis file to the program database (PDB).", + ) + .execute(|_, _, _| unimplemented_option("/NATVIS")); + // /NOASSEMBLY - Suppresses the creation of a .NET Framework assembly. + parser + .declare_with_optional_param() + .long("NOASSEMBLY") + .help("/NOASSEMBLY - Suppresses the creation of a .NET Framework assembly.") + .execute(|_, _, _| unimplemented_option("/NOASSEMBLY")); + // /NODEFAULTLIB - Ignores all (or the specified) default libraries when external references are resolved. 
+ parser + .declare_with_optional_param() + .long("NODEFAULTLIB") + .help("/NODEFAULTLIB - Ignores all (or the specified) default libraries when external references are resolved.") + .execute(|args, _modifier_stack, value| { + match value { + Some(lib_name) => { + // Ignore specific library + args.no_default_libs.push(lib_name.to_string()); + } + None => { + // Ignore all default libraries + args.ignore_all_default_libs = true; + } + } + Ok(()) + }); + // /NOENTRY - Creates a resource-only DLL. + parser + .declare_with_optional_param() + .long("NOENTRY") + .help("/NOENTRY - Creates a resource-only DLL.") + .execute(|_, _, _| unimplemented_option("/NOENTRY")); + // /NOFUNCTIONPADSECTION - Disables function padding for functions in the specified section. 17.8 + parser + .declare_with_optional_param() + .long("NOFUNCTIONPADSECTION") + .help("/NOFUNCTIONPADSECTION - Disables function padding for functions in the specified section. 17.8") + .execute(|_, _, _| unimplemented_option("/NOFUNCTIONPADSECTION")); + // /NOLOGO - Suppresses the startup banner. + parser + .declare_with_optional_param() + .long("NOLOGO") + .help("/NOLOGO - Suppresses the startup banner.") + .execute(|_, _, _| Ok(())); + // /NXCOMPAT - Marks an executable as verified to be compatible with the Windows Data Execution Prevention feature. + parser + .declare_with_optional_param() + .long("NXCOMPAT") + .help("/NXCOMPAT - Marks an executable as verified to be compatible with the Windows Data Execution Prevention feature.") + .execute(|args, _modifier_stack, value| { + match value { + Some("NO") => args.nx_compat = false, + _ => args.nx_compat = true, + } + Ok(()) + }); + // /OPT - Controls LINK optimizations. 
+ parser + .declare_with_param() + .long("OPT") + .help("/OPT - Controls LINK optimizations.") + .sub_option( + "REF", + "Eliminate unreferenced functions and data.", + |_, _| unimplemented_option("/OPT:REF"), + ) + .sub_option( + "NOREF", + "Keep unreferenced functions and data.", + |_, _| unimplemented_option("/OPT:NOREF"), + ) + .sub_option("ICF", "Fold identical COMDATs.", |_, _| { + unimplemented_option("/OPT:ICF") + }) + .sub_option("NOICF", "Disable identical COMDAT folding.", |_, _| { + unimplemented_option("/OPT:NOICF") + }) + .sub_option( + "LBR", + "Enable profile guided optimizations (LBR).", + |_, _| unimplemented_option("/OPT:LBR"), + ) + .sub_option( + "NOLBR", + "Disable profile guided optimizations (no LBR).", + |_, _| unimplemented_option("/OPT:NOLBR"), + ) + .execute(|_, _, _| unimplemented_option("/OPT")); + // /ORDER - Places COMDATs into the image in a predetermined order. + parser + .declare_with_optional_param() + .long("ORDER") + .help("/ORDER - Places COMDATs into the image in a predetermined order.") + .execute(|_, _, _| unimplemented_option("/ORDER")); + // /OUT - Specifies the output file name. + parser + .declare_with_param() + .long("OUT") + .help("/OUT - Specifies the output file name.") + .execute(|args, _modifier_stack, value| { + args.output = Arc::from(Path::new(value)); + Ok(()) + }); + // /PDB - Creates a PDB file. + parser + .declare_with_optional_param() + .long("PDB") + .help("/PDB - Creates a PDB file.") + .execute(|args, _modifier_stack, value| { + match value { + Some(filename) => args.pdb_file = Some(PathBuf::from(filename)), + None => { + // Default PDB file name based on output name + let output_stem = args + .output + .file_stem() + .unwrap_or_else(|| std::ffi::OsStr::new("output")); + let mut pdb_name = output_stem.to_os_string(); + pdb_name.push(".pdb"); + args.pdb_file = Some(PathBuf::from(pdb_name)); + } + } + Ok(()) + }); + // /PDBALTPATH - Uses an alternate location to save a PDB file. 
+ parser + .declare_with_optional_param() + .long("PDBALTPATH") + .help("/PDBALTPATH - Uses an alternate location to save a PDB file.") + .execute(|_, _, _| unimplemented_option("/PDBALTPATH")); + // /PDBSTRIPPED - Creates a PDB file that has no private symbols. + parser + .declare_with_optional_param() + .long("PDBSTRIPPED") + .help("/PDBSTRIPPED - Creates a PDB file that has no private symbols.") + .execute(|_, _, _| unimplemented_option("/PDBSTRIPPED")); + // /PGD - Specifies a .pgd file for profile-guided optimizations. + parser + .declare_with_optional_param() + .long("PGD") + .help("/PGD - Specifies a .pgd file for profile-guided optimizations.") + .execute(|_, _, _| unimplemented_option("/PGD")); + // /POGOSAFEMODE - Obsolete Creates a thread-safe PGO instrumented build. + parser + .declare_with_optional_param() + .long("POGOSAFEMODE") + .help("/POGOSAFEMODE - Obsolete Creates a thread-safe PGO instrumented build.") + .execute(|_, _, _| unimplemented_option("/POGOSAFEMODE")); + // /PROFILE - Produces an output file that can be used with the Performance Tools profiler. + parser + .declare_with_optional_param() + .long("PROFILE") + .help("/PROFILE - Produces an output file that can be used with the Performance Tools profiler.") + .execute(|_, _, _| unimplemented_option("/PROFILE")); + // /RELEASE - Sets the Checksum in the .exe header. + parser + .declare_with_optional_param() + .long("RELEASE") + .help("/RELEASE - Sets the Checksum in the .exe header.") + .execute(|_, _, _| unimplemented_option("/RELEASE")); + // /SAFESEH - Specifies that the image will contain a table of safe exception handlers. + parser + .declare_with_optional_param() + .long("SAFESEH") + .help( + "/SAFESEH - Specifies that the image will contain a table of safe exception handlers.", + ) + .execute(|_, _, _| unimplemented_option("/SAFESEH")); + // /SECTION - Overrides the attributes of a section. 
+ parser + .declare_with_optional_param() + .long("SECTION") + .help("/SECTION - Overrides the attributes of a section.") + .execute(|_, _, _| unimplemented_option("/SECTION")); + // /SOURCELINK - Specifies a SourceLink file to add to the PDB. + parser + .declare_with_optional_param() + .long("SOURCELINK") + .help("/SOURCELINK - Specifies a SourceLink file to add to the PDB.") + .execute(|_, _, _| unimplemented_option("/SOURCELINK")); + // /STACK - Sets the size of the stack in bytes. + parser + .declare_with_optional_param() + .long("STACK") + .help("/STACK - Sets the size of the stack in bytes.") + .execute(|args, _modifier_stack, value| { + if let Some(stack_value) = value { + // Parse stack size format: size[,reserve] + let stack_size_str = stack_value.split(',').next().unwrap_or(stack_value); + match stack_size_str.parse::() { + Ok(size) => { + args.stack_size = Some(size); + Ok(()) + } + Err(_) => { + crate::bail!("Invalid stack size: {}", stack_value); + } + } + } else { + // Default stack size or just enable stack specification + Ok(()) + } + }); + // /STUB - Attaches an MS-DOS stub program to a Win32 program. + parser + .declare_with_optional_param() + .long("STUB") + .help("/STUB - Attaches an MS-DOS stub program to a Win32 program.") + .execute(|_, _, _| unimplemented_option("/STUB")); + // /SUBSYSTEM - Tells the operating system how to run the .exe file. 
+ parser + .declare_with_param() + .long("SUBSYSTEM") + .help("/SUBSYSTEM - Tells the operating system how to run the .exe file.") + .sub_option("BOOT_APPLICATION", "Boot application", |args, _| { + args.subsystem = Some(WindowsSubsystem::BootApplication); + Ok(()) + }) + .sub_option("CONSOLE", "Console", |args, _| { + args.subsystem = Some(WindowsSubsystem::Console); + Ok(()) + }) + .sub_option("WINDOWS", "Windows GUI", |args, _| { + args.subsystem = Some(WindowsSubsystem::Windows); + Ok(()) + }) + .sub_option("NATIVE", "Native", |args, _| { + args.subsystem = Some(WindowsSubsystem::Native); + Ok(()) + }) + .sub_option("POSIX", "POSIX", |args, _| { + args.subsystem = Some(WindowsSubsystem::Posix); + Ok(()) + }) + .sub_option("EFI_APPLICATION", "EFI application", |args, _| { + args.subsystem = Some(WindowsSubsystem::EfiApplication); + Ok(()) + }) + .sub_option( + "EFI_BOOT_SERVICE_DRIVER", + "EFI boot service driver", + |args, _| { + args.subsystem = Some(WindowsSubsystem::EfiBootServiceDriver); + Ok(()) + }, + ) + .sub_option("EFI_ROM", "EFI ROM", |args, _| { + args.subsystem = Some(WindowsSubsystem::EfiRom); + Ok(()) + }) + .sub_option("EFI_RUNTIME_DRIVER", "EFI runtime driver", |args, _| { + args.subsystem = Some(WindowsSubsystem::EfiRuntimeDriver); + Ok(()) + }) + .execute(|args, _, value| { + // Handle direct subsystem specification + match value.to_uppercase().as_str() { + "BOOT_APPLICATION" => args.subsystem = Some(WindowsSubsystem::BootApplication), + "CONSOLE" => args.subsystem = Some(WindowsSubsystem::Console), + "WINDOWS" => args.subsystem = Some(WindowsSubsystem::Windows), + "NATIVE" => args.subsystem = Some(WindowsSubsystem::Native), + "POSIX" => args.subsystem = Some(WindowsSubsystem::Posix), + "EFI_APPLICATION" => args.subsystem = Some(WindowsSubsystem::EfiApplication), + "EFI_BOOT_SERVICE_DRIVER" => { + args.subsystem = Some(WindowsSubsystem::EfiBootServiceDriver) + } + "EFI_ROM" => args.subsystem = Some(WindowsSubsystem::EfiRom), + 
"EFI_RUNTIME_DRIVER" => args.subsystem = Some(WindowsSubsystem::EfiRuntimeDriver), + _ => {} // Ignore unknown subsystems + } + Ok(()) + }); + // /SWAPRUN - Tells the operating system to copy the linker output to a swap file before it's run. + parser + .declare_with_optional_param() + .long("SWAPRUN") + .help("/SWAPRUN - Tells the operating system to copy the linker output to a swap file before it's run.") + .execute(|_, _, _| unimplemented_option("/SWAPRUN")); + // /TIME - Output linker pass timing information. + parser + .declare_with_optional_param() + .long("TIME") + .help("/TIME - Output linker pass timing information.") + .execute(|_, _, _| unimplemented_option("/TIME")); + // /TLBID - Specifies the resource ID of the linker-generated type library. + parser + .declare_with_optional_param() + .long("TLBID") + .help("/TLBID - Specifies the resource ID of the linker-generated type library.") + .execute(|_, _, _| unimplemented_option("/TLBID")); + // /TLBOUT - Specifies the name of the .tlb file and other MIDL output files. + parser + .declare_with_optional_param() + .long("TLBOUT") + .help("/TLBOUT - Specifies the name of the .tlb file and other MIDL output files.") + .execute(|_, _, _| unimplemented_option("/TLBOUT")); + // /TSAWARE - Creates an application that is designed specifically to run under Terminal Server. + parser + .declare_with_optional_param() + .long("TSAWARE") + .help("/TSAWARE - Creates an application that is designed specifically to run under Terminal Server.") + .execute(|args, _modifier_stack, value| { + match value { + Some("NO") => args.terminal_server_aware = false, + _ => args.terminal_server_aware = true, + } + Ok(()) + }); + // /USEPROFILE - Uses profile-guided optimization training data to create an optimized image. 
+ parser + .declare_with_optional_param() + .long("USEPROFILE") + .help("/USEPROFILE - Uses profile-guided optimization training data to create an optimized image.") + .execute(|_, _, _| unimplemented_option("/USEPROFILE")); + // /VERBOSE - Prints linker progress messages. + parser + .declare_with_optional_param() + .long("VERBOSE") + .help("/VERBOSE - Prints linker progress messages.") + .execute(|_, _, _| unimplemented_option("/VERBOSE")); + // /VERSION - Assigns a version number. + parser + .declare_with_param() + .long("VERSION") + .help("/VERSION - Assigns a version number.") + .execute(|args, _modifier_stack, value| { + args.version = Some(value.to_string()); + Ok(()) + }); + // /WHOLEARCHIVE - Includes every object file from specified static libraries. + parser + .declare_with_optional_param() + .long("WHOLEARCHIVE") + .help("/WHOLEARCHIVE - Includes every object file from specified static libraries.") + .execute(|_, _, _| unimplemented_option("/WHOLEARCHIVE")); + // /WINMD - Enables generation of a Windows Runtime Metadata file. + parser + .declare_with_optional_param() + .long("WINMD") + .help("/WINMD - Enables generation of a Windows Runtime Metadata file.") + .execute(|_, _, _| unimplemented_option("/WINMD")); + // /WINMDFILE - Specifies the file name for the Windows Runtime Metadata (winmd) output file that's generated by the /WINMD linker option. + parser + .declare_with_optional_param() + .long("WINMDFILE") + .help("/WINMDFILE - Specifies the file name for the Windows Runtime Metadata (winmd) output file that's generated by the /WINMD linker option.") + .execute(|_, _, _| unimplemented_option("/WINMDFILE")); + // /WINMDKEYFILE - Specifies a key or key pair to sign a Windows Runtime Metadata file. 
+ parser + .declare_with_optional_param() + .long("WINMDKEYFILE") + .help( + "/WINMDKEYFILE - Specifies a key or key pair to sign a Windows Runtime Metadata file.", + ) + .execute(|_, _, _| unimplemented_option("/WINMDKEYFILE")); + // /WINMDKEYCONTAINER - Specifies a key container to sign a Windows Metadata file. + parser + .declare_with_optional_param() + .long("WINMDKEYCONTAINER") + .help("/WINMDKEYCONTAINER - Specifies a key container to sign a Windows Metadata file.") + .execute(|_, _, _| unimplemented_option("/WINMDKEYCONTAINER")); + // /WINMDDELAYSIGN - Partially signs a Windows Runtime Metadata ( .winmd ) file by placing the public key in the winmd file. + parser + .declare_with_optional_param() + .long("WINMDDELAYSIGN") + .help("/WINMDDELAYSIGN - Partially signs a Windows Runtime Metadata ( .winmd ) file by placing the public key in the winmd file.") + .execute(|_, _, _| unimplemented_option("/WINMDDELAYSIGN")); + // /WX - Treats linker warnings as errors. + parser + .declare_with_optional_param() + .long("WX") + .help("/WX - Treats linker warnings as errors.") + .execute(|_args: &mut super::Args, _modifier_stack, _value| { + unimplemented_option("/WX") + }); + + add_silently_ignored_flags(&mut parser); + add_default_flags(&mut parser); + + parser +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::args::InputSpec; + use std::path::Path; + + // Example Windows linker flags from Rust compilation + const WINDOWS_LINKER_ARGS: &[&str] = &[ + "--target=x86_64-pc-windows-msvc", + r#"C:\Users\Samuel\AppData\Local\Temp\rustc7RL5Io\symbols.o"#, + "dummy.dummy.6cfbe55db138f4b-cgu.0.rcgu.o", + "dummy.3wxfnlvokcqcl6j45c8xeicgz.rcgu.o", + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libstd-efa6c7783284bd31.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libpanic_unwind-43468c47cff21662.rlib"#, + 
r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libwindows_targets-3935b75a1bd1c449.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\librustc_demangle-cc0fa0adec36251f.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libstd_detect-22f2c46a93af1174.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libhashbrown-c835068eb56f6efb.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\librustc_std_workspace_alloc-abe24411cb8f5bd4.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libunwind-b5e24931eb1ae1bd.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libcfg_if-8dc64876e32b9d07.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\librustc_std_workspace_core-214bcacef209824d.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\liballoc-3e14ad51a3206bab.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libcore-a55e6b132b0b5f5d.rlib"#, + r#"C:\Users\Samuel\.rustup\toolchains\stable-x86_64-pc-windows-msvc\lib\rustlib\x86_64-pc-windows-msvc\lib\libcompiler_builtins-b994e165f6ecc9e9.rlib"#, + "kernel32.lib", + "kernel32.lib", + "kernel32.lib", + "ntdll.lib", + "userenv.lib", + "ws2_32.lib", + "dbghelp.lib", + "/defaultlib:msvcrt", + "/NXCOMPAT", + "/OUT:dummy.exe", + "/DEBUG", + ]; + + #[track_caller] + fn assert_contains_file(inputs: &[Input], file_path: &str) { + assert!(inputs.iter().any(|input| match &input.spec { + 
InputSpec::File(path) => path.as_ref() == Path::new(file_path), + _ => false, + })); + } + + #[track_caller] + fn assert_contains_lib(inputs: &[Input], lib_name: &str) { + assert!(inputs.iter().any(|input| match &input.spec { + InputSpec::Lib(name) => name.as_ref() == lib_name, + _ => false, + })); + } + + /// Extract Args from unified Args, panicking if it's not the Pe variant. + #[track_caller] + fn unwrap_pe(args: crate::args::Args) -> crate::args::Args { + args.map_target(|t| match t { + crate::args::TargetArgs::Pe(pe) => pe, + other => panic!( + "Expected Pe variant, got {:?}", + std::mem::discriminant(&other) + ), + }) + } + + #[test] + fn test_parse_windows_linker_args() { + let args = unwrap_pe(crate::args::parse(|| WINDOWS_LINKER_ARGS.iter()) + .unwrap()); + + // Test that key flags were parsed correctly + assert!(args.debug_info); // /DEBUG flag + assert!(args.nx_compat); // /NXCOMPAT flag + + // Test that output file was set + assert_eq!(args.output.as_ref(), Path::new("dummy.exe")); + + // Test that input files were collected + assert_contains_file(&args.inputs, "dummy.dummy.6cfbe55db138f4b-cgu.0.rcgu.o"); + assert_contains_file(&args.inputs, "dummy.3wxfnlvokcqcl6j45c8xeicgz.rcgu.o"); + + // Test that library files were collected + assert_contains_file(&args.inputs, "kernel32.lib"); + assert_contains_file(&args.inputs, "ntdll.lib"); + assert_contains_file(&args.inputs, "userenv.lib"); + assert_contains_file(&args.inputs, "ws2_32.lib"); + assert_contains_file(&args.inputs, "dbghelp.lib"); + + // Test that rlib files were collected + assert!(args.inputs.iter().any(|input| { + match &input.spec { + InputSpec::File(path) => path + .to_string_lossy() + .contains("libstd-efa6c7783284bd31.rlib"), + _ => false, + } + })); + assert!(args.inputs.iter().any(|input| { + match &input.spec { + InputSpec::File(path) => path + .to_string_lossy() + .contains("libcore-a55e6b132b0b5f5d.rlib"), + _ => false, + } + })); + + // Test that /defaultlib was handled and added 
/// Options declared with a mandatory value must fail to parse when the value is omitted.
#[test]
fn test_required_parameters() {
    // (option name, command line that passes the option without its parameter)
    const CASES: &[(&str, &[&str])] = &[
        ("IMPLIB", &["--target=x86_64-pc-windows-msvc", "/IMPLIB"]),
        ("EXPORT", &["--target=x86_64-pc-windows-msvc", "/EXPORT"]),
        ("VERSION", &["--target=x86_64-pc-windows-msvc", "/VERSION"]),
    ];

    for &(option, argv) in CASES {
        match crate::args::parse(|| argv.iter()) {
            Ok(_) => panic!("Expected error for {option} without parameter"),
            Err(e) => {
                let error_msg = format!("{:?}", e);
                // Accept either the generic "missing argument" wording or any message that
                // at least names the offending option.
                assert!(
                    error_msg.contains("Missing argument") || error_msg.contains(option),
                    "Error should mention missing argument for {option}: {error_msg}",
                );
            }
        }
    }
}
/// `/ENTRY` and `/OUT` must be recognised regardless of the case they are spelled in.
#[test]
fn test_case_insensitive_parsing() {
    // The same command line in upper, lower and mixed case.
    const SPELLINGS: [&[&str]; 3] = [
        &["--target=x86_64-pc-windows-msvc", "/ENTRY:main", "/OUT:test.exe"],
        &["--target=x86_64-pc-windows-msvc", "/entry:main", "/out:test.exe"],
        &["--target=x86_64-pc-windows-msvc", "/Entry:main", "/Out:test.exe"],
    ];

    for argv in SPELLINGS {
        let parsed = unwrap_pe(crate::args::parse(|| argv.iter()).unwrap());
        assert_eq!(parsed.entry, Some("main".to_string()));
        assert_eq!(parsed.output.as_ref(), Path::new("test.exe"));
    }
}
/// Exercises the `/NODEFAULTLIB` query helpers for both the "ignore everything" form and the
/// per-library form.
#[test]
fn test_nodefaultlib_helper_methods() {
    let parse_pe =
        |argv: &[&str]| unwrap_pe(crate::args::parse(|| argv.iter()).unwrap());

    // Bare /NODEFAULTLIB: every default library is ignored and no names are recorded.
    let ignore_all = parse_pe(&["--target=x86_64-pc-windows-msvc", "/NODEFAULTLIB"]);
    assert!(ignore_all.ignores_all_default_libs());
    for lib in ["msvcrt", "kernel32"] {
        assert!(ignore_all.should_ignore_default_lib(lib));
    }
    assert!(ignore_all.ignored_default_libs().is_empty());

    // /NODEFAULTLIB:<name>: only the named libraries are ignored.
    let ignore_named = parse_pe(&[
        "--target=x86_64-pc-windows-msvc",
        "/NODEFAULTLIB:msvcrt",
        "/NODEFAULTLIB:kernel32",
    ]);
    assert!(!ignore_named.ignores_all_default_libs());
    for lib in ["msvcrt", "kernel32"] {
        assert!(ignore_named.should_ignore_default_lib(lib));
    }
    assert!(!ignore_named.should_ignore_default_lib("user32"));
    assert_eq!(
        ignore_named.ignored_default_libs(),
        &["msvcrt", "kernel32"]
    );
}
output_sections: OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> Result>>> { + let layout_rules = layout_rules_builder.build(); + let resolved = resolver.resolve_sections_and_canonicalise_undefined( + &mut symbol_db, + &mut per_symbol_flags, + &mut output_sections, + &layout_rules, + )?; + + // PE does its own layout and writing — the ELF Layout type doesn't apply. + crate::pe_writer::link(&symbol_db, &resolved, args, output_kind)?; + + Ok(None) + } + + fn elf_header_arch_magic() -> u16 { + unreachable!("PE does not use ELF headers") + } + + fn get_dynamic_relocation_type( + _relocation: linker_utils::elf::DynamicRelocationKind, + ) -> u32 { + unreachable!("PE does not use ELF dynamic relocations") + } + + fn write_plt_entry(_plt_entry: &mut [u8], _got_address: u64, _plt_address: u64) -> Result { + unreachable!("PE does not use PLT") + } + + fn relocation_from_raw(_r_type: u32) -> Result { + unreachable!("PE does not use ELF relocations") + } + + fn rel_type_to_string(_r_type: u32) -> Cow<'static, str> { + unreachable!("PE does not use ELF relocations") + } + + fn local_symbols_in_debug_info() -> bool { + false + } + + fn tp_offset_start( + _layout: &crate::layout::Layout<'data, Self::File>, + ) -> u64 { + 0 + } + + fn get_property_class(_property_type: u32) -> Option { + None + } + + fn merge_eflags(_eflags: impl Iterator) -> Result { + Ok(0) + } + + fn high_part_relocations() -> &'static [u32] { + &[] + } + + fn get_source_info( + _object: &Self::File, + _relocations: &>::RelocationSections, + _section: &>::SectionHeader, + _offset_in_section: u64, + ) -> Result { + bail!("Source info not supported for PE/COFF") + } + + fn new_relaxation( + _relocation_kind: u32, + _section_bytes: &[u8], + _offset_in_section: u64, + _flags: crate::value_flags::ValueFlags, + _output_kind: crate::OutputKind, + _section_flags: >::SectionFlags, + _non_zero_address: bool, + _relax_deltas: 
/// Pre-parsed COFF symbol entry.
///
/// `collect_symbols` pushes one entry per symbol plus one zeroed placeholder per auxiliary
/// record that follows it — presumably so that positions in the parsed slice match raw
/// symbol-table indices (TODO confirm against callers).
#[derive(Debug, Clone, Copy)]
pub(crate) struct CoffSymbol {
    /// Byte offset, from the start of the file data, of this symbol's 8-byte raw name field.
    /// `resolve_symbol_name` re-reads the name from here. Zero for auxiliary placeholders.
    name_data_offset: u32,
    /// Section number from the symbol record. Values greater than zero are 1-based section
    /// indices; zero and negative values do not refer to a section of this file (see
    /// `is_undefined`, `as_common` and `is_absolute`).
    section_number: i32,
    /// Storage class (`pe::IMAGE_SYM_CLASS_*`). Anything other than `EXTERNAL` or
    /// `WEAK_EXTERNAL` is treated as local.
    storage_class: u8,
    /// Raw value field. For a common symbol (external, section number 0, non-zero value)
    /// this is the symbol's size.
    value: u32,
    /// Number of auxiliary records that follow this symbol in the table.
    number_of_aux_symbols: u8,
    /// False when the raw 8-byte name field is all zeros, and for auxiliary placeholders.
    has_name: bool,
}
+#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct CoffSegmentType; + +impl platform::SegmentType for CoffSegmentType {} + +/// Stub program segment def for COFF. +#[derive(Debug, Clone, Copy)] +pub(crate) struct CoffProgramSegmentDef; + +impl Display for CoffProgramSegmentDef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "CoffSegment") + } +} + +impl platform::ProgramSegmentDef for CoffProgramSegmentDef { + fn is_writable(self) -> bool { + false + } + fn is_executable(self) -> bool { + false + } + fn always_keep(self) -> bool { + false + } + fn is_loadable(self) -> bool { + false + } + fn is_stack(self) -> bool { + false + } + fn is_tls(self) -> bool { + false + } + fn order_key(self) -> usize { + 0 + } + fn should_include_section( + self, + _section_info: &crate::output_section_id::SectionOutputInfo, + _section_id: OutputSectionId, + ) -> bool { + false + } +} + +/// COFF symbol name (no versioning). +#[derive(Debug)] +pub(crate) struct CoffRawSymbolName<'data> { + name: &'data [u8], +} + +/// Stub verneed table — COFF has no symbol versioning. +#[derive(Debug)] +pub(crate) struct NeverVerneed; + +/// Stub dynamic tag values — COFF has no dynamic linking. 
+#[derive(Debug)] +pub(crate) enum NeverDynamicTagValues {} + +// ── Parsing ───────────────────────────────────────────────────────────────── + +impl<'data> CoffObjectFile<'data> { + fn parse_impl(data: &'data [u8]) -> Result { + let kind = + object::FileKind::parse(data).context("Failed to identify COFF file kind")?; + match kind { + object::FileKind::Coff => Self::parse_regular(data), + object::FileKind::CoffBig => Self::parse_big(data), + _ => bail!("Not a COFF file"), + } + } + + fn parse_regular(data: &'data [u8]) -> Result { + let mut offset = 0; + let header = pe::ImageFileHeader::parse(data, &mut offset) + .context("Failed to parse COFF header")?; + let machine = header.machine.get(LittleEndian); + let (section_table, sym_table) = header + .sections(data, offset) + .and_then(|s| header.symbols(data).map(|sym| (s, sym))) + .context("Failed to parse COFF sections/symbols")?; + let strings = sym_table.strings(); + + let symbols = Self::collect_symbols(data, &sym_table)?; + let sections = section_table.iter().as_slice(); + let section_names = Self::resolve_all_section_names(sections, &strings)?; + + Ok(CoffObjectFile { + data, + sections, + symbols, + section_names, + strings, + machine, + }) + } + + fn parse_big(data: &'data [u8]) -> Result { + let mut offset = 0; + let header = pe::AnonObjectHeaderBigobj::parse(data, &mut offset) + .context("Failed to parse COFF bigobj header")?; + let machine = header.machine.get(LittleEndian); + let (section_table, sym_table) = header + .sections(data, offset) + .and_then(|s| header.symbols(data).map(|sym| (s, sym))) + .context("Failed to parse COFF bigobj sections/symbols")?; + let strings = sym_table.strings(); + + let symbols = Self::collect_symbols(data, &sym_table)?; + let sections = section_table.iter().as_slice(); + let section_names = Self::resolve_all_section_names(sections, &strings)?; + + Ok(CoffObjectFile { + data, + sections, + symbols, + section_names, + strings, + machine, + }) + } + + fn collect_symbols( + 
data: &'data [u8], + sym_table: &object::read::coff::SymbolTable<'data, &'data [u8], Coff>, + ) -> Result<&'data [CoffSymbol]> { + let mut symbols_vec = Vec::new(); + for (_, symbol) in sym_table.iter() { + let raw_name = symbol.raw_name(); + let name_data_offset = + (raw_name.as_ptr() as usize - data.as_ptr() as usize) as u32; + let has_name = *raw_name != [0u8; 8]; + + symbols_vec.push(CoffSymbol { + name_data_offset, + section_number: symbol.section_number() as i32, + storage_class: symbol.storage_class(), + value: symbol.value(), + number_of_aux_symbols: symbol.number_of_aux_symbols(), + has_name, + }); + for _ in 0..symbol.number_of_aux_symbols() { + symbols_vec.push(CoffSymbol { + name_data_offset: 0, + section_number: 0, + storage_class: pe::IMAGE_SYM_CLASS_NULL, + value: 0, + number_of_aux_symbols: 0, + has_name: false, + }); + } + } + + Ok(Box::leak(symbols_vec.into_boxed_slice())) + } + + fn resolve_all_section_names( + sections: &'data [pe::ImageSectionHeader], + strings: &object::read::StringTable<'data>, + ) -> Result<&'data [&'data [u8]]> { + let names: Vec<&'data [u8]> = sections + .iter() + .map(|header| Self::resolve_section_name(header, strings)) + .collect::>()?; + Ok(Box::leak(names.into_boxed_slice())) + } + + fn resolve_section_name( + header: &'data pe::ImageSectionHeader, + strings: &object::read::StringTable<'data>, + ) -> Result<&'data [u8]> { + let name = &header.name; + if name[0] == b'/' { + let offset_str = &name[1..]; + let len = offset_str.iter().position(|&b| b == 0).unwrap_or(7); + let offset_str = std::str::from_utf8(&offset_str[..len]) + .context("Invalid COFF section name string table reference")?; + let offset: u32 = offset_str + .trim() + .parse() + .context("Invalid COFF section name string table offset")?; + strings + .get(offset) + .map_err(|()| crate::error!("COFF section name string table offset out of range")) + } else { + let len = name.iter().position(|&b| b == 0).unwrap_or(8); + Ok(&name[..len]) + } + } + + fn 
resolve_symbol_name(&self, sym: &CoffSymbol) -> Result<&'data [u8]> { + let off = sym.name_data_offset as usize; + let name = self + .data + .get(off..off + 8) + .context("COFF symbol name offset out of range")?; + if name[..4] == [0, 0, 0, 0] { + let offset = u32::from_le_bytes(name[4..8].try_into().unwrap()); + self.strings + .get(offset) + .map_err(|()| crate::error!("Invalid COFF symbol string table offset")) + } else { + let len = name.iter().position(|&b| b == 0).unwrap_or(8); + Ok(&name[..len]) + } + } + + fn section_index_of(&self, header: &pe::ImageSectionHeader) -> usize { + let ptr_offset = (header as *const _ as usize) + .wrapping_sub(self.sections.as_ptr() as usize); + ptr_offset / core::mem::size_of::() + } +} + +// ── Symbol trait impl ─────────────────────────────────────────────────────── + +impl platform::Symbol for CoffSymbol { + fn as_common(&self) -> Option { + if self.storage_class == pe::IMAGE_SYM_CLASS_EXTERNAL + && self.section_number == 0 + && self.value != 0 + { + let size = self.value as u64; + let alignment = crate::alignment::Alignment::new(1).unwrap(); + let part_id = crate::output_section_id::BSS.part_id_with_alignment(alignment); + Some(platform::CommonSymbol { size, part_id }) + } else { + None + } + } + + fn is_undefined(&self) -> bool { + self.section_number == 0 + && self.value == 0 + && self.storage_class == pe::IMAGE_SYM_CLASS_EXTERNAL + } + + fn is_local(&self) -> bool { + self.storage_class != pe::IMAGE_SYM_CLASS_EXTERNAL + && self.storage_class != pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + fn is_absolute(&self) -> bool { + self.section_number == pe::IMAGE_SYM_ABSOLUTE as i32 + } + + fn is_weak(&self) -> bool { + self.storage_class == pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + fn visibility(&self) -> Visibility { + Visibility::Default + } + + fn value(&self) -> u64 { + self.value as u64 + } + + fn size(&self) -> u64 { + if self.is_common() { + self.value as u64 + } else { + 0 + } + } + + fn section_index(&self) -> 
object::SectionIndex { + if self.section_number > 0 { + object::SectionIndex(self.section_number as usize) + } else { + object::SectionIndex(0) + } + } + + fn has_name(&self) -> bool { + self.has_name + } + + fn debug_string(&self) -> String { + format!( + "sect={} class={} val={}", + self.section_number, self.storage_class, self.value + ) + } + + fn is_tls(&self) -> bool { + false + } + + fn is_interposable(&self) -> bool { + false + } + + fn is_func(&self) -> bool { + false + } + + fn is_ifunc(&self) -> bool { + false + } + + fn is_hidden(&self) -> bool { + false + } + + fn is_gnu_unique(&self) -> bool { + false + } +} + +// ── Section trait impls ───────────────────────────────────────────────────── + +impl<'data> platform::SectionHeader<'data, CoffObjectFile<'data>> + for pe::ImageSectionHeader +{ + fn flags(&self) -> CoffSectionFlags { + CoffSectionFlags(self.characteristics.get(LittleEndian)) + } + + fn attributes(&self) -> () {} + + fn section_type(&self) -> CoffSectionType { + CoffSectionType(self.characteristics.get(LittleEndian)) + } +} + +impl platform::SectionFlags for CoffSectionFlags { + fn is_alloc(self) -> bool { + self.0 & pe::IMAGE_SCN_MEM_DISCARDABLE == 0 + } + + fn is_writable(self) -> bool { + self.0 & pe::IMAGE_SCN_MEM_WRITE != 0 + } + + fn is_executable(self) -> bool { + self.0 & pe::IMAGE_SCN_MEM_EXECUTE != 0 + } + + fn is_tls(self) -> bool { + false + } + + fn is_merge_section(self) -> bool { + false + } + + fn is_strings(self) -> bool { + false + } + + fn should_retain(self) -> bool { + false + } + + fn should_exclude(&self) -> bool { + self.0 & pe::IMAGE_SCN_LNK_REMOVE != 0 + } + + fn is_group(self) -> bool { + self.0 & pe::IMAGE_SCN_LNK_COMDAT != 0 + } +} + +impl platform::SectionType for CoffSectionType { + fn is_null(self) -> bool { + self.0 == 0 + } + + fn is_note(self) -> bool { + false + } + + fn is_prog_bits(self) -> bool { + self.0 & pe::IMAGE_SCN_CNT_CODE != 0 + || self.0 & pe::IMAGE_SCN_CNT_INITIALIZED_DATA != 0 + } + + fn 
is_no_bits(self) -> bool { + self.0 & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0 + } +} + +impl platform::SectionAttributes for () { + fn merge(&mut self, _rhs: Self) {} + + fn apply(&self, _output_sections: &mut OutputSections, _section_id: OutputSectionId) {} +} + +// ── RawSymbolName ─────────────────────────────────────────────────────────── + +impl<'data> platform::RawSymbolName<'data> for CoffRawSymbolName<'data> { + fn parse(bytes: &'data [u8]) -> Self { + CoffRawSymbolName { name: bytes } + } + + fn name(&self) -> &'data [u8] { + self.name + } + + fn version_name(&self) -> Option<&'data [u8]> { + None + } + + fn is_default(&self) -> bool { + true + } +} + +// ── VerneedTable ──────────────────────────────────────────────────────────── + +impl<'data> platform::VerneedTable<'data> for NeverVerneed { + fn version_name(&self, _local_symbol_index: object::SymbolIndex) -> Option<&'data [u8]> { + None + } +} + +// ── DynamicTagValues ──────────────────────────────────────────────────────── + +impl<'data> platform::DynamicTagValues<'data> for NeverDynamicTagValues { + fn lib_name(&self, _input: &crate::input_data::InputRef<'data>) -> &'data [u8] { + match *self {} + } +} + +// ── NonAddressableIndexes ─────────────────────────────────────────────────── + +impl platform::NonAddressableIndexes for () { + fn new<'data, O: platform::ObjectFile<'data>>(_symbol_db: &SymbolDb<'data, O>) -> Self {} +} + +// ── ObjectFile trait implementation ───────────────────────────────────────── + +impl<'data> platform::ObjectFile<'data> for CoffObjectFile<'data> { + type Args = PeArgs; + type Symbol = CoffSymbol; + type SectionHeader = pe::ImageSectionHeader; + type SectionIterator = core::slice::Iter<'data, pe::ImageSectionHeader>; + type SectionFlags = CoffSectionFlags; + type SectionType = CoffSectionType; + type SegmentType = CoffSegmentType; + type SectionAttributes = (); + type DynamicTagValues = NeverDynamicTagValues; + type DynamicEntry = (); + type RelocationList = &'data 
[pe::ImageRelocation]; + type RelocationSections = (); + type VersionNames = (); + type RawSymbolName = CoffRawSymbolName<'data>; + type VerneedTable = NeverVerneed; + type FileLayoutState = (); + type LayoutProperties = (); + type SymbolVersionIndex = (); + type DynamicLayoutState = (); + type DynamicLayout = (); + type NonAddressableCounts = (); + type NonAddressableIndexes = (); + type EpilogueLayout = (); + type GroupLayoutExt = (); + type CommonGroupStateExt = (); + type LayoutResourcesExt = (); + type ProgramSegmentDef = CoffProgramSegmentDef; + + // ── Parsing ───────────────────────────────────────────────────────── + + fn parse_bytes(input: &'data [u8], _is_dynamic: bool) -> Result { + Self::parse_impl(input) + } + + fn parse(input: &InputBytes<'data>, args: &Args) -> Result { + let file = Self::parse_impl(input.data)?; + + let file_arch = Architecture::try_from(file.machine)?; + if file_arch != args.arch { + bail!( + "`{input}` has incompatible architecture: {file_arch}, expecting {}", + args.arch, + ); + } + + Ok(file) + } + + fn is_dynamic(&self) -> bool { + false + } + + // ── Symbols ───────────────────────────────────────────────────────── + + fn num_symbols(&self) -> usize { + self.symbols.len() + } + + fn symbols(&self) -> &'data [CoffSymbol] { + self.symbols + } + + fn enumerate_symbols( + &self, + ) -> impl Iterator { + self.symbols + .iter() + .enumerate() + .map(|(i, s)| (object::SymbolIndex(i), s)) + } + + fn symbols_iter(&self) -> impl Iterator { + self.symbols.iter() + } + + fn symbol(&self, index: object::SymbolIndex) -> Result<&'data CoffSymbol> { + self.symbols + .get(index.0) + .with_context(|| format!("Invalid COFF symbol index {}", index.0)) + } + + fn symbol_name(&self, symbol: &CoffSymbol) -> Result<&'data [u8]> { + self.resolve_symbol_name(symbol) + } + + fn symbol_section( + &self, + symbol: &CoffSymbol, + _index: object::SymbolIndex, + ) -> Result> { + if symbol.section_number > 0 { + 
Ok(Some(object::SectionIndex(symbol.section_number as usize))) + } else { + Ok(None) + } + } + + fn symbol_versions(&self) -> &[()] { + &[] + } + + fn symbol_version_debug(&self, _symbol_index: object::SymbolIndex) -> Option { + None + } + + fn get_version_names(&self) -> Result<()> { + Ok(()) + } + + fn get_symbol_name_and_version( + &self, + symbol: &CoffSymbol, + _local_index: usize, + _version_names: &(), + ) -> Result> { + let name = self.resolve_symbol_name(symbol)?; + Ok(CoffRawSymbolName { name }) + } + + fn verneed_table(&self) -> Result { + Ok(NeverVerneed) + } + + // ── Sections ──────────────────────────────────────────────────────── + + fn num_sections(&self) -> usize { + self.sections.len() + } + + fn section_iter(&self) -> Self::SectionIterator { + self.sections.iter() + } + + fn enumerate_sections( + &self, + ) -> impl Iterator { + self.sections + .iter() + .enumerate() + .map(|(i, s)| (object::SectionIndex(i + 1), s)) + } + + fn section(&self, index: object::SectionIndex) -> Result<&'data pe::ImageSectionHeader> { + let idx = index + .0 + .checked_sub(1) + .with_context(|| format!("Invalid COFF section index {}", index.0))?; + self.sections + .get(idx) + .with_context(|| format!("COFF section index {} out of range", index.0)) + } + + fn section_by_name( + &self, + name: &str, + ) -> Option<(object::SectionIndex, &'data pe::ImageSectionHeader)> { + for (i, section_name) in self.section_names.iter().enumerate() { + if *section_name == name.as_bytes() { + return Some((object::SectionIndex(i + 1), &self.sections[i])); + } + } + None + } + + fn section_name( + &self, + section_header: &pe::ImageSectionHeader, + ) -> Result<&'data [u8]> { + let index = self.section_index_of(section_header); + self.section_names + .get(index) + .copied() + .context("Section header not found in this file") + } + + fn section_size(&self, header: &pe::ImageSectionHeader) -> Result { + Ok(header.size_of_raw_data.get(LittleEndian) as u64) + } + + fn section_alignment(&self, 
header: &pe::ImageSectionHeader) -> Result {
+        let chars = header.characteristics.get(LittleEndian);
+        let align_field = (chars & pe::IMAGE_SCN_ALIGN_MASK) >> 20;
+        if align_field == 0 {
+            Ok(1)
+        } else {
+            Ok(1u64 << (align_field - 1))
+        }
+    }
+
+    fn raw_section_data(&self, section: &pe::ImageSectionHeader) -> Result<&'data [u8]> {
+        let offset = section.pointer_to_raw_data.get(LittleEndian) as usize;
+        let size = section.size_of_raw_data.get(LittleEndian) as usize;
+        if size == 0 {
+            return Ok(&[]);
+        }
+        self.data
+            .get(offset..offset + size)
+            .context("COFF section data out of range")
+    }
+
+    fn section_data(
+        &self,
+        section: &pe::ImageSectionHeader,
+        _member: &bumpalo_herd::Member<'data>,
+        _loaded_metrics: &LoadedMetrics,
+    ) -> Result<&'data [u8]> {
+        self.raw_section_data(section)
+    }
+
+    fn copy_section_data(&self, section: &pe::ImageSectionHeader, out: &mut [u8]) -> Result {
+        let data = self.raw_section_data(section)?;
+        out[..data.len()].copy_from_slice(data);
+        Ok(())
+    }
+
+    fn section_data_cow(&self, section: &pe::ImageSectionHeader) -> Result> {
+        self.raw_section_data(section).map(Cow::Borrowed)
+    }
+
+    fn section_display_name(&self, index: object::SectionIndex) -> Cow<'data, str> {
+        // Section indices are 1-based: index 0 names no section and must not alias section 1.
+        if let Some(name) = index.0.checked_sub(1).and_then(|idx| self.section_names.get(idx)) {
+            String::from_utf8_lossy(name)
+        } else {
+            Cow::Owned(format!("section {}", index.0))
+        }
+    }
+
+    // ── Relocations ─────────────────────────────────────────────────────
+
+    fn relocations(
+        &self,
+        index: object::SectionIndex,
+        _relocations: &(),
+    ) -> Result<&'data [pe::ImageRelocation]> {
+        // Section indices are 1-based: index 0 has no relocations and must not alias section 1.
+        if let Some(section) = index.0.checked_sub(1).and_then(|idx| self.sections.get(idx)) {
+            let offset = section.pointer_to_relocations.get(LittleEndian) as usize;
+            let count = section.number_of_relocations.get(LittleEndian) as usize;
+            if count == 0 {
+                return Ok(&[]);
+            }
+            let size = count * core::mem::size_of::();
+            let reloc_data = self
+                .data
+
.get(offset..offset + size) + .context("COFF relocation data out of range")?; + Ok(object::pod::slice_from_all_bytes(reloc_data) + .map_err(|()| crate::error!("Failed to parse COFF relocations"))?) + } else { + Ok(&[]) + } + } + + fn parse_relocations(&self) -> Result<()> { + Ok(()) + } + + // ── Dynamic linking stubs ─────────────────────────────────────────── + + fn dynamic_tags(&self) -> Result<&'data [()]> { + Ok(&[]) + } + + fn dynamic_tag_values(&self) -> Option { + None + } + + fn activate_dynamic(&self, _state: &mut ()) {} + + fn dynamic_symbol_used( + &self, + _symbol_index: object::SymbolIndex, + _state: &mut (), + ) -> Result { + Ok(()) + } + + fn finalise_sizes_dynamic( + &self, + _lib_name: &[u8], + _state: &mut (), + _mem_sizes: &mut OutputSectionPartMap, + ) -> Result { + Ok(()) + } + + fn apply_non_addressable_indexes_dynamic( + &self, + _indexes: &mut (), + _counts: &mut (), + _state: &mut (), + ) -> Result { + Ok(()) + } + + fn finalise_layout_dynamic( + &self, + _state: (), + _memory_offsets: &mut OutputSectionPartMap, + _section_layouts: &OutputSectionMap, + ) {} + + // ── Layout stubs ──────────────────────────────────────────────────── + + fn new_epilogue_layout( + _args: &Args, + _output_kind: crate::OutputKind, + _dynamic_symbol_definitions: &mut [DynamicSymbolDefinition<'_>], + ) {} + + fn apply_non_addressable_indexes_epilogue(_counts: &mut (), _state: &mut ()) {} + + fn apply_non_addressable_indexes<'groups>( + _symbol_db: &SymbolDb<'data, Self>, + _counts: &(), + _mem_sizes_iter: impl Iterator>, + ) { + } + + fn finalise_sizes_epilogue( + _state: &mut (), + _mem_sizes: &mut OutputSectionPartMap, + _dynamic_symbol_definitions: &[DynamicSymbolDefinition<'data>], + _properties: &(), + _symbol_db: &SymbolDb<'data, Self>, + ) { + } + + fn finalise_sizes_all( + _mem_sizes: &mut OutputSectionPartMap, + _symbol_db: &SymbolDb<'data, Self>, + ) { + } + + fn apply_late_size_adjustments_epilogue( + _state: &mut (), + _current_sizes: 
&OutputSectionPartMap, + _extra_sizes: &mut OutputSectionPartMap, + _dynamic_symbol_defs: &[DynamicSymbolDefinition], + ) -> Result { + Ok(()) + } + + fn finalise_layout_epilogue( + _epilogue_state: &mut (), + _memory_offsets: &mut OutputSectionPartMap, + _symbol_db: &SymbolDb<'data, Self>, + _common_state: &(), + _dynsym_start_index: u32, + _dynamic_symbol_defs: &[DynamicSymbolDefinition], + ) -> Result { + Ok(()) + } + + fn process_gnu_note_section( + &self, + _state: &mut (), + _section_index: object::SectionIndex, + ) -> Result { + Ok(()) + } + + fn create_layout_properties<'states, 'files, P: platform::Platform<'data, File = Self>>( + _args: &Args, + _objects: impl Iterator, + _states: impl Iterator + Clone, + ) -> Result<()> + where + 'data: 'files, + 'data: 'states, + { + Ok(()) + } + + fn load_exception_frame_data<'scope, P: platform::Platform<'data, File = Self>>( + _object: &mut crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _eh_frame_section_index: object::SectionIndex, + _resources: &'scope crate::layout::GraphResources<'data, '_, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _scope: &Scope<'scope>, + ) -> Result { + Ok(()) + } + + fn non_empty_section_loaded<'scope, P: platform::Platform<'data, File = Self>>( + _object: &mut crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _unloaded: UnloadedSection, + _resources: &'scope crate::layout::GraphResources<'data, 'scope, Self>, + _scope: &Scope<'scope>, + ) -> Result { + Ok(()) + } + + fn finalise_group_layout(_memory_offsets: &OutputSectionPartMap) {} + + fn finalise_find_required_sections( + _groups: &[crate::layout::GroupState<'data, Self>], + ) { + } + + fn pre_finalise_sizes_prelude( + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _args: &Args, + ) { + } + + fn finalise_object_sizes( + _object: &mut 
crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + ) { + } + + fn finalise_object_layout( + _object: &crate::layout::ObjectLayoutState<'data, Self>, + _memory_offsets: &mut OutputSectionPartMap, + ) { + } + + fn compute_object_addresses( + _object: &crate::layout::ObjectLayoutState<'data, Self>, + _memory_offsets: &mut OutputSectionPartMap, + ) { + } + + fn frame_data_base_address(_memory_offsets: &OutputSectionPartMap) -> u64 { + 0 + } + + fn should_enforce_undefined( + &self, + _resources: &crate::layout::GraphResources<'data, '_, Self>, + ) -> bool { + false + } + + fn layout_resources_ext( + _groups: &[crate::grouping::Group<'data, Self>], + ) -> () { + } + + fn load_object_section_relocations<'scope, P: platform::Platform<'data, File = Self>>( + _state: &crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _resources: &'scope crate::layout::GraphResources<'data, '_, Self>, + _section: crate::layout::Section, + _scope: &Scope<'scope>, + ) -> Result { + Ok(()) + } + + fn load_object_debug_relocations<'scope, P: platform::Platform<'data, File = Self>>( + _state: &crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _resources: &'scope crate::layout::GraphResources<'data, '_, Self>, + _section: crate::layout::Section, + _scope: &Scope<'scope>, + ) -> Result { + Ok(()) + } + + fn create_dynamic_symbol_definition( + _symbol_db: &SymbolDb<'data, Self>, + _symbol_id: crate::symbol_db::SymbolId, + ) -> Result> { + bail!("PE does not support dynamic symbol definitions") + } + + fn validate_section( + _section_info: &crate::output_section_id::SectionOutputInfo, + _section_flags: CoffSectionFlags, + _section_layout: &OutputRecordLayout, + _merge_target: OutputSectionId, + _output_sections: 
&OutputSections<'data>, + _section_id: OutputSectionId, + ) -> Result { + Ok(()) + } + + fn default_section_type() -> CoffSectionType { + CoffSectionType(0) + } + + fn verify_resolution_allocation( + _output_sections: &OutputSections, + _output_order: &crate::output_section_id::OutputOrder, + _output_kind: crate::OutputKind, + _mem_sizes: &OutputSectionPartMap, + _resolution: &crate::layout::Resolution, + ) -> Result { + Ok(()) + } + + fn update_segment_keep_list( + _program_segments: &crate::program_segments::ProgramSegments, + _keep_segments: &mut [bool], + _args: &Args, + ) { + } + + fn program_segment_defs() -> &'static [CoffProgramSegmentDef] { + &[] + } + + fn unconditional_segment_defs() -> &'static [CoffProgramSegmentDef] { + &[] + } + + fn apply_force_keep_sections( + _keep_sections: &mut crate::output_section_map::OutputSectionMap, + _args: &Args, + ) { + } + + fn is_zero_sized_section_content(_section_id: crate::output_section_id::OutputSectionId) -> bool { + false + } + + fn create_linker_defined_symbols( + symbols: &mut crate::parsing::InternalSymbolsBuilder<'data>, + _output_kind: crate::output_kind::OutputKind, + ) { + // The undefined symbol must always be symbol 0. 
+ symbols + .add_symbol(crate::parsing::InternalSymDefInfo::new( + crate::parsing::SymbolPlacement::Undefined, + b"", + )) + .hide(); + } +} diff --git a/libwild/src/diagnostics.rs b/libwild/src/diagnostics.rs index e9ccfd44a..e443167e8 100644 --- a/libwild/src/diagnostics.rs +++ b/libwild/src/diagnostics.rs @@ -1,275 +1,275 @@ -use crate::Args; -use crate::elf::RawSymbolName; -use crate::grouping::SequencedInput; -use crate::input_data::FileId; -use crate::input_data::PRELUDE_FILE_ID; -use crate::platform::ObjectFile; -use crate::platform::RawSymbolName as _; -use crate::platform::Symbol as _; -use crate::resolution::ResolvedFile; -use crate::resolution::ResolvedGroup; -use crate::symbol::PreHashedSymbolName; -use crate::symbol_db::SymbolDb; -use crate::symbol_db::SymbolId; -use crate::value_flags::AtomicPerSymbolFlags; -use crate::value_flags::FlagsForSymbol as _; -use std::fmt::Write as _; - -/// Prints information about a symbol when dropped. We do this when dropped so that we can print -/// either after resolution flags have been computed, or, if layout gets an error, then before we -/// unwind. -pub(crate) enum SymbolInfoPrinter { - Disabled, - Enabled(Box), -} - -pub(crate) struct State { - loaded_file_ids: hashbrown::HashSet, - name: String, - - /// Our output the last time `update` was called. This is what will be printed when dropped - /// unless `update` is called again. 
- output: String, -} - -impl Drop for SymbolInfoPrinter { - fn drop(&mut self) { - self.print(); - } -} - -impl SymbolInfoPrinter { - pub(crate) fn new<'data2, O: ObjectFile<'data2>>( - args: &Args, - groups: &[ResolvedGroup<'data2, O>], - ) -> Self { - let Some(name) = args.sym_info.as_ref() else { - return Self::Disabled; - }; - - let loaded_file_ids = groups - .iter() - .flat_map(|group| { - group.files.iter().filter_map(|file| match file { - ResolvedFile::NotLoaded(_) => None, - ResolvedFile::Prelude(_) => Some(PRELUDE_FILE_ID), - ResolvedFile::Object(obj) => Some(obj.common.file_id), - ResolvedFile::Dynamic(obj) => Some(obj.common.file_id), - ResolvedFile::LinkerScript(obj) => Some(obj.file_id), - ResolvedFile::SyntheticSymbols(obj) => Some(obj.file_id), - #[cfg(feature = "plugins")] - ResolvedFile::LtoInput(obj) => Some(obj.file_id), - }) - }) - .collect(); - - Self::Enabled(Box::new(State { - loaded_file_ids, - name: name.to_owned(), - output: "SymbolInfoPrinter::update never called, so can't print symbol info".into(), - })) - } - - pub(crate) fn update<'data, O: ObjectFile<'data>>( - &mut self, - symbol_db: &SymbolDb<'data, O>, - per_symbol_flags: &AtomicPerSymbolFlags<'_>, - ) { - let Self::Enabled(state) = self else { - return; - }; - - let mut out = String::new(); - let name = symbol_db - .find_mangled_name(&state.name) - .unwrap_or_else(|| state.name.clone()); - - let matcher = NameMatcher::new(&name); - let mut target_ids = Vec::new(); - target_ids.extend(name.parse().ok().map(SymbolId::from_usize)); - - let symbol_id = symbol_db.get( - &PreHashedSymbolName::from_raw(&RawSymbolName::parse(name.as_bytes())), - true, - ); - let _ = writeln!(&mut out, "Global name `{name}` refers to: {symbol_id:?}"); - - target_ids.extend(symbol_id); - - let _ = writeln!(&mut out, "Definitions / references with name `{name}`:"); - for i in 0..symbol_db.num_symbols() { - let symbol_id = SymbolId::from_usize(i); - let canonical = symbol_db.definition(symbol_id); - let 
file_id = symbol_db.file_id_for_symbol(symbol_id); - let flags = per_symbol_flags.flags_for_symbol(symbol_id); - - let file_state = if state.loaded_file_ids.contains(&file_id) { - "LOADED" - } else { - "NOT LOADED" - }; - - let Ok(sym_name) = symbol_db.symbol_name(symbol_id) else { - continue; - }; - - let is_name_match = matcher.matches(sym_name.bytes(), symbol_id, symbol_db); - - let is_id_match = target_ids.contains(&symbol_id); - - if is_name_match || is_id_match { - if symbol_id != canonical { - // Show info about the canonical symbol too. Generally the canonical symbol will - // have the same name, so this won't do anything. Note, this only works if the - // related symbol is later. Fixing that would require restructuring this - // function. - target_ids.push(canonical); - } - - let file = symbol_db.file(file_id); - let local_index = symbol_id.to_input(file.symbol_id_range()); - - let sym_debug; - let input; - - match file { - SequencedInput::Prelude(_) => { - input = " ".to_owned(); - sym_debug = "Prelude symbol".to_owned(); - } - SequencedInput::Object(o) => match o.parsed.object.symbol(local_index) { - Ok(sym) => { - sym_debug = sym.debug_string(); - input = o.parsed.input.to_string(); - } - Err(e) => { - let _ = writeln!( - &mut out, - " Corrupted input (file_id #{file_id}) {}: {}", - o.parsed.input, - e.to_string() - ); - continue; - } - }, - SequencedInput::LinkerScript(s) => { - sym_debug = "Linker script symbol".to_owned(); - input = s.parsed.input.to_string(); - } - SequencedInput::SyntheticSymbols(_) => { - input = " ".to_owned(); - sym_debug = "Synthetic symbol".to_owned(); - } - #[cfg(feature = "plugins")] - SequencedInput::LtoInput(o) => { - input = o.to_string(); - sym_debug = o.symbol_properties_display(symbol_id).to_string(); - } - } - - // Versions can be either literally within the symbol name or in separate version - // tables. 
It's useful to know which we've got, so if we get a version from a - // separate table, we separate it visually from the rest of the name. - let version_str = symbol_db - .symbol_version_debug(symbol_id) - .map_or_else(String::new, |v| format!(" version `{v}`")); - - let canon = if symbol_id == canonical { - "".to_owned() - } else { - format!(" -> {canonical}") - }; - - let _ = writeln!( - &mut out, - " {symbol_id}{canon}: {sym_debug}: {flags} \ - \n {sym_name}{version_str}\n \ - #{local_index} in File #{file_id} {input} ({file_state})" - ); - } - } - } - - fn print(&self) { - match self { - SymbolInfoPrinter::Disabled => {} - SymbolInfoPrinter::Enabled(state) => { - println!("{}", &state.output); - } - } - } -} - -#[derive(Debug)] -struct NameMatcher { - name: String, - version: VersionMatcher, -} - -#[derive(Debug)] -enum VersionMatcher { - None, - Exact(String), - Any, -} - -impl NameMatcher { - fn new(pattern: &str) -> Self { - if let Some((n, v)) = pattern.split_once('@') { - Self { - name: n.to_owned(), - version: VersionMatcher::new(v), - } - } else { - Self { - name: pattern.to_owned(), - version: VersionMatcher::None, - } - } - } - - fn matches<'data, O: ObjectFile<'data>>( - &self, - name: &[u8], - symbol_id: SymbolId, - symbol_db: &SymbolDb<'data, O>, - ) -> bool { - if let Some(i) = name.iter().position(|b| *b == b'@') { - let (name, version) = name.split_at(i); - return name == self.name.as_bytes() && self.version.matches_at_prefixed(version); - } - - if name != self.name.as_bytes() { - return false; - } - - self.version.matches_at_prefixed( - symbol_db - .symbol_version_debug(symbol_id) - .unwrap_or_default() - .as_bytes(), - ) - } -} - -impl VersionMatcher { - fn new(n: &str) -> Self { - if n == "*" { - VersionMatcher::Any - } else { - VersionMatcher::Exact(n.to_owned()) - } - } - - fn matches_at_prefixed(&self, mut version: &[u8]) -> bool { - let is_default = version.starts_with(b"@@"); - while let Some(rest) = version.strip_prefix(b"@") { - version 
= rest; - } - match self { - VersionMatcher::Any => true, - VersionMatcher::Exact(v) => version == v.as_bytes(), - VersionMatcher::None => is_default || version.is_empty(), - } - } -} +use crate::Args; +use crate::elf::RawSymbolName; +use crate::grouping::SequencedInput; +use crate::input_data::FileId; +use crate::input_data::PRELUDE_FILE_ID; +use crate::platform::ObjectFile; +use crate::platform::RawSymbolName as _; +use crate::platform::Symbol as _; +use crate::resolution::ResolvedFile; +use crate::resolution::ResolvedGroup; +use crate::symbol::PreHashedSymbolName; +use crate::symbol_db::SymbolDb; +use crate::symbol_db::SymbolId; +use crate::value_flags::AtomicPerSymbolFlags; +use crate::value_flags::FlagsForSymbol as _; +use std::fmt::Write as _; + +/// Prints information about a symbol when dropped. We do this when dropped so that we can print +/// either after resolution flags have been computed, or, if layout gets an error, then before we +/// unwind. +pub(crate) enum SymbolInfoPrinter { + Disabled, + Enabled(Box), +} + +pub(crate) struct State { + loaded_file_ids: hashbrown::HashSet, + name: String, + + /// Our output the last time `update` was called. This is what will be printed when dropped + /// unless `update` is called again. 
+ output: String, +} + +impl Drop for SymbolInfoPrinter { + fn drop(&mut self) { + self.print(); + } +} + +impl SymbolInfoPrinter { + pub(crate) fn new<'data2, O: ObjectFile<'data2>>( + args: &Args, + groups: &[ResolvedGroup<'data2, O>], + ) -> Self { + let Some(name) = args.sym_info.as_ref() else { + return Self::Disabled; + }; + + let loaded_file_ids = groups + .iter() + .flat_map(|group| { + group.files.iter().filter_map(|file| match file { + ResolvedFile::NotLoaded(_) => None, + ResolvedFile::Prelude(_) => Some(PRELUDE_FILE_ID), + ResolvedFile::Object(obj) => Some(obj.common.file_id), + ResolvedFile::Dynamic(obj) => Some(obj.common.file_id), + ResolvedFile::LinkerScript(obj) => Some(obj.file_id), + ResolvedFile::SyntheticSymbols(obj) => Some(obj.file_id), + #[cfg(feature = "plugins")] + ResolvedFile::LtoInput(obj) => Some(obj.file_id), + }) + }) + .collect(); + + Self::Enabled(Box::new(State { + loaded_file_ids, + name: name.to_owned(), + output: "SymbolInfoPrinter::update never called, so can't print symbol info".into(), + })) + } + + pub(crate) fn update<'data, O: ObjectFile<'data>>( + &mut self, + symbol_db: &SymbolDb<'data, O>, + per_symbol_flags: &AtomicPerSymbolFlags<'_>, + ) { + let Self::Enabled(state) = self else { + return; + }; + + let mut out = String::new(); + let name = symbol_db + .find_mangled_name(&state.name) + .unwrap_or_else(|| state.name.clone()); + + let matcher = NameMatcher::new(&name); + let mut target_ids = Vec::new(); + target_ids.extend(name.parse().ok().map(SymbolId::from_usize)); + + let symbol_id = symbol_db.get( + &PreHashedSymbolName::from_raw(&RawSymbolName::parse(name.as_bytes())), + true, + ); + let _ = writeln!(&mut out, "Global name `{name}` refers to: {symbol_id:?}"); + + target_ids.extend(symbol_id); + + let _ = writeln!(&mut out, "Definitions / references with name `{name}`:"); + for i in 0..symbol_db.num_symbols() { + let symbol_id = SymbolId::from_usize(i); + let canonical = symbol_db.definition(symbol_id); + let 
file_id = symbol_db.file_id_for_symbol(symbol_id); + let flags = per_symbol_flags.flags_for_symbol(symbol_id); + + let file_state = if state.loaded_file_ids.contains(&file_id) { + "LOADED" + } else { + "NOT LOADED" + }; + + let Ok(sym_name) = symbol_db.symbol_name(symbol_id) else { + continue; + }; + + let is_name_match = matcher.matches(sym_name.bytes(), symbol_id, symbol_db); + + let is_id_match = target_ids.contains(&symbol_id); + + if is_name_match || is_id_match { + if symbol_id != canonical { + // Show info about the canonical symbol too. Generally the canonical symbol will + // have the same name, so this won't do anything. Note, this only works if the + // related symbol is later. Fixing that would require restructuring this + // function. + target_ids.push(canonical); + } + + let file = symbol_db.file(file_id); + let local_index = symbol_id.to_input(file.symbol_id_range()); + + let sym_debug; + let input; + + match file { + SequencedInput::Prelude(_) => { + input = " ".to_owned(); + sym_debug = "Prelude symbol".to_owned(); + } + SequencedInput::Object(o) => match o.parsed.object.symbol(local_index) { + Ok(sym) => { + sym_debug = sym.debug_string(); + input = o.parsed.input.to_string(); + } + Err(e) => { + let _ = writeln!( + &mut out, + " Corrupted input (file_id #{file_id}) {}: {}", + o.parsed.input, + e.to_string() + ); + continue; + } + }, + SequencedInput::LinkerScript(s) => { + sym_debug = "Linker script symbol".to_owned(); + input = s.parsed.input.to_string(); + } + SequencedInput::SyntheticSymbols(_) => { + input = " ".to_owned(); + sym_debug = "Synthetic symbol".to_owned(); + } + #[cfg(feature = "plugins")] + SequencedInput::LtoInput(o) => { + input = o.to_string(); + sym_debug = o.symbol_properties_display(symbol_id).to_string(); + } + } + + // Versions can be either literally within the symbol name or in separate version + // tables. 
It's useful to know which we've got, so if we get a version from a + // separate table, we separate it visually from the rest of the name. + let version_str = symbol_db + .symbol_version_debug(symbol_id) + .map_or_else(String::new, |v| format!(" version `{v}`")); + + let canon = if symbol_id == canonical { + "".to_owned() + } else { + format!(" -> {canonical}") + }; + + let _ = writeln!( + &mut out, + " {symbol_id}{canon}: {sym_debug}: {flags} \ + \n {sym_name}{version_str}\n \ + #{local_index} in File #{file_id} {input} ({file_state})" + ); + } + } + } + + fn print(&self) { + match self { + SymbolInfoPrinter::Disabled => {} + SymbolInfoPrinter::Enabled(state) => { + println!("{}", &state.output); + } + } + } +} + +#[derive(Debug)] +struct NameMatcher { + name: String, + version: VersionMatcher, +} + +#[derive(Debug)] +enum VersionMatcher { + None, + Exact(String), + Any, +} + +impl NameMatcher { + fn new(pattern: &str) -> Self { + if let Some((n, v)) = pattern.split_once('@') { + Self { + name: n.to_owned(), + version: VersionMatcher::new(v), + } + } else { + Self { + name: pattern.to_owned(), + version: VersionMatcher::None, + } + } + } + + fn matches<'data, O: ObjectFile<'data>>( + &self, + name: &[u8], + symbol_id: SymbolId, + symbol_db: &SymbolDb<'data, O>, + ) -> bool { + if let Some(i) = name.iter().position(|b| *b == b'@') { + let (name, version) = name.split_at(i); + return name == self.name.as_bytes() && self.version.matches_at_prefixed(version); + } + + if name != self.name.as_bytes() { + return false; + } + + self.version.matches_at_prefixed( + symbol_db + .symbol_version_debug(symbol_id) + .unwrap_or_default() + .as_bytes(), + ) + } +} + +impl VersionMatcher { + fn new(n: &str) -> Self { + if n == "*" { + VersionMatcher::Any + } else { + VersionMatcher::Exact(n.to_owned()) + } + } + + fn matches_at_prefixed(&self, mut version: &[u8]) -> bool { + let is_default = version.starts_with(b"@@"); + while let Some(rest) = version.strip_prefix(b"@") { + version 
= rest;
+        }
+        match self {
+            VersionMatcher::Any => true,
+            VersionMatcher::Exact(v) => version == v.as_bytes(),
+            VersionMatcher::None => is_default || version.is_empty(),
+        }
+    }
+}
diff --git a/libwild/src/diff.rs b/libwild/src/diff.rs
index 93afdc5f0..3784ec462 100644
--- a/libwild/src/diff.rs
+++ b/libwild/src/diff.rs
@@ -14,7 +14,7 @@ use std::path::PathBuf;
 use std::process::Command;
 
 pub(crate) fn maybe_diff() -> Result {
-    if let Ok(reference_linker) = std::env::var(crate::args::REFERENCE_LINKER_ENV)
+    if let Ok(reference_linker) = std::env::var(crate::args::consts::REFERENCE_LINKER_ENV)
         && let Some(paths) = run_with_linker(&reference_linker)?
     {
         run_diff(&paths)?;
diff --git a/libwild/src/dwarf_address_info.rs b/libwild/src/dwarf_address_info.rs
index 65c0b3e07..6b0cdeab1 100644
--- a/libwild/src/dwarf_address_info.rs
+++ b/libwild/src/dwarf_address_info.rs
@@ -17,10 +17,24 @@ use object::read::elf::RelocationSections;
 use std::borrow::Cow;
 use std::ffi::OsStr;
 use std::fmt::Display;
+#[cfg(unix)]
 use std::os::unix::ffi::OsStrExt;
 use std::path::Path;
 use std::path::PathBuf;
 
+/// Borrows `bytes` as an `OsStr`: verbatim on Unix, as UTF-8 elsewhere ("" if not valid UTF-8).
+fn os_str_from_bytes(bytes: &[u8]) -> &OsStr {
+    #[cfg(unix)]
+    {
+        OsStr::from_bytes(bytes)
+    }
+    #[cfg(not(unix))]
+    {
+        // On non-Unix, assume UTF-8 encoding
+        OsStr::new(std::str::from_utf8(bytes).unwrap_or(""))
+    }
+}
+
 /// The address at which we'll pretend that we loaded the section we're interested in. This value is
 /// arbitrary, but should be larger than the largest input section we expect to encounter and small
 /// enough to fit comfortably in a u32.
@@ -60,7 +74,7 @@ pub(crate) fn get_source_info<'data, P: Platform<'data>>( let comp_dir = unit .comp_dir .as_ref() - .map(|dir| Path::new(OsStr::from_bytes(dir)).to_owned()) + .map(|dir| Path::new(os_str_from_bytes(dir)).to_owned()) .unwrap_or_default(); let mut rows = program.rows(); @@ -78,7 +92,7 @@ pub(crate) fn get_source_info<'data, P: Platform<'data>>( if let Some(file) = row.file(header) { path = comp_dir.clone(); - path.push(OsStr::from_bytes( + path.push(os_str_from_bytes( &dwarf.attr_string(&unit, file.path_name())?, )); } diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index 07359c7e8..6868d15d6 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -1,8 +1,9 @@ use crate::Args; use crate::alignment::Alignment; use crate::arch::Architecture; -use crate::args::BuildIdOption; use crate::args::RelocationModel; +use crate::args::linux::BuildIdOption; +use crate::args::linux::ElfArgs; use crate::bail; use crate::elf_writer; use crate::ensure; @@ -267,8 +268,9 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { type CommonGroupStateExt = CommonGroupStateExt; type LayoutResourcesExt = LayoutResourcesExt<'data>; type ProgramSegmentDef = ProgramSegmentDef; + type Args = crate::args::linux::ElfArgs; - fn parse(input: &InputBytes<'data>, args: &Args) -> Result { + fn parse(input: &InputBytes<'data>, args: &Args) -> Result { let is_dynamic = input.kind == FileKind::ElfDynamic; let file = Self::parse_bytes(input.data, is_dynamic)?; @@ -677,7 +679,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { } fn create_layout_properties<'states, 'files, P: Platform<'data, File = Self>>( - args: &Args, + args: &Args, objects: impl Iterator, states: impl Iterator + Clone, ) -> Result @@ -846,7 +848,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { } fn new_epilogue_layout( - args: &Args, + args: &Args, output_kind: OutputKind, dynamic_symbol_definitions: &mut [DynamicSymbolDefinition<'_>], ) -> Self::EpilogueLayout { @@ -1176,7 
+1178,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { fn pre_finalise_sizes_prelude( common: &mut layout::CommonGroupState<'data, File<'data>>, - args: &Args, + args: &Args, ) { if args.should_write_eh_frame_hdr { common.allocate(part_id::EH_FRAME_HDR, size_of::() as u64); @@ -1419,7 +1421,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { fn update_segment_keep_list( program_segments: &ProgramSegments, keep_segments: &mut [bool], - args: &Args, + args: &Args, ) { // If relro is disabled, then discard the relro segment. if !args.relro { @@ -1534,7 +1536,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { }); } - fn apply_force_keep_sections(keep_sections: &mut OutputSectionMap, args: &Args) { + fn apply_force_keep_sections(keep_sections: &mut OutputSectionMap, args: &crate::args::Args) { // Some of these sections aren't really empty, but we just haven't allocated space for them // yet. e.g. we don't allocate space for section headers until we know which sections we're // keeping, which by inherently needs to be after this method is called. @@ -1560,6 +1562,18 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { // We always consider empty sections as content except for sframe sections. section_id != output_section_id::SFRAME } + + fn wants_got_plt_syms(args: &crate::args::Args) -> bool { + args.got_plt_syms + } + + fn stack_size(args: &crate::args::Args) -> u64 { + args.z_stack_size.map_or(0, |size| size.get()) + } + + fn hash_includes_sysv(args: &crate::args::Args) -> bool { + args.hash_style.includes_sysv() + } } fn process_eh_frame_relocations< @@ -2335,7 +2349,7 @@ impl ElfLayoutProperties { pub(crate) fn new<'files, 'states, 'data: 'files + 'states, P: Platform<'data>>( objects: impl Iterator>, states: impl Iterator> + Clone, - args: &Args, + args: &Args, ) -> Result { let gnu_property_notes = merge_gnu_property_notes::

(states.clone(), args.z_isa)?; let riscv_attributes = merge_riscv_attributes::

(states)?; @@ -2938,7 +2952,7 @@ pub(crate) struct GnuHashLayout { } fn create_gnu_hash_layout( - args: &Args, + args: &Args, output_kind: OutputKind, dynamic_symbol_definitions: &mut [DynamicSymbolDefinition<'_>], ) -> Option { diff --git a/libwild/src/elf_aarch64.rs b/libwild/src/elf_aarch64.rs index e4427451c..b0601ec44 100644 --- a/libwild/src/elf_aarch64.rs +++ b/libwild/src/elf_aarch64.rs @@ -42,6 +42,37 @@ impl<'data> crate::platform::Platform<'data> for ElfAArch64 { type Relaxation = Relaxation; type File = crate::elf::File<'data>; + fn create_plugin( + linker: &'data crate::Linker, + args: &'data crate::Args, + ) -> crate::Result>> { + crate::linker_plugins::LinkerPlugin::from_args(args, &linker.linker_plugin_arena, &linker.herd) + } + + fn finish_link( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data crate::Args, + plugin: &mut Option>, + symbol_db: crate::symbol_db::SymbolDb<'data, Self::File>, + per_symbol_flags: crate::value_flags::PerSymbolFlags, + resolver: crate::resolution::Resolver<'data, Self::File>, + output_sections: crate::output_section_id::OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> crate::Result>> { + crate::elf_writer::finish_link::( + file_loader, + args, + plugin, + symbol_db, + per_symbol_flags, + resolver, + output_sections, + layout_rules_builder, + output_kind, + ) + } + fn elf_header_arch_magic() -> u16 { object::elf::EM_AARCH64 } diff --git a/libwild/src/elf_loongarch64.rs b/libwild/src/elf_loongarch64.rs index 3dcadddf5..b03f134d0 100644 --- a/libwild/src/elf_loongarch64.rs +++ b/libwild/src/elf_loongarch64.rs @@ -31,6 +31,37 @@ impl<'data> crate::platform::Platform<'data> for ElfLoongArch64 { type Relaxation = Relaxation; type File = crate::elf::File<'data>; + fn create_plugin( + linker: &'data crate::Linker, + args: &'data crate::Args, + ) -> crate::Result>> { + crate::linker_plugins::LinkerPlugin::from_args(args, 
&linker.linker_plugin_arena, &linker.herd) + } + + fn finish_link( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data crate::Args, + plugin: &mut Option>, + symbol_db: crate::symbol_db::SymbolDb<'data, Self::File>, + per_symbol_flags: crate::value_flags::PerSymbolFlags, + resolver: crate::resolution::Resolver<'data, Self::File>, + output_sections: crate::output_section_id::OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> crate::Result>> { + crate::elf_writer::finish_link::( + file_loader, + args, + plugin, + symbol_db, + per_symbol_flags, + resolver, + output_sections, + layout_rules_builder, + output_kind, + ) + } + fn elf_header_arch_magic() -> u16 { object::elf::EM_LOONGARCH } diff --git a/libwild/src/elf_riscv64.rs b/libwild/src/elf_riscv64.rs index 94b88d80d..8a4906493 100644 --- a/libwild/src/elf_riscv64.rs +++ b/libwild/src/elf_riscv64.rs @@ -50,6 +50,37 @@ impl<'data> crate::platform::Platform<'data> for ElfRiscV64 { type Relaxation = Relaxation; type File = crate::elf::File<'data>; + fn create_plugin( + linker: &'data crate::Linker, + args: &'data crate::Args, + ) -> crate::Result>> { + crate::linker_plugins::LinkerPlugin::from_args(args, &linker.linker_plugin_arena, &linker.herd) + } + + fn finish_link( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data crate::Args, + plugin: &mut Option>, + symbol_db: crate::symbol_db::SymbolDb<'data, Self::File>, + per_symbol_flags: crate::value_flags::PerSymbolFlags, + resolver: crate::resolution::Resolver<'data, Self::File>, + output_sections: crate::output_section_id::OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> crate::Result>> { + crate::elf_writer::finish_link::( + file_loader, + args, + plugin, + symbol_db, + per_symbol_flags, + resolver, + output_sections, + layout_rules_builder, + output_kind, 
+ ) + } + fn elf_header_arch_magic() -> u16 { object::elf::EM_RISCV } diff --git a/libwild/src/elf_writer.rs b/libwild/src/elf_writer.rs index af2b652a6..8fb0f7e86 100644 --- a/libwild/src/elf_writer.rs +++ b/libwild/src/elf_writer.rs @@ -5,7 +5,8 @@ use self::elf::get_page_mask; use crate::OutputKind; use crate::alignment; use crate::args::Args; -use crate::args::BuildIdOption; +use crate::args::linux::BuildIdOption; +use crate::args::linux::ElfArgs; use crate::bail; use crate::debug_assert_bail; use crate::elf; @@ -151,6 +152,56 @@ struct RelocationCache { high_part_symbols: HashMap, } +/// Shared ELF finish_link: resolve, compute layout, write output. +pub(crate) fn finish_link<'data, P: Platform<'data, File = crate::elf::File<'data>>>( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data Args, + plugin: &mut Option>, + mut symbol_db: crate::symbol_db::SymbolDb<'data, crate::elf::File<'data>>, + mut per_symbol_flags: crate::value_flags::PerSymbolFlags, + mut resolver: crate::resolution::Resolver<'data, crate::elf::File<'data>>, + mut output_sections: crate::output_section_id::OutputSections<'data>, + mut layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: OutputKind, +) -> crate::Result>>> { + if let Some(plugin) = plugin.as_mut() + && plugin.is_initialised() + { + plugin.all_symbols_read( + &mut symbol_db, + &mut resolver, + file_loader, + &mut per_symbol_flags, + &mut output_sections, + &mut layout_rules_builder, + )?; + } + + if let crate::version_script::VersionScript::Rust(rust_vscript) = &symbol_db.version_script { + symbol_db.handle_rust_version_script(rust_vscript, &mut per_symbol_flags); + } + + let layout_rules = layout_rules_builder.build(); + let resolved = resolver.resolve_sections_and_canonicalise_undefined( + &mut symbol_db, + &mut per_symbol_flags, + &mut output_sections, + &layout_rules, + )?; + + let mut output = crate::file_writer::Output::new(args, output_kind); + let layout = 
crate::layout::compute::

( + symbol_db, + per_symbol_flags, + resolved, + output_sections, + &mut output, + )?; + + output.write(&layout, write::

)?; + Ok(Some(layout)) +} + pub(crate) fn write<'data, P: Platform<'data, File = crate::elf::File<'data>>>( sized_output: &mut SizedOutput, layout: &ElfLayout<'data>, @@ -1580,7 +1631,7 @@ fn write_section_raw<'out, 'data>( fn write_symbols<'data>( object: &ObjectLayout<'data, crate::elf::File<'data>>, symbol_writer: &mut SymbolTableWriter, - layout: &ElfLayout, + layout: &ElfLayout<'data>, ) -> Result { for ((sym_index, sym), flags) in object .object @@ -4261,7 +4312,7 @@ struct DynamicEntryWriter { } struct DynamicEntryInputs<'layout> { - args: &'layout Args, + args: &'layout Args, has_static_tls: bool, has_variant_pcs: bool, section_layouts: &'layout OutputSectionMap, diff --git a/libwild/src/elf_x86_64.rs b/libwild/src/elf_x86_64.rs index f6c8a6d1d..e9dd82eeb 100644 --- a/libwild/src/elf_x86_64.rs +++ b/libwild/src/elf_x86_64.rs @@ -50,6 +50,37 @@ impl<'data> crate::platform::Platform<'data> for ElfX86_64 { type Relaxation = Relaxation; type File = crate::elf::File<'data>; + fn create_plugin( + linker: &'data crate::Linker, + args: &'data crate::Args, + ) -> crate::Result>> { + crate::linker_plugins::LinkerPlugin::from_args(args, &linker.linker_plugin_arena, &linker.herd) + } + + fn finish_link( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data crate::Args, + plugin: &mut Option>, + symbol_db: crate::symbol_db::SymbolDb<'data, Self::File>, + per_symbol_flags: crate::value_flags::PerSymbolFlags, + resolver: crate::resolution::Resolver<'data, Self::File>, + output_sections: crate::output_section_id::OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> crate::Result>> { + crate::elf_writer::finish_link::( + file_loader, + args, + plugin, + symbol_db, + per_symbol_flags, + resolver, + output_sections, + layout_rules_builder, + output_kind, + ) + } + fn elf_header_arch_magic() -> u16 { object::elf::EM_X86_64 } diff --git a/libwild/src/file_kind.rs 
b/libwild/src/file_kind.rs index 57314657f..dbbca3b4e 100644 --- a/libwild/src/file_kind.rs +++ b/libwild/src/file_kind.rs @@ -11,6 +11,8 @@ use object::read::elf::SectionHeader; pub(crate) enum FileKind { ElfObject, ElfDynamic, + CoffObject, + CoffImport, Archive, ThinArchive, Text, @@ -48,6 +50,12 @@ impl FileKind { object::elf::ET_DYN => Ok(FileKind::ElfDynamic), t => bail!("Unsupported ELF kind {t}"), } + } else if let Ok(kind) = object::FileKind::parse(bytes) { + match kind { + object::FileKind::Coff | object::FileKind::CoffBig => Ok(FileKind::CoffObject), + object::FileKind::CoffImport => Ok(FileKind::CoffImport), + _ => bail!("Couldn't identify file type"), + } } else if bytes.is_ascii() { Ok(FileKind::Text) } else if bytes.starts_with(b"BC") { @@ -94,6 +102,8 @@ impl std::fmt::Display for FileKind { let s = match self { FileKind::ElfObject => "ELF object", FileKind::ElfDynamic => "ELF dynamic", + FileKind::CoffObject => "COFF object", + FileKind::CoffImport => "COFF import", FileKind::Archive => "archive", FileKind::ThinArchive => "thin archive", FileKind::Text => "text", diff --git a/libwild/src/file_writer.rs b/libwild/src/file_writer.rs index 93afaaf4a..ade8f433a 100644 --- a/libwild/src/file_writer.rs +++ b/libwild/src/file_writer.rs @@ -111,7 +111,7 @@ struct SectionAllocation { } impl Output { - pub(crate) fn new(args: &Args, output_kind: OutputKind) -> Output { + pub(crate) fn new(args: &Args, output_kind: OutputKind) -> Output { let file_write_mode = args .file_write_mode .unwrap_or_else(|| default_file_write_mode(args, output_kind)); @@ -231,7 +231,7 @@ impl Output { } /// Returns the file write mode that we should use to write to the specified path. 
-fn default_file_write_mode(args: &Args, output_kind: OutputKind) -> FileWriteMode { +fn default_file_write_mode(args: &Args, output_kind: OutputKind) -> FileWriteMode { if output_kind.is_shared_object() { return FileWriteMode::UnlinkAndReplace; } diff --git a/libwild/src/fs.rs b/libwild/src/fs.rs index 2c96862ec..2fe2292d1 100644 --- a/libwild/src/fs.rs +++ b/libwild/src/fs.rs @@ -1,6 +1,15 @@ use crate::error::Result; use std::fs::File; +#[inline(always)] +#[cfg(windows)] +/// On Windows, we don't need to do anything special to make a file executable. +pub(crate) fn make_executable(_: &File) -> Result { + Ok(()) +} + +#[inline(always)] +#[cfg(unix)] pub(crate) fn make_executable(file: &File) -> Result { use std::os::unix::prelude::PermissionsExt; diff --git a/libwild/src/gc_stats.rs b/libwild/src/gc_stats.rs index b3e722dfc..8ef1675fd 100644 --- a/libwild/src/gc_stats.rs +++ b/libwild/src/gc_stats.rs @@ -28,9 +28,9 @@ use hashbrown::HashMap; use itertools::Itertools; use std::path::PathBuf; -pub(crate) fn maybe_write_gc_stats<'data, O: ObjectFile<'data>>( +pub(crate) fn maybe_write_gc_stats<'data, O: ObjectFile<'data>, T>( group_layouts: &[GroupLayout<'data, O>], - args: &Args, + args: &Args, ) -> Result { let Some(stats_file) = args.write_gc_stats.as_ref() else { return Ok(()); @@ -46,10 +46,10 @@ struct InputFile<'data> { discarded_names: Vec<&'data [u8]>, } -fn write_gc_stats<'data, O: ObjectFile<'data>>( +fn write_gc_stats<'data, O: ObjectFile<'data>, T>( group_layouts: &[GroupLayout<'data, O>], stats_file: &std::path::Path, - args: &Args, + args: &Args, ) -> Result { use std::io::Write as _; diff --git a/libwild/src/grouping.rs b/libwild/src/grouping.rs index ae055886c..c62d16ce3 100644 --- a/libwild/src/grouping.rs +++ b/libwild/src/grouping.rs @@ -189,7 +189,7 @@ pub(crate) fn create_groups<'data, O: ObjectFile<'data>>( } /// Decides after how many symbols, we should start a new group. 
-fn determine_symbols_per_group(num_symbols: usize, args: &Args) -> usize { +fn determine_symbols_per_group(num_symbols: usize, args: &Args) -> usize { let num_threads = args.available_threads.get(); // If we're running with a single thread, then we might as well put everything into a single @@ -213,7 +213,7 @@ fn determine_symbols_per_group(num_symbols: usize, args: &Args) -> usize { } /// Decides the maximum number of files that we'll put into one group. -fn determine_max_files_per_group(args: &Args) -> usize { +fn determine_max_files_per_group(args: &Args) -> usize { if let Some(v) = args.files_per_group { return v as usize; } diff --git a/libwild/src/input_data.rs b/libwild/src/input_data.rs index a1cbb2b3e..1f7ef2d3c 100644 --- a/libwild/src/input_data.rs +++ b/libwild/src/input_data.rs @@ -1,961 +1,965 @@ -//! Code for figuring out what input files we need to read then mapping them into memory. - -use crate::archive; -use crate::archive::ArchiveEntry; -use crate::archive::ArchiveIterator; -use crate::archive::EntryMeta; -use crate::args::Args; -use crate::args::Input; -use crate::args::InputSpec; -use crate::args::Modifiers; -use crate::bail; -use crate::error::Context as _; -use crate::error::Error; -use crate::error::Result; -use crate::file_kind::FileKind; -use crate::linker_plugins::LinkerPlugin; -use crate::linker_plugins::LtoInputInfo; -use crate::linker_script::LinkerScript; -use crate::parsing::ParsedInputObject; -use crate::platform::ObjectFile; -use crate::timing_phase; -use crate::verbose_timing_phase; -use colosseum::sync::Arena; -use crossbeam_queue::SegQueue; -use hashbrown::HashMap; -use memmap2::Mmap; -use rayon::Scope; -use rayon::iter::IntoParallelIterator; -use rayon::iter::IntoParallelRefIterator; -use rayon::iter::ParallelIterator; -use std::fmt::Display; -use std::ops::Deref; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::Mutex; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; - 
-pub(crate) struct FileLoader<'data> { - /// The files that we've loaded so far. - pub(crate) loaded_files: Vec<&'data InputFile>, - - /// Whether we have at least one input file that is a dynamic object. - pub(crate) has_dynamic: bool, - - inputs_arena: &'data Arena, -} - -#[derive(Default)] -pub(crate) struct LoadedInputs<'data, O: ObjectFile<'data>> { - /// The results of parsing all the input files and archive entries. We defer checking for - /// success until later, since otherwise a parse error would mean that the save-dir mechanism - /// wouldn't capture all the input files. - pub(crate) objects: Vec>>>, - - pub(crate) linker_scripts: Vec>, - - pub(crate) lto_objects: Vec>>>, -} - -pub(crate) struct InputBytes<'data> { - pub(crate) input: InputRef<'data>, - pub(crate) kind: FileKind, - pub(crate) data: &'data [u8], - pub(crate) modifiers: Modifiers, -} - -#[derive(Clone, Copy)] -pub(crate) struct ScriptData<'data> { - pub(crate) raw: &'data [u8], -} - -/// Identifies an input file. IDs start from 0 which is reserved for our prelude file. -#[derive(derive_more::Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[debug("file-{_0}")] -pub(crate) struct FileId(u32); - -pub(crate) const PRELUDE_FILE_ID: FileId = FileId::new(0, 0); - -#[derive(Debug)] -pub(crate) struct InputFile { - pub(crate) filename: PathBuf, - - /// The filename prior to path search. If this is absolute, then `filename` will be the same. - original_filename: PathBuf, - - pub(crate) modifiers: Modifiers, - - data: Option, -} - -#[derive(Debug)] -pub(crate) struct FileData { - bytes: Mmap, - - /// The modification timestamp of the input file just before we opened it. We expect our input - /// files not to change while we're running. - modification_time: std::time::SystemTime, -} - -/// Identifies an input object that may not be a regular file on disk, or may be an entry in an -/// archive. 
-#[derive(Clone, Copy)] -pub(crate) struct InputRef<'data> { - pub(crate) file: &'data InputFile, - pub(crate) entry: Option>, -} - -impl InputFile { - pub(crate) fn data(&self) -> &[u8] { - self.data.as_deref().unwrap_or_default() - } -} - -#[derive(Debug)] -struct InputPath { - /// An absolute path to the file. - absolute: PathBuf, - - /// The file as specified on the command line. In the case of an argument like -lfoo, this will - /// be "libfoo.so". - original: PathBuf, -} - -#[derive(Debug)] -pub(crate) struct InputLinkerScript<'data> { - pub(crate) script: LinkerScript<'data>, - pub(crate) input_file: &'data InputFile, -} - -struct TemporaryState<'data, O: ObjectFile<'data>> { - args: &'data Args, - - /// Mapping from paths to the index in `files` at which we'll place the result. - path_to_load_index: Mutex>, - - next_file_load_index: AtomicUsize, - - files: SegQueue>, - - inputs_arena: &'data Arena, -} - -struct LoadedFile<'data, O: ObjectFile<'data>> { - index: FileLoadIndex, - state: LoadedFileState<'data, O>, -} - -enum LoadedFileState<'data, O: ObjectFile<'data>> { - Loaded(&'data InputFile, InputRecord<'data, O>), - Archive(&'data InputFile, Vec>), - ThinArchive(Vec<&'data InputFile>, Vec>), - LinkerScript(LoadedLinkerScriptState<'data>), - Error(Error), -} - -enum InputRecord<'data, O: ObjectFile<'data>> { - Object(Result>>), - LtoInput(Box>), -} - -struct UnclaimedLtoInput<'data> { - input_ref: InputRef<'data>, - file: Arc, - kind: FileKind, -} - -struct LoadedLinkerScriptState<'data> { - /// The indexes of the files requested by the linker script. Some of these indexes may turn out - /// to have been claimed earlier in the command-line, so we'll only load those that haven't. - file_indexes: Vec, - - /// The parsed linker script. - script: InputLinkerScript<'data>, -} - -/// A temporary ID for files that we loaded. Files specified on the command-line will have -/// deterministic values. Other files, e.g. 
those referenced by thin archives or linker scripts will -/// have non-deterministic values. -#[derive(Clone, Copy)] -struct FileLoadIndex(usize); - -/// A request for a worker to open the specified input, mmap its contents and identify what type of -/// file it is. If it turns out to be a thin archive, then the referenced files are also loaded. -struct OpenFileRequest { - file_index: FileLoadIndex, - paths: InputPath, - modifiers: Modifiers, - - /// The file that requested this file be opened. e.g. a linker script. In theory, we could have - /// a chain of files where linker scripts reference linker scripts, but for simplicity, we only - /// report the last file in the chain. - referenced_by: Option, -} - -struct LoadedLinkerScript<'data> { - script: InputLinkerScript<'data>, - extra_inputs: Vec, -} - -pub(crate) struct AuxiliaryFiles<'data> { - pub(crate) version_script_data: Option>, - pub(crate) export_list_data: Option>, -} - -impl<'data> AuxiliaryFiles<'data> { - pub(crate) fn new(args: &'data Args, inputs_arena: &'data Arena) -> Result { - let resolve_script_path = |path: &Path| -> PathBuf { - if path.exists() { - path.to_owned() - } else if let Some(found) = search_for_file(&args.lib_search_path, None, path) { - found - } else { - path.to_owned() - } - }; - - Ok(Self { - version_script_data: args - .version_script_path - .as_ref() - .map(|path| read_script_data(&resolve_script_path(path), inputs_arena)) - .transpose()?, - export_list_data: args - .export_list_path - .as_ref() - .map(|path| read_script_data(&resolve_script_path(path), inputs_arena)) - .transpose()?, - }) - } -} - -impl<'data> FileLoader<'data> { - pub(crate) fn new(inputs_arena: &'data Arena) -> Self { - Self { - loaded_files: Vec::new(), - inputs_arena, - has_dynamic: false, - } - } - - pub(crate) fn load_inputs>( - &mut self, - inputs: &[Input], - args: &'data Args, - plugin: &mut Option>, - ) -> Result> { - timing_phase!("Open input files"); - - let mut path_to_load_index = 
HashMap::new(); - - let mut initial_work = Vec::with_capacity(inputs.len()); - for input in inputs { - let path = input.path(args)?; - path_to_load_index - .entry(path.absolute.clone()) - .or_insert_with(|| { - let file_index = FileLoadIndex(initial_work.len()); - - initial_work.push(OpenFileRequest { - file_index, - paths: path, - modifiers: input.modifiers, - referenced_by: None, - }); - - file_index - }); - } - - let temporary_state = TemporaryState { - args, - path_to_load_index: Mutex::new(path_to_load_index), - next_file_load_index: AtomicUsize::new(initial_work.len()), - files: SegQueue::new(), - inputs_arena: self.inputs_arena, - }; - - // Open files, mmap them and identify their type from separate threads. - rayon::scope(|scope| { - initial_work.into_par_iter().for_each(|request| { - temporary_state.process_and_record_open_file_request(request, scope); - }); - }); - - verbose_timing_phase!("Finalise open input files"); - - // Put files into a deterministic order. That order will the order we'd find them if we just - // processed command-line arguments in order, recursively processing any files that those - // files pulled in. - let mut files_by_index = Vec::new(); - files_by_index.resize_with(temporary_state.files.len(), || None); - for file in temporary_state.files.into_iter() { - let entry = &mut files_by_index[file.index.0]; - assert!( - entry.is_none(), - "Internal error: Multiple files with the same index" - ); - *entry = Some(file.state); - } - self.extract_all(&mut files_by_index, plugin) - } - - /// Checks that the modification timestamp on all our input files hasn't changed since we opened - /// them. If they were modified while we were running, then we may fail with a SIGBUS if we try - /// to access part of the file that's no longer there, however if we don't, then we may have - /// read inconsistent data from the changed object, so we want to fail the link. 
- pub(crate) fn verify_inputs_unchanged(&self) -> Result { - timing_phase!("Verify inputs unchanged"); - - self.loaded_files.par_iter().try_for_each(|file| { - let Some(file_data) = &file.data else { - return Ok(()); - }; - - let metadata = std::fs::metadata(&file.filename).with_context(|| { - format!("Failed to read metadata for `{}`", file.filename.display()) - })?; - - let new_modified = metadata.modified().with_context(|| { - format!( - "Failed to get modification time for `{}`", - file.filename.display() - ) - })?; - - if file_data.modification_time != new_modified { - bail!( - "The file `{}` was changed while we were running", - file.filename.display() - ); - } - - Ok(()) - }) - } - - /// Extract all files and linker scripts from `files`. Extraction order is the same as the order - /// on the original command-line. This is roughly FileLoadIndex order, except that (a) if a file - /// is loaded multiple times, it will only appear the first time it's encountered and (b) when a - /// linker script is loaded, its files appear at the point at which the linker script appeared - /// on the command-line, even though the FileLoadIndex for files loaded by linker scripts is - /// later. 
- fn extract_all>( - &mut self, - files: &mut [Option>], - plugin: &mut Option>, - ) -> Result> { - let mut loaded = LoadedInputs { - objects: Vec::with_capacity(files.len()), - linker_scripts: Vec::new(), - lto_objects: Vec::new(), - }; - - for i in 0..files.len() { - self.extract_file(FileLoadIndex(i), files, &mut loaded, plugin)?; - } - - Ok(loaded) - } - - fn extract_file>( - &mut self, - index: FileLoadIndex, - files: &mut [Option>], - loaded: &mut LoadedInputs<'data, O>, - plugin: &mut Option>, - ) -> Result { - match core::mem::take(&mut files[index.0]) { - None => {} - Some(LoadedFileState::Loaded(input_file, parse_result)) => { - if parse_result.is_dynamic_object() { - self.has_dynamic = true; - } - loaded.add_record(parse_result, plugin); - self.loaded_files.push(input_file); - } - Some(LoadedFileState::Archive(input_file, parsed_parts)) => { - loaded.add_records(parsed_parts, plugin); - self.loaded_files.push(input_file); - } - Some(LoadedFileState::ThinArchive(mut input_files, parsed_parts)) => { - loaded.add_records(parsed_parts, plugin); - self.loaded_files.append(&mut input_files); - } - Some(LoadedFileState::LinkerScript(loaded_linker_script_state)) => { - self.loaded_files - .push(loaded_linker_script_state.script.input_file); - - loaded - .linker_scripts - .push(loaded_linker_script_state.script); - - for i in loaded_linker_script_state.file_indexes { - self.extract_file(i, files, loaded, plugin)?; - } - } - Some(LoadedFileState::Error(error)) => { - // For now, we just report the first error that we come to. 
- return Err(error); - } - } - - Ok(()) - } -} - -fn process_linker_script<'data>( - input_file: &'data InputFile, - args: &Args, -) -> Result> { - let bytes = input_file.data(); - let script = LinkerScript::parse(bytes, &input_file.filename)?; - - let script_path = std::fs::canonicalize(&input_file.filename)?; - let directory = script_path.parent().expect("expected an absolute path"); - - let mut extra_inputs = Vec::new(); - - script.foreach_input(input_file.modifiers, |mut input| { - input.search_first = Some(directory.to_owned()); - - if let (Some(sysroot), InputSpec::File(file)) = (args.sysroot.as_ref(), &mut input.spec) - && let Some(new_file) = - crate::linker_script::maybe_apply_sysroot(&script_path, file, sysroot) - { - *file = new_file; - } - - extra_inputs.push(input); - - Ok(()) - })?; - - Ok(LoadedLinkerScript { - script: InputLinkerScript { script, input_file }, - extra_inputs, - }) -} - -fn process_archive<'data, O: ObjectFile<'data>>( - input_file: &'data InputFile, - file: &Arc, - state: &TemporaryState<'data, O>, -) -> Result> { - let mut outputs = Vec::new(); - - for entry in ArchiveIterator::from_archive_bytes(input_file.data())? 
{ - let entry = entry?; - match entry { - ArchiveEntry::Regular(archive_entry) => { - let input_ref = InputRef { - file: input_file, - entry: Some(EntryMeta { - identifier: archive_entry.ident, - start_offset: archive_entry.data_offset, - end_offset: archive_entry.data_offset + archive_entry.entry_data.len(), - }), - }; - - let kind = FileKind::identify_bytes(input_ref.data()) - .with_context(|| format!("Failed process input `{input_ref}`"))?; - - outputs.push(state.process_input(input_ref, file, kind)?); - } - ArchiveEntry::Thin(_) => unreachable!(), - } - } - - Ok(LoadedFileState::Archive(input_file, outputs)) -} - -fn process_thin_archive<'data, O: ObjectFile<'data>>( - input_file: &InputFile, - state: &TemporaryState<'data, O>, -) -> Result> { - let absolute_path = &input_file.filename; - let parent_path = absolute_path.parent().unwrap(); - let mut files = Vec::new(); - let mut parsed_files = Vec::new(); - - for entry in ArchiveIterator::from_archive_bytes(input_file.data())? { - match entry? 
{ - ArchiveEntry::Thin(entry) => { - let path = entry.ident.as_path(); - let entry_path = parent_path.join(path); - - let (file_data, file) = FileData::open(&entry_path, state.args.prepopulate_maps) - .with_context(|| { - format!( - "Failed to open file referenced by thin archive `{}`", - input_file.filename.display() - ) - })?; - - let input_file = InputFile { - filename: entry_path.clone(), - original_filename: entry_path, - modifiers: Modifiers { - archive_semantics: true, - ..input_file.modifiers - }, - data: Some(file_data), - }; - - let input_file = &*state.inputs_arena.alloc(input_file); - - let input_ref = InputRef { - file: input_file, - entry: None, - }; - - let kind = FileKind::identify_bytes(input_ref.data()) - .with_context(|| format!("Failed process input `{input_ref}`"))?; - - parsed_files.push(state.process_input(input_ref, &Arc::new(file), kind)?); - files.push(input_file); - } - ArchiveEntry::Regular(_) => {} - } - } - - Ok(LoadedFileState::ThinArchive(files, parsed_files)) -} - -impl<'data, O: ObjectFile<'data>> TemporaryState<'data, O> { - fn process_and_record_open_file_request<'scope>( - &'scope self, - request: OpenFileRequest, - scope: &Scope<'scope>, - ) { - let file_index = request.file_index; - let loaded_state = self - .process_open_file_request(request, scope) - .unwrap_or_else(LoadedFileState::Error); - self.files.push(LoadedFile { - index: file_index, - state: loaded_state, - }); - } - - fn process_open_file_request<'scope>( - &'scope self, - request: OpenFileRequest, - scope: &Scope<'scope>, - ) -> Result> { - verbose_timing_phase!("Open file"); - - let absolute_path = &request.paths.absolute; - let result = FileData::open(absolute_path.as_path(), self.args.prepopulate_maps); - let (data, file) = match request.referenced_by.as_ref() { - Some(referenced_by) => { - result.with_context(|| format!("Failed to process `{}`", referenced_by.display())) - } - None => result, - }?; - - let input_file = self.inputs_arena.alloc(InputFile { - 
filename: absolute_path.to_owned(), - original_filename: request.paths.original, - modifiers: request.modifiers, - data: Some(data), - }); - - let input_ref = InputRef { - file: input_file, - entry: None, - }; - - let data = input_ref.file.data.as_ref().unwrap(); - let kind = FileKind::identify_bytes(&data.bytes)?; - - match kind { - FileKind::Archive => process_archive(input_file, &Arc::new(file), self), - FileKind::ThinArchive => process_thin_archive(input_file, self), - FileKind::Text => { - let script = process_linker_script(input_file, self.args)?; - - let file_indexes = script - .extra_inputs - .into_iter() - .map(|input| { - self.load_input( - &input, - scope, - Some(script.script.input_file.filename.clone()), - ) - }) - .collect::>>()?; - - Ok(LoadedFileState::LinkerScript(LoadedLinkerScriptState { - file_indexes, - script: script.script, - })) - } - _ => { - let parsed = self.process_input(input_ref, &Arc::new(file), kind)?; - Ok(LoadedFileState::Loaded(input_file, parsed)) - } - } - } - - /// Sends a request to load `input` unless it has already been requested. In either case, return - /// the index for `input` in our files Vec. 
- fn load_input<'scope>( - &'scope self, - input: &Input, - scope: &Scope<'scope>, - referenced_by: Option, - ) -> Result { - let paths = input.path(self.args)?; - - let mut path_to_load_index = self.path_to_load_index.lock().unwrap(); - - let index = match path_to_load_index.entry(paths.absolute.clone()) { - hashbrown::hash_map::Entry::Occupied(e) => *e.get(), - hashbrown::hash_map::Entry::Vacant(e) => { - let new_index = - FileLoadIndex(self.next_file_load_index.fetch_add(1, Ordering::Relaxed)); - e.insert(new_index); - - drop(path_to_load_index); - - let request = OpenFileRequest { - file_index: new_index, - paths, - modifiers: input.modifiers, - referenced_by, - }; - - scope.spawn(|scope| { - self.process_and_record_open_file_request(request, scope); - }); - - new_index - } - }; - - Ok(index) - } - - fn process_input( - &self, - input_ref: InputRef<'data>, - file: &Arc, - kind: FileKind, - ) -> Result> { - let data = input_ref.data(); - - // The plugin API docs say to pass files to the plugin before the linker tries to identify - // the them. Unfortunately the plugin API doesn't provide a fast way to identify files. The - // plugin API doesn't say anything about thread-safety and although the GCC plugin appears - // to be threadsafe, the clang plugin definitely isn't. This means that using the API to - // identify files is much too slow, so we do our own file identification and only pass files - // to the plugin if we think it can handle them. We can't rely on a plugin only being - // supplied when actually needed, since GCC seems to pretty much always pass a plugin to the - // linker. 
- if kind.is_compiler_ir() { - return Ok(InputRecord::LtoInput(Box::new(UnclaimedLtoInput { - input_ref, - file: Arc::clone(file), - kind, - }))); - } - - if input_ref.is_archive_entry() && kind != FileKind::ElfObject { - bail!("Unexpected archive member of kind {kind:?}: {input_ref}"); - } - - let input_bytes = InputBytes { - kind, - input: input_ref, - data, - modifiers: input_ref.file.modifiers, - }; - - let object = ParsedInputObject::new(&input_bytes, self.args); - - Ok(InputRecord::Object(object)) - } -} - -fn read_script_data<'data>( - path: &Path, - inputs_arena: &'data Arena, -) -> Result> { - let data = FileData::new(path, false).context("Failed to read script")?; - - let file = inputs_arena.alloc(InputFile { - filename: path.to_owned(), - original_filename: path.to_owned(), - modifiers: Default::default(), - data: Some(data), - }); - - Ok(ScriptData { raw: file.data() }) -} - -impl Input { - fn path(&self, args: &Args) -> Result { - match &self.spec { - InputSpec::File(p) => { - if self.search_first.is_some() - && let Some(path) = search_for_file( - &args.lib_search_path, - self.search_first.as_ref(), - p.as_ref(), - ) - { - return Ok(InputPath { - absolute: std::path::absolute(path)?, - original: p.as_ref().to_owned(), - }); - } - Ok(InputPath { - absolute: p.as_ref().to_owned(), - original: p.as_ref().to_owned(), - }) - } - InputSpec::Lib(lib_name) => { - if self.modifiers.allow_shared { - let filename = format!("lib{lib_name}.so"); - if let Some(path) = search_for_file( - &args.lib_search_path, - self.search_first.as_ref(), - &filename, - ) { - return Ok(InputPath { - absolute: std::path::absolute(&path)?, - original: PathBuf::from(filename), - }); - } - } - let filename = format!("lib{lib_name}.a"); - if let Some(path) = - search_for_file(&args.lib_search_path, self.search_first.as_ref(), &filename) - { - return Ok(InputPath { - absolute: std::path::absolute(&path)?, - original: PathBuf::from(filename), - }); - } - bail!("Couldn't find library 
`{lib_name}` on library search path"); - } - InputSpec::Search(filename) => { - if let Some(path) = search_for_file( - &args.lib_search_path, - self.search_first.as_ref(), - filename.as_ref(), - ) { - return Ok(InputPath { - absolute: std::path::absolute(&path)?, - original: PathBuf::from(filename.as_ref()), - }); - } - bail!("Couldn't find library `{filename}` on library search path"); - } - } - } -} - -impl FileData { - pub(crate) fn new(path: &Path, prepopulate_maps: bool) -> Result { - Self::open(path, prepopulate_maps).map(|(file_data, _file)| file_data) - } - - fn open(path: &Path, prepopulate_maps: bool) -> Result<(Self, std::fs::File)> { - let file = std::fs::File::open(path) - .with_context(|| format!("Failed to open input file `{}`", path.display()))?; - - let modification_time = std::fs::metadata(path) - .and_then(|meta| meta.modified()) - .with_context(|| { - format!("Failed to read file modification time `{}`", path.display()) - })?; - - // Safety: Unfortunately, this is a bit of a compromise. Basically this is only safe if our - // users manage to avoid editing the input files while we've got them mapped. It'd be great - // if there were a way to protect against unsoundness when the input files were modified - // externally, but there isn't - at least on Linux. Not only could the bytes change without - // notice, but the mapped file could be truncated causing any access to result in a SIGBUS. - // - // For our use case, mmap just has too many advantages. There are likely large parts of our - // input files that we don't need to read, so reading all our input files up front isn't - // really an option. Reading just the parts we need might be an option, but would add - // substantial complexity. Also, using mmap means that if the system needs to reclaim - // memory, it can just release some of our pages. 
- - let mut mmap_options = memmap2::MmapOptions::new(); - - // Prepopulating maps generally slows things down, so is off by default, however it's useful - // when profiling, since it means that you don't see false positive slowness in the parts of - // the code that first read a bit of memory. - if prepopulate_maps { - mmap_options.populate(); - } - - let bytes = unsafe { mmap_options.map(&file) } - .with_context(|| format!("Failed to mmap input file `{}`", path.display()))?; - - Ok(( - FileData { - bytes, - modification_time, - }, - file, - )) - } -} - -fn search_for_file( - lib_search_path: &[Box], - search_first: Option<&PathBuf>, - filename: impl AsRef, -) -> Option { - let filename = filename.as_ref(); - if let Some(search_first) = search_first { - let path = search_first.join(filename); - if path.exists() { - return Some(path); - } - } - for dir in lib_search_path { - let path = dir.join(filename); - if path.exists() { - return Some(path); - } - } - None -} - -impl Deref for FileData { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.bytes - } -} - -const FILE_INDEX_BITS: u32 = 8; -pub(crate) const MAX_FILES_PER_GROUP: u32 = 1 << FILE_INDEX_BITS; - -impl FileId { - pub(crate) const fn new(group: u32, file: u32) -> Self { - Self((group << FILE_INDEX_BITS) | file) - } - - pub(crate) const fn from_encoded(v: u32) -> Self { - Self(v) - } - - pub(crate) fn group(self) -> usize { - self.0 as usize >> FILE_INDEX_BITS - } - - pub(crate) fn file(self) -> usize { - self.0 as usize & ((1 << FILE_INDEX_BITS) - 1) - } - - pub(crate) fn as_u32(self) -> u32 { - self.0 - } -} - -impl std::fmt::Display for InputRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.file.filename.display(), f)?; - if let Some(entry) = &self.entry { - std::fmt::Display::fmt(" @ ", f)?; - std::fmt::Display::fmt(&String::from_utf8_lossy(entry.identifier.as_slice()), f)?; - } - Ok(()) - } -} - -impl std::fmt::Debug for 
InputRef<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(self, f) - } -} - -impl std::fmt::Display for FileId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} ({}/{})", self.0, self.group(), self.file()) - } -} - -impl<'data> InputRef<'data> { - pub(crate) fn lib_name(&self) -> &'data [u8] { - self.file.original_filename.as_os_str().as_encoded_bytes() - } - - pub(crate) fn has_archive_semantics(&self) -> bool { - self.entry.is_some() || self.file.modifiers.archive_semantics - } - - pub(crate) fn data(&self) -> &'data [u8] { - if let Some(entry) = &self.entry { - &self.file.data()[entry.byte_range()] - } else { - self.file.data() - } - } - - fn is_archive_entry(&self) -> bool { - self.entry.is_some() - } -} - -impl Display for InputBytes<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - Display::fmt(&self.input, f) - } -} - -impl<'data, O: ObjectFile<'data>> LoadedInputs<'data, O> { - fn add_record( - &mut self, - record: InputRecord<'data, O>, - plugin: &mut Option>, - ) { - match record { - InputRecord::Object(obj) => self.objects.push(obj), - InputRecord::LtoInput(obj) => { - let UnclaimedLtoInput { - input_ref, - file, - kind, - } = *obj; - let result = plugin.as_mut() - .with_context(|| { - format!( - "Input file {input_ref} contains {kind}, but linker plugin was not supplied" - ) - }) - .and_then(|plugin| plugin.process_input(input_ref, &file, kind)); - self.lto_objects.push(result); - } - } - } - - fn add_records( - &mut self, - parsed_parts: Vec>, - plugin: &mut Option>, - ) { - for part in parsed_parts { - self.add_record(part, plugin); - } - } -} - -impl<'data, O: ObjectFile<'data>> InputRecord<'data, O> { - fn is_dynamic_object(&self) -> bool { - match self { - InputRecord::Object(Ok(obj)) => obj.is_dynamic(), - _ => false, - } - } -} +//! Code for figuring out what input files we need to read then mapping them into memory. 
+ +use crate::archive; +use crate::archive::ArchiveEntry; +use crate::archive::ArchiveIterator; +use crate::archive::EntryMeta; +use crate::args::Args; +use crate::args::Input; +use crate::args::InputSpec; +use crate::args::Modifiers; +use crate::bail; +use crate::error::Context as _; +use crate::error::Error; +use crate::error::Result; +use crate::file_kind::FileKind; +use crate::linker_plugins::LinkerPlugin; +use crate::linker_plugins::LtoInputInfo; +use crate::linker_script::LinkerScript; +use crate::parsing::ParsedInputObject; +use crate::platform::ObjectFile; +use crate::timing_phase; +use crate::verbose_timing_phase; +use colosseum::sync::Arena; +use crossbeam_queue::SegQueue; +use hashbrown::HashMap; +use memmap2::Mmap; +use rayon::Scope; +use rayon::iter::IntoParallelIterator; +use rayon::iter::IntoParallelRefIterator; +use rayon::iter::ParallelIterator; +use std::fmt::Display; +use std::ops::Deref; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; +use std::sync::Mutex; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +pub(crate) struct FileLoader<'data> { + /// The files that we've loaded so far. + pub(crate) loaded_files: Vec<&'data InputFile>, + + /// Whether we have at least one input file that is a dynamic object. + pub(crate) has_dynamic: bool, + + inputs_arena: &'data Arena, +} + +#[derive(Default)] +pub(crate) struct LoadedInputs<'data, O: ObjectFile<'data>> { + /// The results of parsing all the input files and archive entries. We defer checking for + /// success until later, since otherwise a parse error would mean that the save-dir mechanism + /// wouldn't capture all the input files. 
+ pub(crate) objects: Vec>>>, + + pub(crate) linker_scripts: Vec>, + + pub(crate) lto_objects: Vec>>>, +} + +pub(crate) struct InputBytes<'data> { + pub(crate) input: InputRef<'data>, + pub(crate) kind: FileKind, + pub(crate) data: &'data [u8], + pub(crate) modifiers: Modifiers, +} + +#[derive(Clone, Copy)] +pub(crate) struct ScriptData<'data> { + pub(crate) raw: &'data [u8], +} + +/// Identifies an input file. IDs start from 0 which is reserved for our prelude file. +#[derive(derive_more::Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[debug("file-{_0}")] +pub(crate) struct FileId(u32); + +pub(crate) const PRELUDE_FILE_ID: FileId = FileId::new(0, 0); + +#[derive(Debug)] +pub(crate) struct InputFile { + pub(crate) filename: PathBuf, + + /// The filename prior to path search. If this is absolute, then `filename` will be the same. + pub(crate) original_filename: PathBuf, + + pub(crate) modifiers: Modifiers, + + pub(crate) data: Option, +} + +#[derive(Debug)] +pub(crate) struct FileData { + bytes: Mmap, + + /// The modification timestamp of the input file just before we opened it. We expect our input + /// files not to change while we're running. + modification_time: std::time::SystemTime, +} + +/// Identifies an input object that may not be a regular file on disk, or may be an entry in an +/// archive. +#[derive(Clone, Copy)] +pub(crate) struct InputRef<'data> { + pub(crate) file: &'data InputFile, + pub(crate) entry: Option>, +} + +impl InputFile { + pub(crate) fn data(&self) -> &[u8] { + self.data.as_deref().unwrap_or_default() + } +} + +#[derive(Debug)] +struct InputPath { + /// An absolute path to the file. + absolute: PathBuf, + + /// The file as specified on the command line. In the case of an argument like -lfoo, this will + /// be "libfoo.so". 
+ original: PathBuf, +} + +#[derive(Debug)] +pub(crate) struct InputLinkerScript<'data> { + pub(crate) script: LinkerScript<'data>, + pub(crate) input_file: &'data InputFile, +} + +struct TemporaryState<'data, O: ObjectFile<'data>> { + args: &'data Args, + + /// Mapping from paths to the index in `files` at which we'll place the result. + path_to_load_index: Mutex>, + + next_file_load_index: AtomicUsize, + + files: SegQueue>, + + inputs_arena: &'data Arena, +} + +struct LoadedFile<'data, O: ObjectFile<'data>> { + index: FileLoadIndex, + state: LoadedFileState<'data, O>, +} + +enum LoadedFileState<'data, O: ObjectFile<'data>> { + Loaded(&'data InputFile, InputRecord<'data, O>), + Archive(&'data InputFile, Vec>), + ThinArchive(Vec<&'data InputFile>, Vec>), + LinkerScript(LoadedLinkerScriptState<'data>), + Error(Error), +} + +enum InputRecord<'data, O: ObjectFile<'data>> { + Object(Result>>), + LtoInput(Box>), +} + +struct UnclaimedLtoInput<'data> { + input_ref: InputRef<'data>, + file: Arc, + kind: FileKind, +} + +struct LoadedLinkerScriptState<'data> { + /// The indexes of the files requested by the linker script. Some of these indexes may turn out + /// to have been claimed earlier in the command-line, so we'll only load those that haven't. + file_indexes: Vec, + + /// The parsed linker script. + script: InputLinkerScript<'data>, +} + +/// A temporary ID for files that we loaded. Files specified on the command-line will have +/// deterministic values. Other files, e.g. those referenced by thin archives or linker scripts will +/// have non-deterministic values. +#[derive(Clone, Copy)] +struct FileLoadIndex(usize); + +/// A request for a worker to open the specified input, mmap its contents and identify what type of +/// file it is. If it turns out to be a thin archive, then the referenced files are also loaded. +struct OpenFileRequest { + file_index: FileLoadIndex, + paths: InputPath, + modifiers: Modifiers, + + /// The file that requested this file be opened. e.g. 
a linker script. In theory, we could have + /// a chain of files where linker scripts reference linker scripts, but for simplicity, we only + /// report the last file in the chain. + referenced_by: Option, +} + +struct LoadedLinkerScript<'data> { + script: InputLinkerScript<'data>, + extra_inputs: Vec, +} + +pub(crate) struct AuxiliaryFiles<'data> { + pub(crate) version_script_data: Option>, + pub(crate) export_list_data: Option>, +} + +impl<'data> AuxiliaryFiles<'data> { + pub(crate) fn new(args: &'data Args, inputs_arena: &'data Arena) -> Result { + let resolve_script_path = |path: &Path| -> PathBuf { + if path.exists() { + path.to_owned() + } else if let Some(found) = search_for_file(&args.lib_search_path, None, path) { + found + } else { + path.to_owned() + } + }; + + Ok(Self { + version_script_data: args + .version_script_path + .as_ref() + .map(|path| read_script_data(&resolve_script_path(path), inputs_arena)) + .transpose()?, + export_list_data: args + .export_list_path + .as_ref() + .map(|path| read_script_data(&resolve_script_path(path), inputs_arena)) + .transpose()?, + }) + } +} + +impl<'data> FileLoader<'data> { + pub(crate) fn new(inputs_arena: &'data Arena) -> Self { + Self { + loaded_files: Vec::new(), + inputs_arena, + has_dynamic: false, + } + } + + pub(crate) fn load_inputs>( + &mut self, + inputs: &[Input], + args: &'data Args, + plugin: &mut Option>, + ) -> Result> { + timing_phase!("Open input files"); + + let mut path_to_load_index = HashMap::new(); + + let mut initial_work = Vec::with_capacity(inputs.len()); + for input in inputs { + let path = input.path(args)?; + path_to_load_index + .entry(path.absolute.clone()) + .or_insert_with(|| { + let file_index = FileLoadIndex(initial_work.len()); + + initial_work.push(OpenFileRequest { + file_index, + paths: path, + modifiers: input.modifiers, + referenced_by: None, + }); + + file_index + }); + } + + let temporary_state = TemporaryState { + args, + path_to_load_index: 
Mutex::new(path_to_load_index), + next_file_load_index: AtomicUsize::new(initial_work.len()), + files: SegQueue::new(), + inputs_arena: self.inputs_arena, + }; + + // Open files, mmap them and identify their type from separate threads. + rayon::scope(|scope| { + initial_work.into_par_iter().for_each(|request| { + temporary_state.process_and_record_open_file_request(request, scope); + }); + }); + + verbose_timing_phase!("Finalise open input files"); + + // Put files into a deterministic order. That order will the order we'd find them if we just + // processed command-line arguments in order, recursively processing any files that those + // files pulled in. + let mut files_by_index = Vec::new(); + files_by_index.resize_with(temporary_state.files.len(), || None); + for file in temporary_state.files.into_iter() { + let entry = &mut files_by_index[file.index.0]; + assert!( + entry.is_none(), + "Internal error: Multiple files with the same index" + ); + *entry = Some(file.state); + } + self.extract_all(&mut files_by_index, plugin) + } + + /// Checks that the modification timestamp on all our input files hasn't changed since we opened + /// them. If they were modified while we were running, then we may fail with a SIGBUS if we try + /// to access part of the file that's no longer there, however if we don't, then we may have + /// read inconsistent data from the changed object, so we want to fail the link. 
+ pub(crate) fn verify_inputs_unchanged(&self) -> Result { + timing_phase!("Verify inputs unchanged"); + + self.loaded_files.par_iter().try_for_each(|file| { + let Some(file_data) = &file.data else { + return Ok(()); + }; + + let metadata = std::fs::metadata(&file.filename).with_context(|| { + format!("Failed to read metadata for `{}`", file.filename.display()) + })?; + + let new_modified = metadata.modified().with_context(|| { + format!( + "Failed to get modification time for `{}`", + file.filename.display() + ) + })?; + + if file_data.modification_time != new_modified { + bail!( + "The file `{}` was changed while we were running", + file.filename.display() + ); + } + + Ok(()) + }) + } + + /// Extract all files and linker scripts from `files`. Extraction order is the same as the order + /// on the original command-line. This is roughly FileLoadIndex order, except that (a) if a file + /// is loaded multiple times, it will only appear the first time it's encountered and (b) when a + /// linker script is loaded, its files appear at the point at which the linker script appeared + /// on the command-line, even though the FileLoadIndex for files loaded by linker scripts is + /// later. 
+ fn extract_all>( + &mut self, + files: &mut [Option>], + plugin: &mut Option>, + ) -> Result> { + let mut loaded = LoadedInputs { + objects: Vec::with_capacity(files.len()), + linker_scripts: Vec::new(), + lto_objects: Vec::new(), + }; + + for i in 0..files.len() { + self.extract_file(FileLoadIndex(i), files, &mut loaded, plugin)?; + } + + Ok(loaded) + } + + fn extract_file>( + &mut self, + index: FileLoadIndex, + files: &mut [Option>], + loaded: &mut LoadedInputs<'data, O>, + plugin: &mut Option>, + ) -> Result { + match core::mem::take(&mut files[index.0]) { + None => {} + Some(LoadedFileState::Loaded(input_file, parse_result)) => { + if parse_result.is_dynamic_object() { + self.has_dynamic = true; + } + loaded.add_record(parse_result, plugin); + self.loaded_files.push(input_file); + } + Some(LoadedFileState::Archive(input_file, parsed_parts)) => { + loaded.add_records(parsed_parts, plugin); + self.loaded_files.push(input_file); + } + Some(LoadedFileState::ThinArchive(mut input_files, parsed_parts)) => { + loaded.add_records(parsed_parts, plugin); + self.loaded_files.append(&mut input_files); + } + Some(LoadedFileState::LinkerScript(loaded_linker_script_state)) => { + self.loaded_files + .push(loaded_linker_script_state.script.input_file); + + loaded + .linker_scripts + .push(loaded_linker_script_state.script); + + for i in loaded_linker_script_state.file_indexes { + self.extract_file(i, files, loaded, plugin)?; + } + } + Some(LoadedFileState::Error(error)) => { + // For now, we just report the first error that we come to. 
+ return Err(error); + } + } + + Ok(()) + } +} + +fn process_linker_script<'data, T>( + input_file: &'data InputFile, + args: &Args, +) -> Result> { + let bytes = input_file.data(); + let script = LinkerScript::parse(bytes, &input_file.filename)?; + + let script_path = std::fs::canonicalize(&input_file.filename)?; + let directory = script_path.parent().expect("expected an absolute path"); + + let mut extra_inputs = Vec::new(); + + script.foreach_input(input_file.modifiers, |mut input| { + input.search_first = Some(directory.to_owned()); + + if let (Some(sysroot), InputSpec::File(file)) = (args.sysroot.as_ref(), &mut input.spec) + && let Some(new_file) = + crate::linker_script::maybe_apply_sysroot(&script_path, file, sysroot) + { + *file = new_file; + } + + extra_inputs.push(input); + + Ok(()) + })?; + + Ok(LoadedLinkerScript { + script: InputLinkerScript { script, input_file }, + extra_inputs, + }) +} + +fn process_archive<'data, O: ObjectFile<'data>>( + input_file: &'data InputFile, + file: &Arc, + state: &TemporaryState<'data, O>, +) -> Result> { + let mut outputs = Vec::new(); + + for entry in ArchiveIterator::from_archive_bytes(input_file.data())? 
{ + let entry = entry?; + match entry { + ArchiveEntry::Regular(archive_entry) => { + let input_ref = InputRef { + file: input_file, + entry: Some(EntryMeta { + identifier: archive_entry.ident, + start_offset: archive_entry.data_offset, + end_offset: archive_entry.data_offset + archive_entry.entry_data.len(), + }), + }; + + let kind = FileKind::identify_bytes(input_ref.data()) + .with_context(|| format!("Failed process input `{input_ref}`"))?; + + outputs.push(state.process_input(input_ref, file, kind)?); + } + ArchiveEntry::Thin(_) => unreachable!(), + } + } + + Ok(LoadedFileState::Archive(input_file, outputs)) +} + +fn process_thin_archive<'data, O: ObjectFile<'data>>( + input_file: &InputFile, + state: &TemporaryState<'data, O>, +) -> Result> { + let absolute_path = &input_file.filename; + let parent_path = absolute_path.parent().unwrap(); + let mut files = Vec::new(); + let mut parsed_files = Vec::new(); + + for entry in ArchiveIterator::from_archive_bytes(input_file.data())? { + match entry? 
{ + ArchiveEntry::Thin(entry) => { + let path = entry.ident.as_path(); + let entry_path = parent_path.join(path); + + let (file_data, file) = FileData::open(&entry_path, state.args.prepopulate_maps) + .with_context(|| { + format!( + "Failed to open file referenced by thin archive `{}`", + input_file.filename.display() + ) + })?; + + let input_file = InputFile { + filename: entry_path.clone(), + original_filename: entry_path, + modifiers: Modifiers { + archive_semantics: true, + ..input_file.modifiers + }, + data: Some(file_data), + }; + + let input_file = &*state.inputs_arena.alloc(input_file); + + let input_ref = InputRef { + file: input_file, + entry: None, + }; + + let kind = FileKind::identify_bytes(input_ref.data()) + .with_context(|| format!("Failed process input `{input_ref}`"))?; + + parsed_files.push(state.process_input(input_ref, &Arc::new(file), kind)?); + files.push(input_file); + } + ArchiveEntry::Regular(_) => {} + } + } + + Ok(LoadedFileState::ThinArchive(files, parsed_files)) +} + +impl<'data, O: ObjectFile<'data>> TemporaryState<'data, O> { + fn process_and_record_open_file_request<'scope>( + &'scope self, + request: OpenFileRequest, + scope: &Scope<'scope>, + ) { + let file_index = request.file_index; + let loaded_state = self + .process_open_file_request(request, scope) + .unwrap_or_else(LoadedFileState::Error); + self.files.push(LoadedFile { + index: file_index, + state: loaded_state, + }); + } + + fn process_open_file_request<'scope>( + &'scope self, + request: OpenFileRequest, + scope: &Scope<'scope>, + ) -> Result> { + verbose_timing_phase!("Open file"); + + let absolute_path = &request.paths.absolute; + let result = FileData::open(absolute_path.as_path(), self.args.prepopulate_maps); + let (data, file) = match request.referenced_by.as_ref() { + Some(referenced_by) => { + result.with_context(|| format!("Failed to process `{}`", referenced_by.display())) + } + None => result, + }?; + + let input_file = self.inputs_arena.alloc(InputFile { + 
filename: absolute_path.to_owned(), + original_filename: request.paths.original, + modifiers: request.modifiers, + data: Some(data), + }); + + let input_ref = InputRef { + file: input_file, + entry: None, + }; + + let data = input_ref.file.data.as_ref().unwrap(); + let kind = FileKind::identify_bytes(&data.bytes)?; + + match kind { + FileKind::Archive => process_archive(input_file, &Arc::new(file), self), + FileKind::ThinArchive => process_thin_archive(input_file, self), + FileKind::Text => { + let script = process_linker_script(input_file, self.args)?; + + let file_indexes = script + .extra_inputs + .into_iter() + .map(|input| { + self.load_input( + &input, + scope, + Some(script.script.input_file.filename.clone()), + ) + }) + .collect::>>()?; + + Ok(LoadedFileState::LinkerScript(LoadedLinkerScriptState { + file_indexes, + script: script.script, + })) + } + _ => { + let parsed = self.process_input(input_ref, &Arc::new(file), kind)?; + Ok(LoadedFileState::Loaded(input_file, parsed)) + } + } + } + + /// Sends a request to load `input` unless it has already been requested. In either case, return + /// the index for `input` in our files Vec. 
+ fn load_input<'scope>( + &'scope self, + input: &Input, + scope: &Scope<'scope>, + referenced_by: Option, + ) -> Result { + let paths = input.path(self.args)?; + + let mut path_to_load_index = self.path_to_load_index.lock().unwrap(); + + let index = match path_to_load_index.entry(paths.absolute.clone()) { + hashbrown::hash_map::Entry::Occupied(e) => *e.get(), + hashbrown::hash_map::Entry::Vacant(e) => { + let new_index = + FileLoadIndex(self.next_file_load_index.fetch_add(1, Ordering::Relaxed)); + e.insert(new_index); + + drop(path_to_load_index); + + let request = OpenFileRequest { + file_index: new_index, + paths, + modifiers: input.modifiers, + referenced_by, + }; + + scope.spawn(|scope| { + self.process_and_record_open_file_request(request, scope); + }); + + new_index + } + }; + + Ok(index) + } + + fn process_input( + &self, + input_ref: InputRef<'data>, + file: &Arc, + kind: FileKind, + ) -> Result> { + let data = input_ref.data(); + + // The plugin API docs say to pass files to the plugin before the linker tries to identify + // the them. Unfortunately the plugin API doesn't provide a fast way to identify files. The + // plugin API doesn't say anything about thread-safety and although the GCC plugin appears + // to be threadsafe, the clang plugin definitely isn't. This means that using the API to + // identify files is much too slow, so we do our own file identification and only pass files + // to the plugin if we think it can handle them. We can't rely on a plugin only being + // supplied when actually needed, since GCC seems to pretty much always pass a plugin to the + // linker. 
+ if kind.is_compiler_ir() { + return Ok(InputRecord::LtoInput(Box::new(UnclaimedLtoInput { + input_ref, + file: Arc::clone(file), + kind, + }))); + } + + if input_ref.is_archive_entry() + && kind != FileKind::ElfObject + && kind != FileKind::CoffObject + && kind != FileKind::CoffImport + { + bail!("Unexpected archive member of kind {kind:?}: {input_ref}"); + } + + let input_bytes = InputBytes { + kind, + input: input_ref, + data, + modifiers: input_ref.file.modifiers, + }; + + let object = ParsedInputObject::new(&input_bytes, self.args); + + Ok(InputRecord::Object(object)) + } +} + +fn read_script_data<'data>( + path: &Path, + inputs_arena: &'data Arena, +) -> Result> { + let data = FileData::new(path, false).context("Failed to read script")?; + + let file = inputs_arena.alloc(InputFile { + filename: path.to_owned(), + original_filename: path.to_owned(), + modifiers: Default::default(), + data: Some(data), + }); + + Ok(ScriptData { raw: file.data() }) +} + +impl Input { + fn path(&self, args: &Args) -> Result { + match &self.spec { + InputSpec::File(p) => { + if self.search_first.is_some() + && let Some(path) = search_for_file( + &args.lib_search_path, + self.search_first.as_ref(), + p.as_ref(), + ) + { + return Ok(InputPath { + absolute: std::path::absolute(path)?, + original: p.as_ref().to_owned(), + }); + } + Ok(InputPath { + absolute: p.as_ref().to_owned(), + original: p.as_ref().to_owned(), + }) + } + InputSpec::Lib(lib_name) => { + if self.modifiers.allow_shared { + let filename = format!("lib{lib_name}.so"); + if let Some(path) = search_for_file( + &args.lib_search_path, + self.search_first.as_ref(), + &filename, + ) { + return Ok(InputPath { + absolute: std::path::absolute(&path)?, + original: PathBuf::from(filename), + }); + } + } + let filename = format!("lib{lib_name}.a"); + if let Some(path) = + search_for_file(&args.lib_search_path, self.search_first.as_ref(), &filename) + { + return Ok(InputPath { + absolute: std::path::absolute(&path)?, + original: 
PathBuf::from(filename), + }); + } + bail!("Couldn't find library `{lib_name}` on library search path"); + } + InputSpec::Search(filename) => { + if let Some(path) = search_for_file( + &args.lib_search_path, + self.search_first.as_ref(), + filename.as_ref(), + ) { + return Ok(InputPath { + absolute: std::path::absolute(&path)?, + original: PathBuf::from(filename.as_ref()), + }); + } + bail!("Couldn't find library `{filename}` on library search path"); + } + } + } +} + +impl FileData { + pub(crate) fn new(path: &Path, prepopulate_maps: bool) -> Result { + Self::open(path, prepopulate_maps).map(|(file_data, _file)| file_data) + } + + fn open(path: &Path, prepopulate_maps: bool) -> Result<(Self, std::fs::File)> { + let file = std::fs::File::open(path) + .with_context(|| format!("Failed to open input file `{}`", path.display()))?; + + let modification_time = std::fs::metadata(path) + .and_then(|meta| meta.modified()) + .with_context(|| { + format!("Failed to read file modification time `{}`", path.display()) + })?; + + // Safety: Unfortunately, this is a bit of a compromise. Basically this is only safe if our + // users manage to avoid editing the input files while we've got them mapped. It'd be great + // if there were a way to protect against unsoundness when the input files were modified + // externally, but there isn't - at least on Linux. Not only could the bytes change without + // notice, but the mapped file could be truncated causing any access to result in a SIGBUS. + // + // For our use case, mmap just has too many advantages. There are likely large parts of our + // input files that we don't need to read, so reading all our input files up front isn't + // really an option. Reading just the parts we need might be an option, but would add + // substantial complexity. Also, using mmap means that if the system needs to reclaim + // memory, it can just release some of our pages. 
+ + let mut mmap_options = memmap2::MmapOptions::new(); + + // Prepopulating maps generally slows things down, so is off by default, however it's useful + // when profiling, since it means that you don't see false positive slowness in the parts of + // the code that first read a bit of memory. + if prepopulate_maps { + mmap_options.populate(); + } + + let bytes = unsafe { mmap_options.map(&file) } + .with_context(|| format!("Failed to mmap input file `{}`", path.display()))?; + + Ok(( + FileData { + bytes, + modification_time, + }, + file, + )) + } +} + +fn search_for_file( + lib_search_path: &[Box], + search_first: Option<&PathBuf>, + filename: impl AsRef, +) -> Option { + let filename = filename.as_ref(); + if let Some(search_first) = search_first { + let path = search_first.join(filename); + if path.exists() { + return Some(path); + } + } + for dir in lib_search_path { + let path = dir.join(filename); + if path.exists() { + return Some(path); + } + } + None +} + +impl Deref for FileData { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.bytes + } +} + +const FILE_INDEX_BITS: u32 = 8; +pub(crate) const MAX_FILES_PER_GROUP: u32 = 1 << FILE_INDEX_BITS; + +impl FileId { + pub(crate) const fn new(group: u32, file: u32) -> Self { + Self((group << FILE_INDEX_BITS) | file) + } + + pub(crate) const fn from_encoded(v: u32) -> Self { + Self(v) + } + + pub(crate) fn group(self) -> usize { + self.0 as usize >> FILE_INDEX_BITS + } + + pub(crate) fn file(self) -> usize { + self.0 as usize & ((1 << FILE_INDEX_BITS) - 1) + } + + pub(crate) fn as_u32(self) -> u32 { + self.0 + } +} + +impl std::fmt::Display for InputRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.file.filename.display(), f)?; + if let Some(entry) = &self.entry { + std::fmt::Display::fmt(" @ ", f)?; + std::fmt::Display::fmt(&String::from_utf8_lossy(entry.identifier.as_slice()), f)?; + } + Ok(()) + } +} + +impl std::fmt::Debug for 
InputRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::fmt::Display for FileId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ({}/{})", self.0, self.group(), self.file()) + } +} + +impl<'data> InputRef<'data> { + pub(crate) fn lib_name(&self) -> &'data [u8] { + self.file.original_filename.as_os_str().as_encoded_bytes() + } + + pub(crate) fn has_archive_semantics(&self) -> bool { + self.entry.is_some() || self.file.modifiers.archive_semantics + } + + pub(crate) fn data(&self) -> &'data [u8] { + if let Some(entry) = &self.entry { + &self.file.data()[entry.byte_range()] + } else { + self.file.data() + } + } + + fn is_archive_entry(&self) -> bool { + self.entry.is_some() + } +} + +impl Display for InputBytes<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.input, f) + } +} + +impl<'data, O: ObjectFile<'data>> LoadedInputs<'data, O> { + fn add_record( + &mut self, + record: InputRecord<'data, O>, + plugin: &mut Option>, + ) { + match record { + InputRecord::Object(obj) => self.objects.push(obj), + InputRecord::LtoInput(obj) => { + let UnclaimedLtoInput { + input_ref, + file, + kind, + } = *obj; + let result = plugin.as_mut() + .with_context(|| { + format!( + "Input file {input_ref} contains {kind}, but linker plugin was not supplied" + ) + }) + .and_then(|plugin| plugin.process_input(input_ref, &file, kind)); + self.lto_objects.push(result); + } + } + } + + fn add_records( + &mut self, + parsed_parts: Vec>, + plugin: &mut Option>, + ) { + for part in parsed_parts { + self.add_record(part, plugin); + } + } +} + +impl<'data, O: ObjectFile<'data>> InputRecord<'data, O> { + fn is_dynamic_object(&self) -> bool { + match self { + InputRecord::Object(Ok(obj)) => obj.is_dynamic(), + _ => false, + } + } +} diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index 376b0cd2a..5355e76ca 100644 --- 
a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -548,7 +548,7 @@ fn compute_total_file_size(section_layouts: &OutputSectionMap> { pub(crate) symbol_db: SymbolDb<'data, O>, pub(crate) symbol_resolutions: SymbolResolutions, @@ -576,6 +576,18 @@ pub struct Layout<'data, O: ObjectFile<'data>> { pub(crate) properties_and_attributes: O::LayoutProperties, } +impl<'data, O: ObjectFile<'data>> std::fmt::Debug for Layout<'data, O> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Layout") + .field("symbol_resolutions", &self.symbol_resolutions) + .field("section_part_layouts", &self.section_part_layouts) + .field("section_layouts", &self.section_layouts) + .field("segment_layouts", &self.segment_layouts) + .field("has_static_tls", &self.has_static_tls) + .finish_non_exhaustive() + } +} + #[derive(Debug)] pub(crate) struct SegmentLayouts { /// The layout of each of our segments. Segments containing no active output sections will have @@ -808,7 +820,7 @@ trait SymbolRequestHandler<'data, O: ObjectFile<'data>>: std::fmt::Display + Han allocate_symbol_resolution(flags, &mut common.mem_sizes, symbol_db.output_kind); - if symbol_db.args.got_plt_syms && flags.needs_got() { + if O::wants_got_plt_syms(symbol_db.args) && flags.needs_got() { let name = symbol_db.symbol_name(symbol_id)?; let name = O::RawSymbolName::parse(name.bytes()).name(); let name_len = name.len() + 4; // "$got" or "$plt" suffix @@ -1381,7 +1393,7 @@ impl<'data, O: ObjectFile<'data>> Layout<'data, O> { i } - pub(crate) fn args(&self) -> &'data Args { + pub(crate) fn args(&self) -> &'data Args { self.symbol_db.args } @@ -1662,7 +1674,7 @@ fn compute_segment_layout<'data, O: ObjectFile<'data>>( output_order: &OutputOrder, program_segments: &ProgramSegments, header_info: &HeaderInfo, - args: &Args, + args: &Args, ) -> Result { #[derive(Clone)] struct Record { @@ -1690,7 +1702,7 @@ fn compute_segment_layout<'data, O: ObjectFile<'data>>( file_start: 0, file_end: 0, 
mem_start: 0, - mem_end: args.z_stack_size.map_or(0, |size| size.get()), + mem_end: O::stack_size(args), alignment: alignment::MIN, }); } else { @@ -3043,7 +3055,7 @@ impl<'data> PreludeLayoutState<'data> { &mut self, common: &mut CommonGroupState<'data, O>, uses_tlsld: &AtomicBool, - args: &Args, + args: &Args, output_kind: OutputKind, ) { if uses_tlsld.load(atomic::Ordering::Relaxed) { @@ -3548,7 +3560,7 @@ fn should_emit_undefined_error<'data, O: ObjectFile<'data>>( sym_file_id: FileId, sym_def_file_id: FileId, flags: ValueFlags, - args: &Args, + args: &Args, output_kind: OutputKind, ) -> bool { if (output_kind.is_shared_object() && !args.no_undefined) || symbol.is_weak() { @@ -3621,7 +3633,7 @@ impl<'data> SyntheticSymbolsLayoutState<'data> { impl<'data, O: ObjectFile<'data>> EpilogueLayoutState<'data, O> { fn new( - args: &Args, + args: &Args, output_kind: OutputKind, dynamic_symbol_definitions: &mut [DynamicSymbolDefinition], ) -> Self { @@ -3636,7 +3648,7 @@ impl<'data, O: ObjectFile<'data>> EpilogueLayoutState<'data, O> { total_sizes: &mut OutputSectionPartMap, resources: &FinaliseSizesResources<'data, '_, O>, ) -> Result { - if resources.symbol_db.args.hash_style.includes_sysv() { + if O::hash_includes_sysv(resources.symbol_db.args) { let mut extra_sizes = OutputSectionPartMap::with_size(common.mem_sizes.num_parts()); O::apply_late_size_adjustments_epilogue( &mut self.format_specific, @@ -5019,7 +5031,7 @@ fn layout_section_parts<'data, O: ObjectFile<'data>>( output_sections: &OutputSections, program_segments: &ProgramSegments, output_order: &OutputOrder, - args: &Args, + args: &Args, ) -> OutputSectionPartMap { let segment_alignments = compute_segment_alignments::( sizes, @@ -5141,7 +5153,7 @@ fn compute_segment_alignments<'data, O: ObjectFile<'data>>( sizes: &OutputSectionPartMap, program_segments: &ProgramSegments, output_order: &OutputOrder, - args: &Args, + args: &Args, output_sections: &OutputSections, ) -> HashMap { timing_phase!("Computing segment 
alignments"); @@ -5667,7 +5679,7 @@ fn test_no_disallowed_overlaps() { let mut output_sections = OutputSections::with_base_address(0x1000); let (output_order, program_segments) = output_sections.output_order::(); - let args = Args::default(); + let args: Args = Args::default(); let section_part_sizes = output_sections.new_part_map::().map(|_, _| 7); let section_part_layouts = layout_section_parts::( diff --git a/libwild/src/lib.rs b/libwild/src/lib.rs index 78ea1502d..853c4ad99 100644 --- a/libwild/src/lib.rs +++ b/libwild/src/lib.rs @@ -2,6 +2,7 @@ pub(crate) mod alignment; pub(crate) mod arch; pub(crate) mod archive; pub mod args; +pub(crate) mod coff; pub(crate) mod debug_trace; pub(crate) mod diagnostics; pub(crate) mod diff; @@ -34,6 +35,7 @@ pub(crate) mod output_section_part_map; pub(crate) mod output_trace; pub(crate) mod parsing; pub(crate) mod part_id; +pub(crate) mod pe_writer; #[cfg(all( target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64") @@ -62,6 +64,7 @@ pub(crate) mod subprocess; pub(crate) mod subprocess; pub(crate) mod symbol; pub(crate) mod symbol_db; +pub(crate) mod target_os; pub(crate) mod timing; pub(crate) mod validation; pub(crate) mod value_flags; @@ -74,17 +77,15 @@ use crate::error::Result; use crate::identity::linker_identity; use crate::layout_rules::LayoutRulesBuilder; use crate::output_kind::OutputKind; +use crate::platform::ObjectFile; use crate::platform::Platform; use crate::value_flags::PerSymbolFlags; -use crate::version_script::VersionScript; pub use args::Args; use colosseum::sync::Arena; use crossbeam_utils::atomic::AtomicCell; use error::AlreadyInitialised; use input_data::FileLoader; use input_data::InputFile; -use input_data::InputLinkerScript; -use layout_rules::LayoutRules; use output_section_id::OutputSections; use std::io::BufWriter; use std::io::Write; @@ -95,16 +96,92 @@ use tracing_subscriber::fmt; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; 
-/// Runs the linker and cleans up associated resources. Only use this function if you've OK with -/// waiting for cleanup. -pub fn run(args: Args) -> error::Result { +/// Trait implemented by format-specific args types to dispatch linking by architecture. +pub(crate) trait TargetFormat: Send + Sync + 'static { + fn dispatch_link<'layout_inputs>( + linker: &'layout_inputs Linker, + args: &'layout_inputs Args, + ) -> error::Result> + where + Self: Sized; +} + +impl TargetFormat for args::linux::ElfArgs { + fn dispatch_link<'layout_inputs>( + linker: &'layout_inputs Linker, + args: &'layout_inputs Args, + ) -> error::Result> { + let res = match args.arch { + arch::Architecture::X86_64 => linker.link_for_arch::(args)?, + arch::Architecture::AArch64 => linker.link_for_arch::(args)?, + arch::Architecture::RISCV64 => linker.link_for_arch::(args)?, + arch::Architecture::LoongArch64 => { + linker.link_for_arch::(args)? + } + }; + Ok(match res { + Some(layout) => LinkerOutput::Elf(layout), + None => LinkerOutput::None, + }) + } +} + +impl TargetFormat for args::windows::PeArgs { + fn dispatch_link<'layout_inputs>( + linker: &'layout_inputs Linker, + args: &'layout_inputs Args, + ) -> error::Result> { + let res = match args.arch { + arch::Architecture::X86_64 | arch::Architecture::AArch64 => { + linker.link_for_arch::(args)? + } + other => { + bail!("PE format does not support architecture: {other}"); + } + }; + Ok(match res { + Some(layout) => LinkerOutput::Pe(layout), + None => LinkerOutput::None, + }) + } +} + +#[macro_export] +macro_rules! 
linker_run { + ($linker:ident, $args:ident, $after:expr) => { + use $crate::TargetFormat; + let args = $args.args; + match args.target_args { + $crate::args::TargetArgs::Elf(_) => { + let args = args.map_target(|t| match t { + $crate::args::TargetArgs::Elf(e) => e, + _ => unreachable!(), + }); + let res = $crate::args::linux::ElfArgs::dispatch_link(&$linker, &args)?; + $after(res); + } + $crate::args::TargetArgs::Pe(_) => { + let args = args.map_target(|t| match t { + $crate::args::TargetArgs::Pe(p) => p, + _ => unreachable!(), + }); + let res = $crate::args::windows::PeArgs::dispatch_link(&$linker, &args)?; + $after(res); + } + } + }; +} + +/// Runs the linker and cleans up associated resources. +pub fn run(args: args::Args) -> error::Result { // Note, we need to setup tracing before we activate the thread pool. In particular, we need to // initialise the timing module before the worker threads are started, otherwise the threads // won't contribute to counters such as --time=cycles,instructions etc. setup_tracing(&args)?; let args = args.activate_thread_pool()?; let linker = Linker::new(); - linker.run(&args)?; + linker_run!(linker, args, |_| {}); + drop(linker); timing::finalise_perfetto_trace()?; Ok(()) @@ -113,7 +190,7 @@ pub fn run(args: Args) -> error::Result { /// Sets up whatever tracing, if any, is indicated by the supplied arguments. This can only be /// called once and only if nothing else has already set the global tracing dispatcher. Calling this /// is optional. If it isn't called, no tracing-based features will function. e.g. --time. -pub fn setup_tracing(args: &Args) -> Result<(), AlreadyInitialised> { +pub fn setup_tracing(args: &args::Args) -> Result<(), AlreadyInitialised> { if let Some(opts) = args.time_phase_options.as_ref() { timing::init_tracing(opts) } else if args.print_allocations.is_some() { @@ -135,13 +212,13 @@ pub fn setup_tracing(args: &Args) -> Result<(), AlreadyInitialised> { /// pages) will still happen anyway. 
pub struct Linker { /// We store our input files here once we've read them. - inputs_arena: Arena, + pub(crate) inputs_arena: Arena, - linker_plugin_arena: Arena, + pub(crate) linker_plugin_arena: Arena, /// Anything that doesn't need a custom Drop implementation can go in here. In practice, it's /// mostly just the decompressed copy of compressed string-merge sections. - herd: bumpalo_herd::Herd, + pub(crate) herd: bumpalo_herd::Herd, /// We'll fill this in when we're done linking and start shutting down. Once this is dropped, /// that signals the end of shutdown for the purposes of timing measurement. @@ -153,11 +230,13 @@ pub struct Linker { _link_scope: Vec>, } -pub struct LinkerOutput<'layout_inputs> { - /// This is just here so that we defer its destruction. This allows us to (a) measure how long - /// it takes to drop and (b) if we forked, signal our parent that we're done, then drop it in - /// the background. - layout: Option>>, +/// This is just here so that we defer its destruction. This allows us to (a) measure how long +/// it takes to drop and (b) if we forked, signal our parent that we're done, then drop it in +/// the background. +pub(crate) enum LinkerOutput<'layout_inputs> { + Elf(layout::Layout<'layout_inputs, crate::elf::File<'layout_inputs>>), + Pe(layout::Layout<'layout_inputs, crate::coff::CoffObjectFile<'layout_inputs>>), + None, } impl Linker { @@ -173,19 +252,18 @@ impl Linker { } } - /// Runs the linker. The returned value isn't useful for anything, but is somewhat expensive to - /// drop, so we leave it up to the caller to decide when to drop it. At the point at which we - /// return, the output file should be usable. - pub fn run<'layout_inputs>( + /// Runs the linker. Generic over the target format args type (ElfArgs or PeArgs). + /// The caller is responsible for mapping the args to the correct format type first. 
+ pub(crate) fn run<'layout_inputs, T: TargetFormat>( &'layout_inputs self, - args: &'layout_inputs ActivatedArgs, + args: &'layout_inputs ActivatedArgs, ) -> error::Result> { let args = &args.args; match args.version_mode { args::VersionMode::ExitAfterPrint => { let mut stdout = std::io::stdout().lock(); writeln!(stdout, "{}", linker_identity())?; - return Ok(LinkerOutput { layout: None }); + return Ok(LinkerOutput::None); } args::VersionMode::Verbose => { let mut stdout = std::io::stdout().lock(); @@ -197,23 +275,16 @@ impl Linker { } } - match args.arch { - arch::Architecture::X86_64 => self.link_for_arch::(args), - arch::Architecture::AArch64 => self.link_for_arch::(args), - arch::Architecture::RISCV64 => self.link_for_arch::(args), - arch::Architecture::LoongArch64 => { - self.link_for_arch::(args) - } - } + T::dispatch_link(self, args) } - fn link_for_arch<'data, P: Platform<'data, File = crate::elf::File<'data>>>( - &'data self, - args: &'data Args, - ) -> error::Result> { + fn link_for_arch<'layout_inputs, P: Platform<'layout_inputs>>( + &'layout_inputs self, + args: &'layout_inputs Args<>::Args>, + ) -> error::Result>> { let mut file_loader = input_data::FileLoader::new(&self.inputs_arena); - // Note, we propagate errors from `link_with_input_data` after we've checked if any files + // Note, we propagate errors from `load_inputs_and_link` after we've checked if any files // changed. We want inputs-changed errors to take precedence over all other errors. let result = self.load_inputs_and_link::

(&mut file_loader, args); @@ -235,13 +306,12 @@ impl Linker { result } - fn load_inputs_and_link<'data, P: Platform<'data, File = crate::elf::File<'data>>>( - &'data self, - file_loader: &mut FileLoader<'data>, - args: &'data Args, - ) -> error::Result> { - let mut plugin = - linker_plugins::LinkerPlugin::from_args(args, &self.linker_plugin_arena, &self.herd)?; + fn load_inputs_and_link<'layout_inputs, P: Platform<'layout_inputs>>( + &'layout_inputs self, + file_loader: &mut FileLoader<'layout_inputs>, + args: &'layout_inputs Args<>::Args>, + ) -> error::Result>> { + let mut plugin = P::create_plugin(self, args)?; let loaded = file_loader.load_inputs(&args.inputs, args, &mut plugin); @@ -251,8 +321,6 @@ impl Linker { let output_kind = OutputKind::new(args, file_loader); - let mut output = file_writer::Output::new(args, output_kind); - let mut output_sections = OutputSections::with_base_address(output_kind.base_address()); let mut layout_rules_builder = LayoutRulesBuilder::default(); @@ -285,52 +353,25 @@ impl Linker { &resolver.resolved_groups, )?; - if let Some(plugin) = plugin.as_mut() - && plugin.is_initialised() - { - plugin.all_symbols_read( - &mut symbol_db, - &mut resolver, - file_loader, - &mut per_symbol_flags, - &mut output_sections, - &mut layout_rules_builder, - )?; - } - - // If it's a rust version script, apply the global symbol visibility now. - // We previously downgraded all symbols to local visibility. - if let VersionScript::Rust(rust_vscript) = &symbol_db.version_script { - symbol_db.handle_rust_version_script(rust_vscript, &mut per_symbol_flags); - } - - let layout_rules = layout_rules_builder.build(); - - let resolved = resolver.resolve_sections_and_canonicalise_undefined( - &mut symbol_db, - &mut per_symbol_flags, - &mut output_sections, - &layout_rules, - )?; - - let layout = layout::compute::

( + let layout = P::finish_link( + file_loader, + args, + &mut plugin, symbol_db, per_symbol_flags, - resolved, + resolver, output_sections, - &mut output, + layout_rules_builder, + output_kind, )?; - output.write(&layout, elf_writer::write::

)?; diff::maybe_diff()?; // We've finished linking. We consider everything from this point onwards as shutdown. let (g1, g2) = timing_guard!("Shutdown"); self.shutdown_scope.store(vec![Box::new(g1), Box::new(g2)]); - Ok(LinkerOutput { - layout: Some(layout), - }) + Ok(layout) } } @@ -348,10 +389,10 @@ impl Drop for Linker { } } -impl Drop for LinkerOutput<'_> { +impl<'layout_inputs> Drop for LinkerOutput<'layout_inputs> { fn drop(&mut self) { timing_phase!("Drop layout"); - self.layout.take(); + let _ = std::mem::swap(self, &mut LinkerOutput::None); } } diff --git a/libwild/src/linker_plugins.rs b/libwild/src/linker_plugins.rs index 98509e283..d3057d0d7 100644 --- a/libwild/src/linker_plugins.rs +++ b/libwild/src/linker_plugins.rs @@ -9,7 +9,8 @@ //! up having to make quite a bit of use of thread locals in order to get state to where it needs to //! be. -use crate::Args; +use crate::args::Args; +use crate::args::linux::ElfArgs; use crate::args::Input; use crate::args::Modifiers; use crate::bail; @@ -68,7 +69,7 @@ enum Store<'data> { } struct LoadInfo<'data> { - args: &'data Args, + args: &'data Args, arena: &'data Arena, } @@ -128,7 +129,7 @@ pub(crate) struct PluginOutputs { impl<'data> LinkerPlugin<'data> { pub(crate) fn from_args( - args: &'data crate::Args, + args: &'data crate::args::Args, arena: &'data Arena, herd: &'data Herd, ) -> Result>> { @@ -319,7 +320,7 @@ impl<'data> WrapSymbols<'data> { } impl LoadedPlugin { - fn new(plugin_path: &Path, args: &Args) -> Result { + fn new(plugin_path: &Path, args: &Args) -> Result { timing_phase!("Load linker plugin"); if cfg!(target_feature = "crt-static") { diff --git a/libwild/src/linker_plugins_disabled.rs b/libwild/src/linker_plugins_disabled.rs index 7570c686b..9880b6851 100644 --- a/libwild/src/linker_plugins_disabled.rs +++ b/libwild/src/linker_plugins_disabled.rs @@ -34,8 +34,8 @@ impl<'data> LinkerPlugin<'data> { unreachable!(); } - pub(crate) fn from_args( - _args: &'data crate::Args, + pub(crate) fn 
from_args( + _args: &'data crate::args::Args, _linker_plugin_arena: &colosseum::sync::Arena, _herd: &bumpalo_herd::Herd, ) -> Result> { @@ -46,10 +46,10 @@ impl<'data> LinkerPlugin<'data> { false } - pub(crate) fn all_symbols_read( + pub(crate) fn all_symbols_read>( &mut self, - _symbol_db: &mut SymbolDb<'data, crate::elf::File<'data>>, - _resolver: &mut Resolver<'data, crate::elf::File<'data>>, + _symbol_db: &mut SymbolDb<'data, O>, + _resolver: &mut Resolver<'data, O>, _file_loader: &mut FileLoader<'data>, _per_symbol_flags: &mut PerSymbolFlags, _output_sections: &mut OutputSections<'data>, diff --git a/libwild/src/output_kind.rs b/libwild/src/output_kind.rs index e4c988671..d670dbe79 100644 --- a/libwild/src/output_kind.rs +++ b/libwild/src/output_kind.rs @@ -1,4 +1,4 @@ -use crate::Args; +use crate::args::Args; use crate::args::RelocationModel; use crate::input_data::FileLoader; @@ -10,7 +10,7 @@ pub(crate) enum OutputKind { } impl OutputKind { - pub(crate) fn new(args: &Args, input_data: &FileLoader<'_>) -> OutputKind { + pub(crate) fn new(args: &Args, input_data: &FileLoader<'_>) -> OutputKind { if !args.should_output_executable { OutputKind::SharedObject } else if args.dynamic_linker.is_some() diff --git a/libwild/src/output_section_id.rs b/libwild/src/output_section_id.rs index 966a3d4eb..d815082f1 100644 --- a/libwild/src/output_section_id.rs +++ b/libwild/src/output_section_id.rs @@ -954,11 +954,11 @@ impl<'data> OutputSections<'data> { pub(crate) fn secondary_order(&self, id: OutputSectionId) -> Option { self.section_infos.get(id).secondary_order } - pub(crate) fn add_sections( + pub(crate) fn add_sections( &mut self, custom_sections: &[CustomSectionDetails<'data>], sections: &mut [SectionSlot], - args: &Args, + args: &Args, ) { for custom in custom_sections { let name_str = std::str::from_utf8(custom.name.bytes()).ok(); diff --git a/libwild/src/parsing.rs b/libwild/src/parsing.rs index 7b2242427..72fa495d4 100644 --- a/libwild/src/parsing.rs +++ 
b/libwild/src/parsing.rs @@ -191,7 +191,7 @@ impl<'data> InternalSymDefInfo<'data> { } impl<'data, O: ObjectFile<'data>> ParsedInputObject<'data, O> { - pub(crate) fn new(input: &InputBytes<'data>, args: &Args) -> Result> { + pub(crate) fn new(input: &InputBytes<'data>, args: &Args) -> Result> { verbose_timing_phase!("Parse file"); let object = O::parse(input, args) @@ -214,7 +214,7 @@ impl<'data, O: ObjectFile<'data>> ParsedInputObject<'data, O> { } impl<'data> Prelude<'data> { - pub(crate) fn new>(args: &'data Args, output_kind: OutputKind) -> Self { + pub(crate) fn new>(args: &'data Args, output_kind: OutputKind) -> Self { verbose_timing_phase!("Construct prelude"); let mut symbols = InternalSymbolsBuilder::default(); diff --git a/libwild/src/part_id.rs b/libwild/src/part_id.rs index 0052a9c5a..25236c10f 100644 --- a/libwild/src/part_id.rs +++ b/libwild/src/part_id.rs @@ -57,10 +57,10 @@ pub(crate) const CUSTOM_PLACEHOLDER: PartId = PartId(u32::MAX); /// Returns whether the supplied section meets our criteria for section merging. Section merging is /// optional, so there are cases where we might be able to merge, but don't currently. For example /// if alignment is > 1. -pub(crate) fn should_merge_sections( +pub(crate) fn should_merge_sections( section_flags: S, section_alignment: u64, - args: &Args, + args: &Args, ) -> bool { if !args.merge_sections { return false; diff --git a/libwild/src/pe_writer.rs b/libwild/src/pe_writer.rs new file mode 100644 index 000000000..eff468ebc --- /dev/null +++ b/libwild/src/pe_writer.rs @@ -0,0 +1,657 @@ +//! PE executable output: layout computation and writing. +//! +//! This module handles the PE-specific portion of linking: computing section layout, +//! writing PE headers, copying section data, and applying COFF relocations. 
+ +use crate::args::Args; +use crate::args::windows::PeArgs; +use crate::arch::Architecture; +use crate::coff::CoffObjectFile; +use crate::error::Context as _; +use crate::error::Result; +use crate::platform::ObjectFile; +use crate::platform::Symbol as _; +use crate::resolution::ResolvedFile; +use crate::resolution::ResolvedGroup; +use crate::resolution::SectionSlot; +use crate::sharding::ShardKey as _; +use crate::symbol::UnversionedSymbolName; +use crate::symbol_db::SymbolDb; +use object::LittleEndian as LE; +use object::pe; +use std::collections::HashMap; + +// ── Constants ──────────────────────────────────────────────────────────────── + +const IMAGE_BASE_X64: u64 = 0x0000_0001_4000_0000; +const SECTION_ALIGNMENT: u32 = 0x1000; +const FILE_ALIGNMENT: u32 = 0x200; + +const DOS_HEADER_SIZE: u32 = core::mem::size_of::() as u32; +const PE_SIGNATURE_SIZE: u32 = 4; +const COFF_HEADER_SIZE: u32 = core::mem::size_of::() as u32; +const OPTIONAL_HEADER_BASE_SIZE: u32 = core::mem::size_of::() as u32; +const DATA_DIRS_SIZE: u32 = + (pe::IMAGE_NUMBEROF_DIRECTORY_ENTRIES as u32) * core::mem::size_of::() as u32; +const OPTIONAL_HEADER_SIZE: u32 = OPTIONAL_HEADER_BASE_SIZE + DATA_DIRS_SIZE; +const SECTION_HEADER_SIZE: u32 = core::mem::size_of::() as u32; + +// ── Layout data structures ─────────────────────────────────────────────────── + +pub(crate) struct PeLayout<'data> { + pub(crate) args: &'data Args, + pub(crate) image_base: u64, + pub(crate) sections: Vec>, + pub(crate) entry_point_rva: u32, + pub(crate) size_of_headers: u32, + pub(crate) size_of_image: u32, + pub(crate) machine: u16, + pub(crate) file_size: u64, + pub(crate) symbol_addresses: Vec, +} + +pub(crate) struct PeOutputSection<'data> { + pub(crate) name: [u8; 8], + pub(crate) virtual_address: u32, + pub(crate) virtual_size: u32, + pub(crate) file_offset: u32, + pub(crate) raw_data_size: u32, + pub(crate) characteristics: u32, + pub(crate) contributions: Vec>, + pub(crate) is_bss: bool, +} + +pub(crate) 
struct SectionContribution<'data> { + pub(crate) object: &'data CoffObjectFile<'data>, + pub(crate) input_section_index: object::SectionIndex, + pub(crate) output_offset: u32, + pub(crate) size: u32, + pub(crate) symbol_id_start: crate::symbol_db::SymbolId, +} + +// ── Section name / characteristics mapping ─────────────────────────────────── + +fn output_section_name(input_name: &[u8]) -> [u8; 8] { + let base_name = if let Some(dollar_pos) = input_name.iter().position(|&b| b == b'$') { + &input_name[..dollar_pos] + } else { + input_name + }; + + match base_name { + b".text" => *b".text\0\0\0", + b".rdata" => *b".rdata\0\0", + b".data" => *b".data\0\0\0", + b".bss" => *b".bss\0\0\0\0", + b".pdata" => *b".pdata\0\0", + b".xdata" => *b".xdata\0\0", + _ => { + let mut out = [0u8; 8]; + let len = base_name.len().min(8); + out[..len].copy_from_slice(&base_name[..len]); + out + } + } +} + +fn merge_characteristics(chars: u32) -> u32 { + chars + & (pe::IMAGE_SCN_CNT_CODE + | pe::IMAGE_SCN_CNT_INITIALIZED_DATA + | pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | pe::IMAGE_SCN_MEM_EXECUTE + | pe::IMAGE_SCN_MEM_READ + | pe::IMAGE_SCN_MEM_WRITE) +} + +fn coff_section_alignment(chars: u32) -> u32 { + let align_field = (chars >> 20) & 0xF; + if align_field == 0 { + 1 + } else { + 1 << (align_field - 1) + } +} + +// ── Layout computation ─────────────────────────────────────────────────────── + +pub(crate) fn compute_layout<'data>( + symbol_db: &SymbolDb<'data, CoffObjectFile<'data>>, + resolved_groups: &[ResolvedGroup<'data, CoffObjectFile<'data>>], + args: &'data Args, +) -> Result> { + let image_base = IMAGE_BASE_X64; + let machine = match args.arch { + Architecture::X86_64 => pe::IMAGE_FILE_MACHINE_AMD64, + Architecture::AArch64 => pe::IMAGE_FILE_MACHINE_ARM64, + _ => crate::bail!("Unsupported PE architecture: {:?}", args.arch), + }; + + // Step 1: Collect input sections into output sections. 
+ let mut output_sections: Vec> = Vec::new(); + let mut section_map: HashMap<([u8; 8], u32), usize> = HashMap::new(); + + for group in resolved_groups { + for file in &group.files { + let ResolvedFile::Object(resolved_obj) = file else { + continue; + }; + let object = resolved_obj.common.object; + let symbol_id_start = resolved_obj.common.symbol_id_range.start(); + + for (slot_index, slot) in resolved_obj.sections.iter().enumerate() { + match slot { + SectionSlot::Discard => continue, + SectionSlot::Loaded(_) + | SectionSlot::Unloaded(_) + | SectionSlot::MustLoad(_) => {} + _ => continue, + } + + let section_index = object::SectionIndex(slot_index + 1); + let section_header = object.section(section_index)?; + let name_bytes = object.section_name(section_header)?; + let out_name = output_section_name(name_bytes); + let in_chars = section_header.characteristics.get(LE); + let out_chars = merge_characteristics(in_chars); + let is_bss = in_chars & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0; + + let size = section_header.size_of_raw_data.get(LE); + if size == 0 { + continue; + } + + let section_idx = *section_map + .entry((out_name, out_chars)) + .or_insert_with(|| { + output_sections.push(PeOutputSection { + name: out_name, + virtual_address: 0, + virtual_size: 0, + file_offset: 0, + raw_data_size: 0, + characteristics: out_chars, + contributions: Vec::new(), + is_bss, + }); + output_sections.len() - 1 + }); + + let out_section = &mut output_sections[section_idx]; + + let input_alignment = coff_section_alignment(in_chars).max(1); + let aligned_offset = align_up(out_section.virtual_size, input_alignment); + + out_section.contributions.push(SectionContribution { + object, + input_section_index: section_index, + output_offset: aligned_offset, + size, + symbol_id_start, + }); + + out_section.virtual_size = aligned_offset + size; + if !is_bss { + out_section.raw_data_size = aligned_offset + size; + } + } + } + } + + output_sections.sort_by_key(|s| section_sort_key(&s.name)); 
+ + // Step 2: Compute header size + let headers_raw = DOS_HEADER_SIZE + + PE_SIGNATURE_SIZE + + COFF_HEADER_SIZE + + OPTIONAL_HEADER_SIZE + + SECTION_HEADER_SIZE * output_sections.len() as u32; + let size_of_headers = align_up(headers_raw, FILE_ALIGNMENT); + + // Step 3: Assign virtual addresses and file offsets + let mut next_rva = align_up(size_of_headers, SECTION_ALIGNMENT); + let mut next_file_offset = size_of_headers; + + for section in &mut output_sections { + section.virtual_address = next_rva; + section.file_offset = if section.is_bss { 0 } else { next_file_offset }; + + if !section.is_bss { + section.raw_data_size = align_up(section.raw_data_size, FILE_ALIGNMENT); + next_file_offset += section.raw_data_size; + } + + next_rva = align_up(next_rva + section.virtual_size, SECTION_ALIGNMENT); + } + + let size_of_image = next_rva; + let file_size = next_file_offset as u64; + + // Step 4: Compute symbol addresses + let mut section_address_map: HashMap<(usize, usize), (u32, u32)> = HashMap::new(); + for section in &output_sections { + for contrib in §ion.contributions { + let key = ( + contrib.object as *const _ as usize, + contrib.input_section_index.0, + ); + section_address_map.insert(key, (section.virtual_address, contrib.output_offset)); + } + } + + let num_symbols = symbol_db.num_symbols(); + let mut symbol_addresses = vec![0u64; num_symbols]; + + for group in resolved_groups { + for file in &group.files { + let ResolvedFile::Object(resolved_obj) = file else { + continue; + }; + let object = resolved_obj.common.object; + let symbol_id_range = &resolved_obj.common.symbol_id_range; + let obj_ptr = object as *const _ as usize; + + for (local_sym_index, symbol) in object.enumerate_symbols() { + let global_id = symbol_id_range.offset_to_id(local_sym_index.0); + let global_index = global_id.as_usize(); + if global_index >= num_symbols { + continue; + } + + let def_id = symbol_db.definition(global_id); + if def_id != global_id { + continue; + } + + if let 
Ok(Some(section_index)) = + object.symbol_section(symbol, local_sym_index) + { + let key = (obj_ptr, section_index.0); + if let Some(&(section_va, contrib_offset)) = section_address_map.get(&key) { + let sym_value = symbol.value(); + symbol_addresses[global_index] = + image_base + section_va as u64 + contrib_offset as u64 + sym_value; + } + } + } + } + } + + // Propagate addresses for redirected symbols + for sym_id_raw in 0..num_symbols { + let sym_id = crate::symbol_db::SymbolId::from_usize(sym_id_raw); + let def_id = symbol_db.definition(sym_id); + if def_id != sym_id && def_id.as_usize() < num_symbols { + symbol_addresses[sym_id_raw] = symbol_addresses[def_id.as_usize()]; + } + } + + // Step 5: Find entry point + let entry_point_rva = if let Some(entry_name) = &args.entry { + find_entry_point_rva(symbol_db, &symbol_addresses, entry_name, image_base)? + } else { + find_entry_point_rva(symbol_db, &symbol_addresses, "mainCRTStartup", image_base) + .or_else(|_| { + find_entry_point_rva(symbol_db, &symbol_addresses, "_mainCRTStartup", image_base) + }) + .or_else(|_| { + find_entry_point_rva( + symbol_db, + &symbol_addresses, + "WinMainCRTStartup", + image_base, + ) + }) + .unwrap_or(0) + }; + + Ok(PeLayout { + args, + image_base, + sections: output_sections, + entry_point_rva, + size_of_headers, + size_of_image, + machine, + file_size, + symbol_addresses, + }) +} + +fn find_entry_point_rva<'data>( + symbol_db: &SymbolDb<'data, CoffObjectFile<'data>>, + symbol_addresses: &[u64], + name: &str, + image_base: u64, +) -> Result { + let prehashed = UnversionedSymbolName::prehashed(name.as_bytes()); + let sym_id = symbol_db + .get_unversioned(&prehashed) + .with_context(|| format!("Entry point symbol `{name}` not found"))?; + let def_id = symbol_db.definition(sym_id); + let addr = symbol_addresses[def_id.as_usize()]; + if addr == 0 { + crate::bail!("Entry point symbol `{name}` has no address"); + } + Ok((addr - image_base) as u32) +} + +fn section_sort_key(name: &[u8; 8]) 
-> u32 { + match name { + b".text\0\0\0" => 0, + b".rdata\0\0" => 1, + b".data\0\0\0" => 2, + b".pdata\0\0" => 3, + b".xdata\0\0" => 4, + b".bss\0\0\0\0" => 5, + _ => 10, + } +} + +// ── Entry point ────────────────────────────────────────────────────────────── + +pub(crate) fn link<'data>( + symbol_db: &SymbolDb<'data, CoffObjectFile<'data>>, + resolved_groups: &[ResolvedGroup<'data, CoffObjectFile<'data>>], + args: &'data Args, + _output_kind: crate::OutputKind, +) -> Result { + let pe_layout = compute_layout(symbol_db, resolved_groups, args)?; + + // Create the output file and write the PE image. + let file_size = pe_layout.file_size; + let mut buf = vec![0u8; file_size as usize]; + write_headers(&mut buf, &pe_layout)?; + write_sections(&mut buf, &pe_layout)?; + + std::fs::write(&args.output, &buf) + .with_context(|| format!("Failed to write PE output to `{}`", args.output.display()))?; + + Ok(()) +} + +// ── Writing ────────────────────────────────────────────────────────────────── + +fn write_headers(buf: &mut [u8], layout: &PeLayout) -> Result { + let e = LE; + let mut offset = 0usize; + + // DOS Header + let dos_header: &mut pe::ImageDosHeader = from_bytes_mut_at(buf, &mut offset)?; + dos_header.e_magic.set(e, pe::IMAGE_DOS_SIGNATURE); + dos_header.e_lfanew.set(e, DOS_HEADER_SIZE); + + // PE Signature + let sig = buf + .get_mut(offset..offset + 4) + .context("Buffer too small for PE signature")?; + sig.copy_from_slice(&pe::IMAGE_NT_SIGNATURE.to_le_bytes()); + offset += 4; + + // COFF File Header + let file_header: &mut pe::ImageFileHeader = from_bytes_mut_at(buf, &mut offset)?; + file_header.machine.set(e, layout.machine); + file_header + .number_of_sections + .set(e, layout.sections.len() as u16); + file_header.time_date_stamp.set(e, 0); + file_header.pointer_to_symbol_table.set(e, 0); + file_header.number_of_symbols.set(e, 0); + file_header + .size_of_optional_header + .set(e, OPTIONAL_HEADER_SIZE as u16); + file_header.characteristics.set( + e, + 
pe::IMAGE_FILE_EXECUTABLE_IMAGE | pe::IMAGE_FILE_LARGE_ADDRESS_AWARE, + ); + + // Optional Header (PE32+) + let opt_header: &mut pe::ImageOptionalHeader64 = from_bytes_mut_at(buf, &mut offset)?; + opt_header.magic.set(e, pe::IMAGE_NT_OPTIONAL_HDR64_MAGIC); + opt_header.major_linker_version = 1; + opt_header.minor_linker_version = 0; + opt_header + .address_of_entry_point + .set(e, layout.entry_point_rva); + opt_header.image_base.set(e, layout.image_base); + opt_header.section_alignment.set(e, SECTION_ALIGNMENT); + opt_header.file_alignment.set(e, FILE_ALIGNMENT); + opt_header.major_operating_system_version.set(e, 6); + opt_header.minor_operating_system_version.set(e, 0); + opt_header.major_subsystem_version.set(e, 6); + opt_header.minor_subsystem_version.set(e, 0); + opt_header.size_of_image.set(e, layout.size_of_image); + opt_header.size_of_headers.set(e, layout.size_of_headers); + opt_header.subsystem.set(e, pe::IMAGE_SUBSYSTEM_WINDOWS_CUI); + opt_header.dll_characteristics.set( + e, + pe::IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA + | pe::IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE + | pe::IMAGE_DLLCHARACTERISTICS_NX_COMPAT + | pe::IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE, + ); + opt_header.size_of_stack_reserve.set(e, 0x100000); + opt_header.size_of_stack_commit.set(e, 0x1000); + opt_header.size_of_heap_reserve.set(e, 0x100000); + opt_header.size_of_heap_commit.set(e, 0x1000); + opt_header + .number_of_rva_and_sizes + .set(e, pe::IMAGE_NUMBEROF_DIRECTORY_ENTRIES as u32); + + // Compute aggregate sizes for the optional header + let mut size_of_code = 0u32; + let mut size_of_initialized_data = 0u32; + let mut size_of_uninitialized_data = 0u32; + let mut base_of_code = 0u32; + for section in &layout.sections { + if section.characteristics & pe::IMAGE_SCN_CNT_CODE != 0 { + size_of_code += section.raw_data_size; + if base_of_code == 0 { + base_of_code = section.virtual_address; + } + } + if section.characteristics & pe::IMAGE_SCN_CNT_INITIALIZED_DATA != 0 { + 
size_of_initialized_data += section.raw_data_size; + } + if section.characteristics & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0 { + size_of_uninitialized_data += section.virtual_size; + } + } + opt_header.size_of_code.set(e, size_of_code); + opt_header + .size_of_initialized_data + .set(e, size_of_initialized_data); + opt_header + .size_of_uninitialized_data + .set(e, size_of_uninitialized_data); + opt_header.base_of_code.set(e, base_of_code); + + // Data directories (16 entries, all zeroed for now) + let data_dirs_slice = buf + .get_mut(offset..offset + DATA_DIRS_SIZE as usize) + .context("Buffer too small for data directories")?; + data_dirs_slice.fill(0); + offset += DATA_DIRS_SIZE as usize; + + // Section Headers + for section in &layout.sections { + let sec_header: &mut pe::ImageSectionHeader = from_bytes_mut_at(buf, &mut offset)?; + sec_header.name = section.name; + sec_header.virtual_size.set(e, section.virtual_size); + sec_header.virtual_address.set(e, section.virtual_address); + sec_header.size_of_raw_data.set( + e, + if section.is_bss { + 0 + } else { + section.raw_data_size + }, + ); + sec_header.pointer_to_raw_data.set(e, section.file_offset); + sec_header.characteristics.set(e, section.characteristics); + } + + Ok(()) +} + +fn write_sections(buf: &mut [u8], layout: &PeLayout) -> Result { + for (section_idx, section) in layout.sections.iter().enumerate() { + if section.is_bss { + continue; + } + + for contrib in §ion.contributions { + let section_header = contrib.object.section(contrib.input_section_index)?; + let data = contrib.object.raw_section_data(section_header)?; + let out_offset = section.file_offset as usize + contrib.output_offset as usize; + let copy_size = data.len().min(contrib.size as usize); + let out_end = out_offset + copy_size; + + if out_end > buf.len() { + crate::bail!( + "Section data write out of bounds: offset={out_offset}, size={copy_size}, buf_len={}", + buf.len() + ); + } + + 
buf[out_offset..out_end].copy_from_slice(&data[..copy_size]); + + apply_relocations(buf, layout, section, section_idx, contrib)?; + } + } + + Ok(()) +} + +fn apply_relocations( + buf: &mut [u8], + layout: &PeLayout, + section: &PeOutputSection, + section_idx: usize, + contrib: &SectionContribution, +) -> Result { + let relocations = contrib + .object + .relocations(contrib.input_section_index, &())?; + + for reloc in relocations { + let reloc_type = reloc.typ.get(LE); + let reloc_offset_in_section = reloc.virtual_address.get(LE); + let symbol_table_index = reloc.symbol_table_index.get(LE) as usize; + + let global_id = contrib.symbol_id_start.add_usize(symbol_table_index); + let target_addr = layout.symbol_addresses[global_id.as_usize()]; + + let file_offset = section.file_offset as usize + + contrib.output_offset as usize + + reloc_offset_in_section as usize; + + let reloc_va = layout.image_base + + section.virtual_address as u64 + + contrib.output_offset as u64 + + reloc_offset_in_section as u64; + + match reloc_type { + pe::IMAGE_REL_AMD64_ABSOLUTE => {} + pe::IMAGE_REL_AMD64_ADDR64 => { + write_u64(buf, file_offset, target_addr)?; + } + pe::IMAGE_REL_AMD64_ADDR32 => { + write_u32(buf, file_offset, target_addr as u32)?; + } + pe::IMAGE_REL_AMD64_ADDR32NB => { + let rva = target_addr.wrapping_sub(layout.image_base); + write_u32(buf, file_offset, rva as u32)?; + } + pe::IMAGE_REL_AMD64_REL32 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(4); + write_i32(buf, file_offset, value as i32)?; + } + pe::IMAGE_REL_AMD64_REL32_1 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(5); + write_i32(buf, file_offset, value as i32)?; + } + pe::IMAGE_REL_AMD64_REL32_2 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(6); + write_i32(buf, file_offset, value as i32)?; + } + pe::IMAGE_REL_AMD64_REL32_3 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(7); + write_i32(buf, file_offset, value as i32)?; + } + 
pe::IMAGE_REL_AMD64_REL32_4 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(8); + write_i32(buf, file_offset, value as i32)?; + } + pe::IMAGE_REL_AMD64_REL32_5 => { + let value = target_addr.wrapping_sub(reloc_va).wrapping_sub(9); + write_i32(buf, file_offset, value as i32)?; + } + pe::IMAGE_REL_AMD64_SECTION => { + write_u16(buf, file_offset, (section_idx + 1) as u16)?; + } + pe::IMAGE_REL_AMD64_SECREL => { + let section_base = layout.image_base + section.virtual_address as u64; + let secrel = target_addr.wrapping_sub(section_base); + write_u32(buf, file_offset, secrel as u32)?; + } + _ => { + crate::bail!( + "Unsupported COFF relocation type 0x{reloc_type:04x} at offset 0x{file_offset:x}" + ); + } + } + } + + Ok(()) +} + +// ── Helper functions ───────────────────────────────────────────────────────── + +fn align_up(value: u32, alignment: u32) -> u32 { + (value + alignment - 1) & !(alignment - 1) +} + +fn from_bytes_mut_at<'a, T: object::pod::Pod>(buf: &'a mut [u8], offset: &mut usize) -> Result<&'a mut T> { + let size = core::mem::size_of::(); + let end = *offset + size; + if end > buf.len() { + crate::bail!("Buffer too small: need {end} bytes, have {}", buf.len()); + } + let slice = &mut buf[*offset..end]; + let ptr = slice.as_mut_ptr() as *mut T; + *offset = end; + Ok(unsafe { &mut *ptr }) +} + +fn write_u16(buf: &mut [u8], offset: usize, value: u16) -> Result { + let bytes = buf + .get_mut(offset..offset + 2) + .context("Relocation write out of bounds (u16)")?; + bytes.copy_from_slice(&value.to_le_bytes()); + Ok(()) +} + +fn write_u32(buf: &mut [u8], offset: usize, value: u32) -> Result { + let bytes = buf + .get_mut(offset..offset + 4) + .context("Relocation write out of bounds (u32)")?; + bytes.copy_from_slice(&value.to_le_bytes()); + Ok(()) +} + +fn write_i32(buf: &mut [u8], offset: usize, value: i32) -> Result { + let bytes = buf + .get_mut(offset..offset + 4) + .context("Relocation write out of bounds (i32)")?; + 
bytes.copy_from_slice(&value.to_le_bytes()); + Ok(()) +} + +fn write_u64(buf: &mut [u8], offset: usize, value: u64) -> Result { + let bytes = buf + .get_mut(offset..offset + 8) + .context("Relocation write out of bounds (u64)")?; + bytes.copy_from_slice(&value.to_le_bytes()); + Ok(()) +} diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index cca83649d..403d546e0 100644 --- a/libwild/src/platform.rs +++ b/libwild/src/platform.rs @@ -34,10 +34,29 @@ use std::ops::Range; use std::path::PathBuf; /// Represents a supported object file format + architecture combination. -pub(crate) trait Platform<'data>: Send + Sync + 'data { +pub(crate) trait Platform<'data>: 'data { type Relaxation: Relaxation; type File: ObjectFile<'data>; + fn create_plugin( + _linker: &'data crate::Linker, + _args: &'data Args<>::Args>, + ) -> crate::Result>> { + Ok(None) + } + + fn finish_link( + file_loader: &mut crate::input_data::FileLoader<'data>, + args: &'data Args<>::Args>, + plugin: &mut Option>, + symbol_db: crate::symbol_db::SymbolDb<'data, Self::File>, + per_symbol_flags: crate::value_flags::PerSymbolFlags, + resolver: crate::resolution::Resolver<'data, Self::File>, + output_sections: crate::output_section_id::OutputSections<'data>, + layout_rules_builder: crate::layout_rules::LayoutRulesBuilder<'data>, + output_kind: crate::OutputKind, + ) -> crate::Result>>; + /// Get ELF header magic for the architecture. fn elf_header_arch_magic() -> u16; @@ -170,6 +189,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat type CommonGroupStateExt: Default + std::fmt::Debug + Send + Sync + 'static; type LayoutResourcesExt: std::fmt::Debug + Send + Sync + 'data; type ProgramSegmentDef: ProgramSegmentDef; + type Args: Send + Sync + 'static; /// An index into the local object's symbol versions. 
type SymbolVersionIndex: Send + Sync + Copy; @@ -190,7 +210,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat /// As for `parse_bytes` but also validates that the file architecture matches what is expected /// based on `args`. - fn parse(input: &InputBytes<'data>, args: &Args) -> Result; + fn parse(input: &InputBytes<'data>, args: &Args) -> Result; fn is_dynamic(&self) -> bool; @@ -274,7 +294,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat ) -> Self::DynamicLayout; fn new_epilogue_layout( - args: &Args, + args: &Args, output_kind: OutputKind, dynamic_symbol_definitions: &mut [DynamicSymbolDefinition<'_>], ) -> Self::EpilogueLayout; @@ -376,7 +396,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat ) -> Result; fn create_layout_properties<'states, 'files, P: Platform<'data, File = Self>>( - args: &Args, + args: &Args, objects: impl Iterator, states: impl Iterator + Clone, ) -> Result @@ -409,7 +429,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat /// Called after GC phase has completed. Mostly useful for platform-specific logging. fn finalise_find_required_sections(groups: &[layout::GroupState<'data, Self>]); - fn pre_finalise_sizes_prelude(common: &mut layout::CommonGroupState<'data, Self>, args: &Args); + fn pre_finalise_sizes_prelude(common: &mut layout::CommonGroupState<'data, Self>, args: &Args); fn finalise_object_sizes( object: &mut layout::ObjectLayoutState<'data, Self>, @@ -492,7 +512,7 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat fn update_segment_keep_list( program_segments: &ProgramSegments, keep_segments: &mut [bool], - args: &Args, + args: &Args, ); fn program_segment_defs() -> &'static [Self::ProgramSegmentDef]; @@ -507,11 +527,26 @@ pub(crate) trait ObjectFile<'data>: Send + Sync + Sized + std::fmt::Debug + 'dat /// Implementations can force certain sections to be kept. 
Only needs to be done for sections /// that need to be emitted even if empty. - fn apply_force_keep_sections(keep_sections: &mut OutputSectionMap, args: &Args); + fn apply_force_keep_sections(keep_sections: &mut OutputSectionMap, args: &Args); /// Returns whether an input section with zero size destined for the specified output section /// should be considered content and thus prevent the output section from being discarded. fn is_zero_sized_section_content(section_id: OutputSectionId) -> bool; + + /// Whether to emit GOT/PLT symbol entries in the local symtab. + fn wants_got_plt_syms(_args: &Args) -> bool { + false + } + + /// Returns the stack size for the STACK segment. + fn stack_size(_args: &Args) -> u64 { + 0 + } + + /// Whether the hash style includes sysv. + fn hash_includes_sysv(_args: &Args) -> bool { + false + } } pub(crate) trait SectionHeader<'data, O: ObjectFile<'data>>: diff --git a/libwild/src/resolution.rs b/libwild/src/resolution.rs index 984290dae..698391f52 100644 --- a/libwild/src/resolution.rs +++ b/libwild/src/resolution.rs @@ -1,1525 +1,1525 @@ -//! This module resolves symbol references between objects. In the process, it decides which archive -//! entries are needed. We also resolve which output section, if any, each input section should be -//! assigned to. 
- -use crate::LayoutRules; -use crate::alignment::Alignment; -use crate::args::Args; -use crate::bail; -use crate::debug_assert_bail; -use crate::elf::RawSymbolName; -use crate::error::Context as _; -use crate::error::Error; -use crate::error::Result; -use crate::grouping::Group; -use crate::grouping::SequencedInputObject; -use crate::hash::PassThroughHashMap; -use crate::hash::PreHashed; -use crate::input_data::FileId; -use crate::input_data::InputRef; -use crate::input_data::PRELUDE_FILE_ID; -use crate::layout_rules::SectionRuleOutcome; -use crate::layout_rules::SectionRules; -use crate::output_section_id::CustomSectionDetails; -use crate::output_section_id::InitFiniSectionDetail; -use crate::output_section_id::OutputSections; -use crate::output_section_id::SectionName; -use crate::parsing::InternalSymDefInfo; -use crate::parsing::SymbolPlacement; -use crate::part_id; -use crate::part_id::PartId; -use crate::platform::DynamicTagValues as _; -use crate::platform::FrameIndex; -use crate::platform::ObjectFile; -use crate::platform::RawSymbolName as _; -use crate::platform::SectionFlags as _; -use crate::platform::SectionHeader as _; -use crate::platform::SectionType as _; -use crate::platform::Symbol as _; -use crate::platform::VerneedTable as _; -use crate::string_merging::StringMergeSectionExtra; -use crate::string_merging::StringMergeSectionSlot; -use crate::symbol::PreHashedSymbolName; -use crate::symbol::UnversionedSymbolName; -use crate::symbol::VersionedSymbolName; -use crate::symbol_db; -use crate::symbol_db::SymbolDb; -use crate::symbol_db::SymbolId; -use crate::symbol_db::SymbolIdRange; -use crate::symbol_db::SymbolStrength; -use crate::symbol_db::Visibility; -use crate::timing_phase; -use crate::value_flags::PerSymbolFlags; -use crate::value_flags::ValueFlags; -use crate::verbose_timing_phase; -use atomic_take::AtomicTake; -use crossbeam_queue::ArrayQueue; -use crossbeam_queue::SegQueue; -use linker_utils::elf::secnames; -use object::SectionIndex; -use 
rayon::Scope; -use rayon::iter::IntoParallelIterator; -use rayon::iter::IntoParallelRefMutIterator; -use rayon::iter::ParallelIterator; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; - -pub(crate) struct Resolver<'data, O: ObjectFile<'data>> { - undefined_symbols: Vec>, - pub(crate) resolved_groups: Vec>, -} - -impl<'data, O: ObjectFile<'data>> Resolver<'data, O> { - /// Resolves undefined symbols. In the process of resolving symbols, we decide which archive - /// entries to load. Some symbols may not have definitions, in which case we'll note those for - /// later processing. Can be called multiple times with additional groups having been added to - /// the SymbolDb in between. - pub(crate) fn resolve_symbols_and_select_archive_entries( - &mut self, - symbol_db: &mut SymbolDb<'data, O>, - ) -> Result { - resolve_symbols_and_select_archive_entries(self, symbol_db) - } - - /// For all regular objects that we've decided to load, decide what to do with each section. - /// Canonicalises undefined symbols. Some undefined symbols might be able to become defined if - /// we can identify them as start/stop symbols for which we found a custom section with the - /// appropriate name. 
- pub(crate) fn resolve_sections_and_canonicalise_undefined( - mut self, - symbol_db: &mut SymbolDb<'data, O>, - per_symbol_flags: &mut PerSymbolFlags, - output_sections: &mut OutputSections<'data>, - layout_rules: &LayoutRules<'data>, - ) -> Result>> { - timing_phase!("Section resolution"); - - resolve_sections(&mut self.resolved_groups, symbol_db, layout_rules)?; - - let mut syn = symbol_db.new_synthetic_symbols_group(); - - assign_section_ids(&mut self.resolved_groups, output_sections, symbol_db.args); - - canonicalise_undefined_symbols( - self.undefined_symbols, - output_sections, - &self.resolved_groups, - symbol_db, - per_symbol_flags, - &mut syn, - ); - - self.resolved_groups.push(ResolvedGroup { - files: vec![ResolvedFile::SyntheticSymbols(syn)], - }); - - Ok(self.resolved_groups) - } -} - -fn resolve_symbols_and_select_archive_entries<'data, O: ObjectFile<'data>>( - resolver: &mut Resolver<'data, O>, - symbol_db: &mut SymbolDb<'data, O>, -) -> Result { - timing_phase!("Resolve symbols"); - - // Note, this is the total number of objects including those that we might have processed in - // previous calls. This is just an upper bound on how many objects might need to be loaded. We - // can't just count the objects in the new groups because we might end up loading some of the - // objects from earlier groups. 
- let num_regular_objects = symbol_db.num_regular_objects(); - let num_lto_objects = symbol_db.num_lto_objects(); - if num_regular_objects == 0 && num_lto_objects == 0 { - bail!("no input files"); - } - - let mut symbol_definitions = symbol_db.take_definitions(); - let mut symbol_definitions_slice: &mut [SymbolId] = symbol_definitions.as_mut(); - - let mut definitions_per_group_and_file = Vec::new(); - definitions_per_group_and_file.resize_with(symbol_db.groups.len(), Vec::new); - - let outputs = { - verbose_timing_phase!("Allocate outputs store"); - Outputs::new(num_regular_objects, num_lto_objects) - }; - - let mut initial_work = Vec::new(); - - { - verbose_timing_phase!("Resolution setup"); - - let pre_existing_groups = resolver.resolved_groups.len(); - let new_groups = &symbol_db.groups[pre_existing_groups..]; - - for (group, definitions_out_per_file) in resolver - .resolved_groups - .iter() - .zip(&mut definitions_per_group_and_file) - { - *definitions_out_per_file = group - .files - .iter() - .map(|file| { - let definitions = symbol_definitions_slice - .split_off_mut(..file.symbol_id_range().len()) - .unwrap(); - - if matches!(file, ResolvedFile::NotLoaded(_)) { - AtomicTake::new(definitions) - } else { - AtomicTake::empty() - } - }) - .collect(); - } - - resolver.resolved_groups.extend( - new_groups - .iter() - .zip(&mut definitions_per_group_and_file[pre_existing_groups..]) - .map(|(group, definitions_out_per_file)| { - resolve_group( - group, - &mut initial_work, - definitions_out_per_file, - &mut symbol_definitions_slice, - symbol_db, - &outputs, - ) - }), - ); - }; - - let resources = ResolutionResources { - definitions_per_file: &definitions_per_group_and_file, - symbol_db, - outputs: &outputs, - }; - - rayon::in_place_scope(|scope| { - initial_work.into_par_iter().for_each(|work_item| { - process_object(work_item, &resources, scope); - }); - }); - - { - verbose_timing_phase!("Drop definitions_per_group_and_file"); - 
drop(definitions_per_group_and_file); - } - - symbol_db.restore_definitions(symbol_definitions); - - if let Some(e) = outputs.errors.pop() { - return Err(e); - } - - verbose_timing_phase!("Gather loaded objects"); - - for obj in outputs.loaded { - let file_id = match &obj { - ResolvedFile::Object(o) => o.common.file_id, - ResolvedFile::Dynamic(o) => o.common.file_id, - _ => unreachable!(), - }; - resolver.resolved_groups[file_id.group()].files[file_id.file()] = obj; - } - - #[cfg(feature = "plugins")] - for obj in outputs.loaded_lto_objects { - let file_id = obj.file_id; - resolver.resolved_groups[file_id.group()].files[file_id.file()] = - ResolvedFile::LtoInput(obj); - } - - resolver.undefined_symbols.extend(outputs.undefined_symbols); - - Ok(()) -} - -fn resolve_group<'data, 'definitions, O: ObjectFile<'data>>( - group: &Group<'data, O>, - initial_work_out: &mut Vec>, - definitions_out_per_file: &mut Vec>, - symbol_definitions_slice: &mut &'definitions mut [SymbolId], - symbol_db: &SymbolDb<'data, O>, - outputs: &Outputs<'data, O>, -) -> ResolvedGroup<'data, O> { - match group { - Group::Prelude(prelude) => { - let definitions_out = symbol_definitions_slice - .split_off_mut(..prelude.symbol_definitions.len()) - .unwrap(); - - work_items_do( - PRELUDE_FILE_ID, - definitions_out, - symbol_db, - outputs, - |work_item| { - initial_work_out.push(work_item); - }, - ); - - definitions_out_per_file.push(AtomicTake::empty()); - - ResolvedGroup { - files: vec![ResolvedFile::Prelude(ResolvedPrelude { - symbol_definitions: prelude.symbol_definitions.clone(), - })], - } - } - Group::Objects(parsed_input_objects) => { - definitions_out_per_file.reserve(parsed_input_objects.len()); - - let files = parsed_input_objects - .iter() - .map(|s| { - let definitions_out = symbol_definitions_slice - .split_off_mut(..s.symbol_id_range.len()) - .unwrap(); - - if s.is_optional() { - definitions_out_per_file.push(AtomicTake::new(definitions_out)); - } else { - work_items_do( - s.file_id, - 
definitions_out, - symbol_db, - outputs, - |work_item| { - initial_work_out.push(work_item); - }, - ); - definitions_out_per_file.push(AtomicTake::empty()); - } - - ResolvedFile::NotLoaded(NotLoaded { - symbol_id_range: s.symbol_id_range, - }) - }) - .collect(); - - ResolvedGroup { files } - } - Group::LinkerScripts(scripts) => { - let files = scripts - .iter() - .map(|s| { - definitions_out_per_file.push(AtomicTake::empty()); - - ResolvedFile::LinkerScript(ResolvedLinkerScript { - input: s.parsed.input, - file_id: s.file_id, - symbol_id_range: s.symbol_id_range, - // TODO: Consider alternative to cloning this. - symbol_definitions: s.parsed.symbol_defs.clone(), - }) - }) - .collect(); - - ResolvedGroup { files } - } - Group::SyntheticSymbols(syn) => { - definitions_out_per_file.push(AtomicTake::empty()); - - ResolvedGroup { - files: vec![ResolvedFile::SyntheticSymbols(ResolvedSyntheticSymbols { - file_id: syn.file_id, - start_symbol_id: syn.symbol_id_range.start(), - symbol_definitions: Vec::new(), - })], - } - } - #[cfg(feature = "plugins")] - Group::LtoInputs(lto_objects) => ResolvedGroup { - files: lto_objects - .iter() - .map(|o| { - let definitions_out = symbol_definitions_slice - .split_off_mut(..o.symbol_id_range.len()) - .unwrap(); - - if o.is_optional() { - definitions_out_per_file.push(AtomicTake::new(definitions_out)); - } else { - work_items_do( - o.file_id, - definitions_out, - symbol_db, - outputs, - |work_item| { - initial_work_out.push(work_item); - }, - ); - definitions_out_per_file.push(AtomicTake::empty()); - } - - ResolvedFile::NotLoaded(NotLoaded { - symbol_id_range: o.symbol_id_range, - }) - }) - .collect(), - }, - } -} - -fn resolve_sections<'data, O: ObjectFile<'data>>( - groups: &mut [ResolvedGroup<'data, O>], - symbol_db: &SymbolDb<'data, O>, - layout_rules: &LayoutRules<'data>, -) -> Result { - timing_phase!("Resolve sections"); - - let loaded_metrics: LoadedMetrics = Default::default(); - let herd = symbol_db.herd; - - 
groups.par_iter_mut().try_for_each_init( - || herd.get(), - |allocator, group| -> Result { - verbose_timing_phase!("Resolve group sections"); - - for file in &mut group.files { - let ResolvedFile::::Object(obj) = file else { - continue; - }; - - obj.sections = resolve_sections_for_object( - obj, - symbol_db.args, - allocator, - &loaded_metrics, - &layout_rules.section_rules, - )?; - - obj.relocations = obj.common.object.parse_relocations()?; - } - Ok(()) - }, - )?; - - loaded_metrics.log(); - - Ok(()) -} - -const MAX_SYMBOLS_PER_WORK_ITEM: usize = 5000; - -/// A request to load a chunk of symbols from an object. -struct LoadObjectSymbolsRequest<'definitions> { - /// The ID of the object to load. - file_id: FileId, - - symbol_start_offset: usize, - - /// The symbol resolutions for the object to be loaded that should be written to when we load - /// the object. - definitions_out: &'definitions mut [SymbolId], -} - -#[derive(Default)] -pub(crate) struct LoadedMetrics { - pub(crate) loaded_bytes: AtomicUsize, - pub(crate) loaded_compressed_bytes: AtomicUsize, - pub(crate) decompressed_bytes: AtomicUsize, -} - -impl LoadedMetrics { - fn log(&self) { - let loaded_bytes = self.loaded_bytes.load(Ordering::Relaxed); - let loaded_compressed_bytes = self.loaded_compressed_bytes.load(Ordering::Relaxed); - let decompressed_bytes = self.decompressed_bytes.load(Ordering::Relaxed); - tracing::debug!(target: "metrics", loaded_bytes, loaded_compressed_bytes, decompressed_bytes, "input_sections"); - } -} - -struct ResolutionResources<'data, 'scope, O: ObjectFile<'data>> { - definitions_per_file: &'scope Vec>>, - symbol_db: &'scope SymbolDb<'data, O>, - outputs: &'scope Outputs<'data, O>, -} - -impl<'scope, 'data, O: ObjectFile<'data>> ResolutionResources<'data, 'scope, O> { - /// Request loading of `file_id` if it hasn't already been requested. 
- #[inline(always)] - fn try_request_file_id(&'scope self, file_id: FileId, scope: &Scope<'scope>) { - let definitions_group = &self.definitions_per_file[file_id.group()]; - - let Some(atomic_take) = &definitions_group.get(file_id.file()) else { - // A group from a previous resolution batch. Assume that the relevant file was already - // loaded. - return; - }; - - // Do a read before we call `take`. Reads are cheaper, so this is an optimisation that - // reduces the need for exclusive access to the cache line. - if atomic_take.is_taken() { - // The definitions have previously been taken indicating that this file has already been - // processed, nothing more to do. - return; - } - - let Some(definitions_out) = atomic_take.take() else { - // Another thread just beat us to it. - return; - }; - - work_items_do( - file_id, - definitions_out, - self.symbol_db, - self.outputs, - |work_item| { - scope.spawn(|scope| { - process_object(work_item, self, scope); - }); - }, - ); - } - - fn handle_result(&self, result: Result) { - if let Err(error) = result { - let _ = self.outputs.errors.push(error); - } - } -} - -fn work_items_do<'definitions, 'data, O: ObjectFile<'data>>( - file_id: FileId, - mut definitions_out: &'definitions mut [SymbolId], - symbol_db: &SymbolDb<'data, O>, - outputs: &Outputs<'data, O>, - mut request_callback: impl FnMut(LoadObjectSymbolsRequest<'definitions>), -) { - match &symbol_db.groups[file_id.group()] { - Group::Objects(parsed_input_objects) => { - let obj = &parsed_input_objects[file_id.file()]; - let common = ResolvedCommon::new(obj); - let resolved_object = - if let Some(dynamic_tag_values) = obj.parsed.object.dynamic_tag_values() { - ResolvedFile::Dynamic(ResolvedDynamic::new(common, dynamic_tag_values)) - } else { - ResolvedFile::Object(ResolvedObject::new(common)) - }; - // Push won't fail because we allocated enough space for all the objects. 
- outputs.loaded.push(resolved_object).unwrap(); - } - #[cfg(feature = "plugins")] - Group::LtoInputs(lto_objects) => { - let obj = <o_objects[file_id.file()]; - // Push won't fail because we allocated enough space for all the LTO objects. - outputs - .loaded_lto_objects - .push(ResolvedLtoInput { - file_id: obj.file_id, - symbol_id_range: obj.symbol_id_range, - }) - .unwrap(); - - request_callback(LoadObjectSymbolsRequest { - file_id, - symbol_start_offset: 0, - definitions_out, - }); - return; - } - _ => {} - } - - let chunk_size = match &symbol_db.groups[file_id.group()] { - Group::Objects(_) => MAX_SYMBOLS_PER_WORK_ITEM, - _ => definitions_out.len(), - }; - - let mut symbol_start_offset = 0; - loop { - let len = chunk_size.min(definitions_out.len()); - let chunk_definitions_out = definitions_out.split_off_mut(..len).unwrap(); - - let work_item = LoadObjectSymbolsRequest { - file_id, - definitions_out: chunk_definitions_out, - symbol_start_offset, - }; - request_callback(work_item); - - symbol_start_offset += len; - if definitions_out.is_empty() { - break; - } - } -} - -#[derive(Debug)] -pub(crate) struct ResolvedGroup<'data, O: ObjectFile<'data>> { - pub(crate) files: Vec>, -} - -#[derive(Debug)] -pub(crate) enum ResolvedFile<'data, O: ObjectFile<'data>> { - NotLoaded(NotLoaded), - Prelude(ResolvedPrelude<'data>), - Object(ResolvedObject<'data, O>), - Dynamic(ResolvedDynamic<'data, O>), - LinkerScript(ResolvedLinkerScript<'data>), - SyntheticSymbols(ResolvedSyntheticSymbols<'data>), - #[cfg(feature = "plugins")] - LtoInput(ResolvedLtoInput), -} - -#[derive(Debug)] -pub(crate) struct NotLoaded { - pub(crate) symbol_id_range: SymbolIdRange, -} - -/// A section, but where we may or may not yet have decided to load it. -#[derive(Debug, Clone, Copy)] -pub(crate) enum SectionSlot { - /// We've decided that this section won't be loaded. - Discard, - - /// The section hasn't been loaded yet, but may be loaded if it's referenced. 
- Unloaded(UnloadedSection), - - /// The section had the retain bit set, so must be loaded. - MustLoad(UnloadedSection), - - /// We've already loaded the section. - Loaded(crate::layout::Section), - - /// The section contains frame data, e.g. .eh_frame or equivalent. - FrameData(object::SectionIndex), - - /// The section is a string-merge section. - MergeStrings(StringMergeSectionSlot), - - // The section contains a debug info section that might be loaded. - UnloadedDebugInfo(PartId), - - // Loaded section with debug info content. - LoadedDebugInfo(crate::layout::Section), - - // GNU property section (.note.gnu.property) - NoteGnuProperty(object::SectionIndex), - - // RISC-V attributes section (.riscv.attributes) - RiscvVAttributes(object::SectionIndex), -} - -#[derive(Debug, Clone, Copy)] -pub(crate) struct UnloadedSection { - pub(crate) part_id: PartId, - - /// The index of the last FDE for this section. Previous FDEs will be linked from this. - pub(crate) last_frame_index: Option, - - /// Whether the section has a name that makes it eligible for generation of __start_ / __stop_ - /// symbols. In particular, the name of the section doesn't start with a ".". - pub(crate) start_stop_eligible: bool, -} - -impl UnloadedSection { - fn new(part_id: PartId) -> Self { - Self { - part_id, - last_frame_index: None, - start_stop_eligible: false, - } - } -} - -#[derive(Debug, Clone)] -pub(crate) struct ResolvedPrelude<'data> { - pub(crate) symbol_definitions: Vec>, -} - -/// Resolved state common to dynamic and regular objects. 
-#[derive(Debug)] -pub(crate) struct ResolvedCommon<'data, O: ObjectFile<'data>> { - pub(crate) input: InputRef<'data>, - pub(crate) object: &'data O, - pub(crate) file_id: FileId, - pub(crate) symbol_id_range: SymbolIdRange, -} - -#[derive(Debug)] -pub(crate) struct ResolvedObject<'data, O: ObjectFile<'data>> { - pub(crate) common: ResolvedCommon<'data, O>, - - pub(crate) sections: Vec, - pub(crate) relocations: O::RelocationSections, - - pub(crate) string_merge_extras: Vec>, - - /// Details about each custom section that is defined in this object. - custom_sections: Vec>, - - init_fini_sections: Vec, -} - -#[derive(Debug)] -pub(crate) struct ResolvedDynamic<'data, O: ObjectFile<'data>> { - pub(crate) common: ResolvedCommon<'data, O>, - dynamic_tag_values: O::DynamicTagValues, -} - -#[derive(Debug)] -pub(crate) struct ResolvedLinkerScript<'data> { - pub(crate) input: InputRef<'data>, - pub(crate) file_id: FileId, - pub(crate) symbol_id_range: SymbolIdRange, - pub(crate) symbol_definitions: Vec>, -} - -#[derive(Debug, Clone)] -pub(crate) struct ResolvedSyntheticSymbols<'data> { - pub(crate) file_id: FileId, - pub(crate) start_symbol_id: SymbolId, - pub(crate) symbol_definitions: Vec>, -} - -#[cfg(feature = "plugins")] -#[derive(Debug, Clone)] -pub(crate) struct ResolvedLtoInput { - pub(crate) file_id: FileId, - pub(crate) symbol_id_range: SymbolIdRange, -} - -fn assign_section_ids<'data, O: ObjectFile<'data>>( - resolved: &mut [ResolvedGroup<'data, O>], - output_sections: &mut OutputSections<'data>, - args: &Args, -) { - timing_phase!("Assign section IDs"); - - for group in resolved { - for file in &mut group.files { - if let ResolvedFile::Object(s) = file { - output_sections.add_sections(&s.custom_sections, s.sections.as_mut_slice(), args); - apply_init_fini_secondaries( - &s.init_fini_sections, - s.sections.as_mut_slice(), - output_sections, - ); - } - } - } -} - -fn parse_priority_suffix(suffix: &[u8]) -> Option { - if suffix.is_empty() || !suffix.iter().all(|b| 
b.is_ascii_digit()) { - return None; - } - - let value = core::str::from_utf8(suffix).ok()?.parse::().ok()?; - Some(u16::try_from(value).unwrap_or(u16::MAX)) -} - -fn init_fini_priority(name: &[u8]) -> Option { - if name == secnames::INIT_ARRAY_SECTION_NAME || name == secnames::FINI_ARRAY_SECTION_NAME { - return Some(u16::MAX); - } - - if let Some(rest) = name.strip_prefix(b".init_array.") { - return parse_priority_suffix(rest); - } - - if let Some(rest) = name.strip_prefix(b".fini_array.") { - return parse_priority_suffix(rest); - } - - // .ctors and .dtors without suffix have the same priority as .init_array/.fini_array - if name == secnames::CTORS_SECTION_NAME || name == secnames::DTORS_SECTION_NAME { - return Some(u16::MAX); - } - - // .ctors uses descending order (65535 = lowest priority, 0 = highest) - // while .init_array uses ascending order (0 = highest priority, 65535 = lowest) - if let Some(rest) = name.strip_prefix(b".ctors.") { - return parse_priority_suffix(rest).map(|p| u16::MAX.saturating_sub(p)); - } - - if let Some(rest) = name.strip_prefix(b".dtors.") { - return parse_priority_suffix(rest).map(|p| u16::MAX.saturating_sub(p)); - } - - None -} - -struct Outputs<'data, O: ObjectFile<'data>> { - /// Where we put objects once we've loaded them. - loaded: ArrayQueue>, - - #[cfg(feature = "plugins")] - loaded_lto_objects: ArrayQueue, - - /// Any errors that we encountered. 
- errors: ArrayQueue, - - undefined_symbols: SegQueue>, -} - -impl<'data, O: ObjectFile<'data>> Outputs<'data, O> { - #[allow(unused_variables)] - fn new(num_regular_objects: usize, num_lto_objects: usize) -> Self { - Self { - loaded: ArrayQueue::new(num_regular_objects.max(1)), - #[cfg(feature = "plugins")] - loaded_lto_objects: ArrayQueue::new(num_lto_objects.max(1)), - errors: ArrayQueue::new(1), - undefined_symbols: SegQueue::new(), - } - } -} - -fn process_object<'scope, 'data: 'scope, 'definitions, O: ObjectFile<'data>>( - work_item: LoadObjectSymbolsRequest<'definitions>, - resources: &'scope ResolutionResources<'data, 'scope, O>, - scope: &Scope<'scope>, -) { - let file_id = work_item.file_id; - let definitions_out = work_item.definitions_out; - - match &resources.symbol_db.groups[file_id.group()] { - Group::Prelude(prelude) => { - verbose_timing_phase!("Resolve prelude symbols"); - - load_prelude(prelude, definitions_out, resources, scope); - } - Group::Objects(parsed_input_objects) => { - verbose_timing_phase!("Resolve object symbols"); - - let obj = &parsed_input_objects[file_id.file()]; - - resources.handle_result( - resolve_symbols( - obj, - resources, - work_item.symbol_start_offset, - definitions_out, - scope, - ) - .with_context(|| format!("Failed to resolve symbols in {obj}")), - ); - } - Group::LinkerScripts(_) => {} - Group::SyntheticSymbols(_) => {} - #[cfg(feature = "plugins")] - Group::LtoInputs(objects) => { - let obj = &objects[file_id.file()]; - resources.handle_result( - resolve_lto_symbols(obj, resources, definitions_out, scope) - .with_context(|| format!("Failed to resolve symbols in {obj}")), - ); - } - } -} - -#[cfg(feature = "plugins")] -fn resolve_lto_symbols<'data, 'scope, O: ObjectFile<'data>>( - obj: &crate::linker_plugins::LtoInput<'data>, - resources: &'scope ResolutionResources<'data, 'scope, O>, - definitions_out: &mut [SymbolId], - scope: &Scope<'scope>, -) -> Result { - obj.symbols - .iter() - .enumerate() - 
.zip(definitions_out) - .try_for_each( - |((local_symbol_index, local_symbol), definition)| -> Result { - if !local_symbol.is_definition() { - let mut name_info = RawSymbolName::parse(local_symbol.name.bytes()); - if let Some(version) = local_symbol.version { - name_info.version_name = Some(version); - } - - let symbol_attributes = SymbolAttributes { - name_info, - is_local: false, - default_visibility: local_symbol.visibility == object::elf::STV_DEFAULT, - is_weak: local_symbol.kind - == Some(crate::linker_plugins::SymbolKind::WeakUndef), - }; - - resolve_symbol( - obj.symbol_id_range.offset_to_id(local_symbol_index), - &symbol_attributes, - definition, - resources, - false, - obj.file_id, - scope, - )?; - } - - Ok(()) - }, - ) -} - -struct UndefinedSymbol<'data> { - /// If we have a file ID here and that file is loaded, then the symbol is actually defined and - /// this record can be ignored. - ignore_if_loaded: Option, - name: PreHashedSymbolName<'data>, - symbol_id: SymbolId, -} - -fn load_prelude<'scope, 'data, O: ObjectFile<'data>>( - prelude: &crate::parsing::Prelude, - definitions_out: &mut [SymbolId], - resources: &'scope ResolutionResources<'data, 'scope, O>, - scope: &Scope<'scope>, -) { - // The start symbol could be defined within an archive entry. If it is, then we need to load - // it. We don't currently store the resulting SymbolId, but instead look it up again during - // layout. - load_symbol_named( - resources, - &mut SymbolId::undefined(), - resources.symbol_db.entry_symbol_name(), - scope, - ); - - // Try to resolve any symbols that the user requested be undefined (e.g. via --undefined). If an - // object defines such a symbol, request that the object be loaded. Also, point our undefined - // symbol record to the definition. 
- for (def_info, definition_out) in prelude.symbol_definitions.iter().zip(definitions_out) { - match def_info.placement { - SymbolPlacement::ForceUndefined | SymbolPlacement::DefsymSymbol(_, _) => { - load_symbol_named(resources, definition_out, def_info.name, scope); - } - _ => {} - } - } -} - -fn load_symbol_named<'scope, 'data, O: ObjectFile<'data>>( - resources: &'scope ResolutionResources<'data, 'scope, O>, - definition_out: &mut SymbolId, - name: &[u8], - scope: &Scope<'scope>, -) { - if let Some(symbol_id) = resources - .symbol_db - .get_unversioned(&UnversionedSymbolName::prehashed(name)) - { - *definition_out = symbol_id; - - let symbol_file_id = resources.symbol_db.file_id_for_symbol(symbol_id); - resources.try_request_file_id(symbol_file_id, scope); - } -} - -/// Where there are multiple references to undefined symbols with the same name, pick one reference -/// as the canonical one to which we'll refer. Where undefined symbols can be resolved to -/// __start/__stop symbols that refer to the start or stop of a custom section, collect that -/// information up and put it into `custom_start_stop_defs`. -fn canonicalise_undefined_symbols<'data, O: ObjectFile<'data>>( - mut undefined_symbols: Vec>, - output_sections: &OutputSections, - groups: &[ResolvedGroup<'data, O>], - symbol_db: &mut SymbolDb<'data, O>, - per_symbol_flags: &mut PerSymbolFlags, - custom_start_stop_defs: &mut ResolvedSyntheticSymbols<'data>, -) { - timing_phase!("Canonicalise undefined symbols"); - - let mut name_to_id: PassThroughHashMap, SymbolId> = - Default::default(); - - let mut versioned_name_to_id: PassThroughHashMap, SymbolId> = - Default::default(); - - // Sort by symbol ID to ensure deterministic behaviour. We sort in reverse order so that LTO - // outputs get higher priority than LTO inputs. This means that the canonical symbol ID for any - // given name will be the one for the last file that refers to that symbol. 
- undefined_symbols.sort_by_key(|u| usize::MAX - u.symbol_id.as_usize()); - - for undefined in undefined_symbols { - let is_defined = undefined.ignore_if_loaded.is_some_and(|file_id| { - !matches!( - groups[file_id.group()].files[file_id.file()], - ResolvedFile::NotLoaded(_) - ) - }); - - if is_defined { - // The archive entry that defined the symbol in question ended up being loaded, so the - // weak symbol is defined after all. - continue; - } - - match undefined.name { - PreHashedSymbolName::Unversioned(pre_hashed) => { - match name_to_id.entry(pre_hashed) { - hashbrown::hash_map::Entry::Vacant(entry) => { - let symbol_id = allocate_start_stop_symbol_id( - pre_hashed, - symbol_db, - per_symbol_flags, - custom_start_stop_defs, - output_sections, - ); - - // We either make our undefined symbol dynamic, allowing the possibility - // that it might end up being defined at runtime, or we make it - // non-interposable, which means it'll remain null and even if it ends up - // defined at runtime, we won't use that definition. If the symbol doesn't - // have default visibility, then we make it non-interposable. If we're - // building a shared object, we always make the symbol dynamic. If we're - // building a statically linked executable, then we always make it - // non-interposable. If we're building a regular, dynamically linked - // executable, then we make it dynamic if the symbol is weak and otherwise - // make it non-interposable. That last case, a non-weak, default-visibility, - // undefined symbol in an executable is generally a link error, however if - // the flag --warn-unresolved-symbols is passed, then it won't be. Linker - // behaviour differs in this case. GNU ld makes the symbol non-interposable, - // while lld makes it dynamic. We match GNU ld in this case. 
- if symbol_id.is_none() { - let output_kind = symbol_db.output_kind; - let visibility = symbol_db.input_symbol_visibility(undefined.symbol_id); - - if visibility == Visibility::Default - && (output_kind.is_shared_object() - || (!output_kind.is_static_executable() - && symbol_db.symbol_strength(undefined.symbol_id, groups) - == SymbolStrength::Weak)) - { - per_symbol_flags.set_flag(undefined.symbol_id, ValueFlags::DYNAMIC); - } else { - per_symbol_flags - .set_flag(undefined.symbol_id, ValueFlags::NON_INTERPOSABLE); - } - } - - // If the symbol isn't a start/stop symbol, then assign responsibility for - // the symbol to the first object that referenced - // it. This lets us have PLT/GOT entries - // for the symbol if they're needed. - let symbol_id = symbol_id.unwrap_or(undefined.symbol_id); - entry.insert(symbol_id); - symbol_db.replace_definition(undefined.symbol_id, symbol_id); - } - hashbrown::hash_map::Entry::Occupied(entry) => { - symbol_db.replace_definition(undefined.symbol_id, *entry.get()); - } - } - } - PreHashedSymbolName::Versioned(pre_hashed) => { - match versioned_name_to_id.entry(pre_hashed) { - hashbrown::hash_map::Entry::Vacant(entry) => { - entry.insert(undefined.symbol_id); - } - hashbrown::hash_map::Entry::Occupied(entry) => { - symbol_db.replace_definition(undefined.symbol_id, *entry.get()); - } - } - } - } - } -} - -fn allocate_start_stop_symbol_id<'data, O: ObjectFile<'data>>( - name: PreHashed>, - symbol_db: &mut SymbolDb<'data, O>, - per_symbol_flags: &mut PerSymbolFlags, - custom_start_stop_defs: &mut ResolvedSyntheticSymbols<'data>, - output_sections: &OutputSections, -) -> Option { - let symbol_name_bytes = name.bytes(); - - let (section_name, is_start) = if let Some(s) = symbol_name_bytes.strip_prefix(b"__start_") { - (s, true) - } else if let Some(s) = symbol_name_bytes.strip_prefix(b"__stop_") { - (s, false) - } else { - return None; - }; - - let section_id = output_sections.custom_name_to_id(SectionName(section_name))?; - - let 
def_info = if is_start { - InternalSymDefInfo::new(SymbolPlacement::SectionStart(section_id), name.bytes()) - } else { - InternalSymDefInfo::new(SymbolPlacement::SectionEnd(section_id), name.bytes()) - }; - - let symbol_id = symbol_db.add_synthetic_symbol(per_symbol_flags, name, custom_start_stop_defs); - - custom_start_stop_defs.symbol_definitions.push(def_info); - - Some(symbol_id) -} - -impl<'data, O: ObjectFile<'data>> ResolvedCommon<'data, O> { - fn new(obj: &'data SequencedInputObject<'data, O>) -> Self { - Self { - input: obj.parsed.input, - object: &obj.parsed.object, - file_id: obj.file_id, - symbol_id_range: obj.symbol_id_range, - } - } - - pub(crate) fn symbol_strength(&self, symbol_id: SymbolId) -> SymbolStrength { - let local_index = symbol_id.to_input(self.symbol_id_range); - let Ok(obj_symbol) = self.object.symbol(local_index) else { - // Errors from this function should have been reported elsewhere. - return SymbolStrength::Undefined; - }; - SymbolStrength::of(obj_symbol) - } -} - -fn apply_init_fini_secondaries<'data>( - details: &[InitFiniSectionDetail], - sections: &mut [SectionSlot], - output_sections: &mut OutputSections<'data>, -) { - for d in details { - let Some(slot) = sections.get_mut(d.index as usize) else { - continue; - }; - - let unloaded = match slot { - SectionSlot::Unloaded(u) | SectionSlot::MustLoad(u) => u, - _ => continue, - }; - - let sid = - output_sections.get_or_create_init_fini_secondary(d.primary, d.priority, d.alignment); - unloaded.part_id = sid.part_id_with_alignment(d.alignment); - } -} - -impl<'data, O: ObjectFile<'data>> ResolvedObject<'data, O> { - fn new(common: ResolvedCommon<'data, O>) -> Self { - Self { - common, - // We'll fill this the rest during section resolution. 
- sections: Default::default(), - relocations: Default::default(), - string_merge_extras: Default::default(), - custom_sections: Default::default(), - init_fini_sections: Default::default(), - } - } -} - -impl<'data, O: ObjectFile<'data>> ResolvedDynamic<'data, O> { - fn new(common: ResolvedCommon<'data, O>, dynamic_tag_values: O::DynamicTagValues) -> Self { - Self { - common, - dynamic_tag_values, - } - } - - pub(crate) fn lib_name(&self) -> &'data [u8] { - self.dynamic_tag_values.lib_name(&self.common.input) - } -} - -fn resolve_sections_for_object<'data, O: ObjectFile<'data>>( - obj: &mut ResolvedObject<'data, O>, - args: &Args, - allocator: &bumpalo_herd::Member<'data>, - loaded_metrics: &LoadedMetrics, - rules: &SectionRules, -) -> Result> { - // Note, we build up the collection with push rather than collect because at the time of - // writing, object's `SectionTable::enumerate` isn't an exact-size iterator, so using collect - // would result in resizing. - let mut sections = Vec::with_capacity(obj.common.object.num_sections()); - for (input_section_index, input_section) in obj.common.object.enumerate_sections() { - sections.push(resolve_section( - input_section_index, - input_section, - obj, - args, - allocator, - loaded_metrics, - rules, - )?); - } - Ok(sections) -} - -#[inline(always)] -fn resolve_section<'data, O: ObjectFile<'data>>( - input_section_index: SectionIndex, - input_section: &O::SectionHeader, - obj: &mut ResolvedObject<'data, O>, - args: &Args, - allocator: &bumpalo_herd::Member<'data>, - loaded_metrics: &LoadedMetrics, - rules: &SectionRules, -) -> Result { - let section_name = obj - .common - .object - .section_name(input_section) - .unwrap_or_default(); - - if section_name.starts_with(secnames::GNU_LTO_SYMTAB_PREFIX.as_bytes()) { - if cfg!(feature = "plugins") { - bail!("Found GCC LTO input that we didn't supply to linker plugin"); - } - return Err(symbol_db::linker_plugin_disabled_error()); - } - - let section_flags = 
input_section.flags(); - let raw_alignment = obj.common.object.section_alignment(input_section)?; - let alignment = Alignment::new(raw_alignment.max(1))?; - let should_merge_sections = part_id::should_merge_sections(section_flags, raw_alignment, args); - - let mut unloaded_section; - let mut is_debug_info = false; - let section_type = input_section.section_type(); - let mut must_load = section_flags.should_retain() || section_type.is_note(); - - let file_name = if let Some(entry) = &obj.common.input.entry { - // For archive members, match against the member name (e.g., "app.o"), - // not the archive filename (e.g., "libfoo.a"). - Some(entry.identifier.as_slice()) - } else { - obj.common - .input - .file - .filename - .file_name() - .map(|n| n.as_encoded_bytes()) - }; - - match rules.lookup(section_name, file_name, section_flags, section_type) { - SectionRuleOutcome::Section(output_info) => { - let part_id = if output_info.section_id.is_regular() { - output_info.section_id.part_id_with_alignment(alignment) - } else { - output_info.section_id.base_part_id() - }; - - must_load |= output_info.must_keep; - - unloaded_section = UnloadedSection::new(part_id); - } - SectionRuleOutcome::SortedSection(output_info) => { - let part_id = if output_info.section_id.is_regular() { - output_info.section_id.part_id_with_alignment(alignment) - } else { - output_info.section_id.base_part_id() - }; - if let Some(priority) = init_fini_priority(section_name) { - obj.init_fini_sections.push(InitFiniSectionDetail { - index: input_section_index.0 as u32, - primary: output_info.section_id, - priority, - alignment, - }); - } - - must_load |= output_info.must_keep; - - unloaded_section = UnloadedSection::new(part_id); - } - SectionRuleOutcome::Discard => return Ok(SectionSlot::Discard), - SectionRuleOutcome::EhFrame => { - return Ok(SectionSlot::FrameData(input_section_index)); - } - SectionRuleOutcome::NoteGnuProperty => { - return Ok(SectionSlot::NoteGnuProperty(input_section_index)); - } - 
SectionRuleOutcome::Debug => { - if args.strip_debug() && !section_flags.is_alloc() { - return Ok(SectionSlot::Discard); - } - - is_debug_info = !section_flags.is_alloc(); - - unloaded_section = UnloadedSection::new(part_id::CUSTOM_PLACEHOLDER); - } - SectionRuleOutcome::Custom => { - unloaded_section = UnloadedSection::new(part_id::CUSTOM_PLACEHOLDER); - unloaded_section.start_stop_eligible = !section_name.starts_with(b"."); - } - SectionRuleOutcome::RiscVAttribute => { - return Ok(SectionSlot::RiscvVAttributes(input_section_index)); - } - }; - - if unloaded_section.part_id == part_id::CUSTOM_PLACEHOLDER { - let custom_section = CustomSectionDetails { - name: SectionName(section_name), - alignment, - index: input_section_index, - }; - - obj.custom_sections.push(custom_section); - } - - let slot = if should_merge_sections { - let section_data = - obj.common - .object - .section_data(input_section, allocator, loaded_metrics)?; - let section_flags = input_section.flags(); - - if section_data.is_empty() { - SectionSlot::Discard - } else { - obj.string_merge_extras.push(StringMergeSectionExtra { - index: input_section_index, - section_data, - section_flags, - }); - - SectionSlot::MergeStrings(StringMergeSectionSlot::new(unloaded_section.part_id)) - } - } else if is_debug_info { - SectionSlot::UnloadedDebugInfo(part_id::CUSTOM_PLACEHOLDER) - } else if must_load { - SectionSlot::MustLoad(unloaded_section) - } else { - SectionSlot::Unloaded(unloaded_section) - }; - - Ok(slot) -} - -fn resolve_symbols<'data, 'scope, O: ObjectFile<'data>>( - obj: &SequencedInputObject<'data, O>, - resources: &'scope ResolutionResources<'data, 'scope, O>, - start_symbol_offset: usize, - definitions_out: &mut [SymbolId], - scope: &Scope<'scope>, -) -> Result { - let verneed = obj.parsed.object.verneed_table()?; - - obj.parsed.object.symbols()[start_symbol_offset..] 
- .iter() - .enumerate() - .zip(definitions_out) - .try_for_each( - |((local_symbol_index, local_symbol), definition)| -> Result { - // Don't try to resolve symbols that are already defined, e.g. locals and globals - // that we define. Also don't try to resolve symbol zero - the undefined symbol. - // Hidden symbols exported from shared objects don't make sense, so we skip - // resolving them as well. - if !definition.is_undefined() - || start_symbol_offset + local_symbol_index == 0 - || (obj.is_dynamic() && local_symbol.is_hidden()) - { - return Ok(()); - } - - let name_bytes = obj.parsed.object.symbol_name(local_symbol)?; - - let name_info = if let Some(version_name) = - verneed.version_name(object::SymbolIndex(local_symbol_index)) - { - RawSymbolName { - name: name_bytes, - version_name: Some(version_name), - is_default: false, - } - } else { - RawSymbolName::parse(name_bytes) - }; - - let symbol_attributes = SymbolAttributes { - name_info, - is_local: local_symbol.is_local(), - default_visibility: local_symbol.is_interposable(), - is_weak: local_symbol.is_weak(), - }; - - resolve_symbol( - obj.symbol_id_range - .offset_to_id(start_symbol_offset + local_symbol_index), - &symbol_attributes, - definition, - resources, - obj.is_dynamic(), - obj.file_id, - scope, - ) - }, - ) -} - -#[derive(Debug)] -struct SymbolAttributes<'data> { - is_local: bool, - default_visibility: bool, - is_weak: bool, - name_info: RawSymbolName<'data>, -} - -#[inline(always)] -fn resolve_symbol<'data, 'scope, O: ObjectFile<'data>>( - local_symbol_id: SymbolId, - local_symbol_attributes: &SymbolAttributes<'data>, - definition_out: &mut SymbolId, - resources: &'scope ResolutionResources<'data, 'scope, O>, - is_dynamic: bool, - file_id: FileId, - scope: &Scope<'scope>, -) -> Result { - debug_assert_bail!( - !local_symbol_attributes.is_local, - "Only globals should be undefined, found symbol `{}` ({local_symbol_id})", - local_symbol_attributes.name_info, - ); - - let prehashed_name = 
PreHashedSymbolName::from_raw(&local_symbol_attributes.name_info); - - // Only default-visibility symbols can reference symbols from shared objects. - let allow_dynamic = local_symbol_attributes.default_visibility; - - match resources.symbol_db.get(&prehashed_name, allow_dynamic) { - Some(symbol_id) => { - *definition_out = symbol_id; - let symbol_file_id = resources.symbol_db.file_id_for_symbol(symbol_id); - - if symbol_file_id != file_id && !local_symbol_attributes.is_weak { - // Undefined symbols in shared objects should actually activate as-needed shared - // objects, however the rules for whether this should result in a DT_NEEDED entry - // are kind of subtle, so for now, we don't activate shared objects from shared - // objects. See - // https://github.com/wild-linker/wild/issues/930#issuecomment-3007027924 for - // more details. TODO: Fix this. - if !is_dynamic || !resources.symbol_db.file(symbol_file_id).is_dynamic() { - resources.try_request_file_id(symbol_file_id, scope); - } - } else if symbol_file_id != PRELUDE_FILE_ID { - // The symbol is weak and we can't be sure that the file that defined it will end up - // being loaded, so the symbol might actually be undefined. Register it as an - // undefined symbol then later when we handle undefined symbols, we'll check if the - // file got loaded. TODO: If the file is a non-archived object, or possibly even if - // it's an archived object that we've already decided to load, then we could skip - // this. 
- resources.outputs.undefined_symbols.push(UndefinedSymbol { - ignore_if_loaded: Some(symbol_file_id), - name: prehashed_name, - symbol_id: local_symbol_id, - }); - } - } - None => { - resources.outputs.undefined_symbols.push(UndefinedSymbol { - ignore_if_loaded: None, - name: prehashed_name, - symbol_id: local_symbol_id, - }); - } - } - Ok(()) -} - -impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedObject<'data, O> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.common.input, f) - } -} - -impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedDynamic<'data, O> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.common.input, f) - } -} - -impl<'data> std::fmt::Display for ResolvedLinkerScript<'data> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(&self.input, f) - } -} - -impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedFile<'data, O> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ResolvedFile::NotLoaded(_) => std::fmt::Display::fmt("", f), - ResolvedFile::Prelude(_) => std::fmt::Display::fmt("", f), - ResolvedFile::Object(o) => std::fmt::Display::fmt(o, f), - ResolvedFile::Dynamic(o) => std::fmt::Display::fmt(o, f), - ResolvedFile::LinkerScript(o) => std::fmt::Display::fmt(o, f), - ResolvedFile::SyntheticSymbols(_) => std::fmt::Display::fmt("", f), - #[cfg(feature = "plugins")] - ResolvedFile::LtoInput(_) => std::fmt::Display::fmt("", f), - } - } -} - -impl SectionSlot { - pub(crate) fn is_loaded(&self) -> bool { - !matches!(self, SectionSlot::Discard | SectionSlot::Unloaded(..)) - } - - pub(crate) fn set_part_id(&mut self, part_id: PartId) { - match self { - SectionSlot::Unloaded(section) => section.part_id = part_id, - SectionSlot::MustLoad(section) => section.part_id = part_id, - SectionSlot::Loaded(section) => section.part_id 
= part_id, - SectionSlot::MergeStrings(section) => section.part_id = part_id, - SectionSlot::UnloadedDebugInfo(out) => *out = part_id, - SectionSlot::LoadedDebugInfo(section) => section.part_id = part_id, - SectionSlot::Discard - | SectionSlot::FrameData(_) - | SectionSlot::NoteGnuProperty(_) - | SectionSlot::RiscvVAttributes(_) => {} - } - } - - pub(crate) fn unloaded_mut(&mut self) -> Option<&mut UnloadedSection> { - match self { - SectionSlot::Unloaded(unloaded) | SectionSlot::MustLoad(unloaded) => Some(unloaded), - _ => None, - } - } -} - -impl<'data, O: ObjectFile<'data>> ResolvedFile<'data, O> { - fn symbol_id_range(&self) -> SymbolIdRange { - match self { - ResolvedFile::NotLoaded(s) => s.symbol_id_range, - ResolvedFile::Prelude(s) => s.symbol_id_range(), - ResolvedFile::Object(s) => s.common.symbol_id_range, - ResolvedFile::Dynamic(s) => s.common.symbol_id_range, - ResolvedFile::LinkerScript(s) => s.symbol_id_range, - ResolvedFile::SyntheticSymbols(s) => s.symbol_id_range(), - #[cfg(feature = "plugins")] - ResolvedFile::LtoInput(s) => s.symbol_id_range, - } - } -} - -impl ResolvedPrelude<'_> { - fn symbol_id_range(&self) -> SymbolIdRange { - SymbolIdRange::input(SymbolId::undefined(), self.symbol_definitions.len()) - } -} - -impl ResolvedSyntheticSymbols<'_> { - fn symbol_id_range(&self) -> SymbolIdRange { - SymbolIdRange::input(self.start_symbol_id, self.symbol_definitions.len()) - } -} - -impl<'data, O: ObjectFile<'data>> Default for Resolver<'data, O> { - fn default() -> Self { - Self { - undefined_symbols: Default::default(), - resolved_groups: Default::default(), - } - } -} - -// We create quite a lot of `SectionSlot`s. We don't generally copy them, however we do need to -// eventually drop the Vecs that contain them. Dropping those Vecs is a lot cheaper if the slots -// don't need to have run Drop. 
We check for this, by making sure the type implements `Copy` -#[test] -fn section_slot_is_copy() { - fn assert_copy(_v: T) {} - - assert_copy(SectionSlot::Discard); -} +//! This module resolves symbol references between objects. In the process, it decides which archive +//! entries are needed. We also resolve which output section, if any, each input section should be +//! assigned to. + +use crate::layout_rules::LayoutRules; +use crate::alignment::Alignment; +use crate::args::Args; +use crate::bail; +use crate::debug_assert_bail; +use crate::elf::RawSymbolName; +use crate::error::Context as _; +use crate::error::Error; +use crate::error::Result; +use crate::grouping::Group; +use crate::grouping::SequencedInputObject; +use crate::hash::PassThroughHashMap; +use crate::hash::PreHashed; +use crate::input_data::FileId; +use crate::input_data::InputRef; +use crate::input_data::PRELUDE_FILE_ID; +use crate::layout_rules::SectionRuleOutcome; +use crate::layout_rules::SectionRules; +use crate::output_section_id::CustomSectionDetails; +use crate::output_section_id::InitFiniSectionDetail; +use crate::output_section_id::OutputSections; +use crate::output_section_id::SectionName; +use crate::parsing::InternalSymDefInfo; +use crate::parsing::SymbolPlacement; +use crate::part_id; +use crate::part_id::PartId; +use crate::platform::DynamicTagValues as _; +use crate::platform::FrameIndex; +use crate::platform::ObjectFile; +use crate::platform::RawSymbolName as _; +use crate::platform::SectionFlags as _; +use crate::platform::SectionHeader as _; +use crate::platform::SectionType as _; +use crate::platform::Symbol as _; +use crate::platform::VerneedTable as _; +use crate::string_merging::StringMergeSectionExtra; +use crate::string_merging::StringMergeSectionSlot; +use crate::symbol::PreHashedSymbolName; +use crate::symbol::UnversionedSymbolName; +use crate::symbol::VersionedSymbolName; +use crate::symbol_db; +use crate::symbol_db::SymbolDb; +use crate::symbol_db::SymbolId; +use 
crate::symbol_db::SymbolIdRange; +use crate::symbol_db::SymbolStrength; +use crate::symbol_db::Visibility; +use crate::timing_phase; +use crate::value_flags::PerSymbolFlags; +use crate::value_flags::ValueFlags; +use crate::verbose_timing_phase; +use atomic_take::AtomicTake; +use crossbeam_queue::ArrayQueue; +use crossbeam_queue::SegQueue; +use linker_utils::elf::secnames; +use object::SectionIndex; +use rayon::Scope; +use rayon::iter::IntoParallelIterator; +use rayon::iter::IntoParallelRefMutIterator; +use rayon::iter::ParallelIterator; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +pub(crate) struct Resolver<'data, O: ObjectFile<'data>> { + undefined_symbols: Vec>, + pub(crate) resolved_groups: Vec>, +} + +impl<'data, O: ObjectFile<'data>> Resolver<'data, O> { + /// Resolves undefined symbols. In the process of resolving symbols, we decide which archive + /// entries to load. Some symbols may not have definitions, in which case we'll note those for + /// later processing. Can be called multiple times with additional groups having been added to + /// the SymbolDb in between. + pub(crate) fn resolve_symbols_and_select_archive_entries( + &mut self, + symbol_db: &mut SymbolDb<'data, O>, + ) -> Result { + resolve_symbols_and_select_archive_entries(self, symbol_db) + } + + /// For all regular objects that we've decided to load, decide what to do with each section. + /// Canonicalises undefined symbols. Some undefined symbols might be able to become defined if + /// we can identify them as start/stop symbols for which we found a custom section with the + /// appropriate name. 
+ pub(crate) fn resolve_sections_and_canonicalise_undefined( + mut self, + symbol_db: &mut SymbolDb<'data, O>, + per_symbol_flags: &mut PerSymbolFlags, + output_sections: &mut OutputSections<'data>, + layout_rules: &LayoutRules<'data>, + ) -> Result>> { + timing_phase!("Section resolution"); + + resolve_sections(&mut self.resolved_groups, symbol_db, layout_rules)?; + + let mut syn = symbol_db.new_synthetic_symbols_group(); + + assign_section_ids(&mut self.resolved_groups, output_sections, symbol_db.args); + + canonicalise_undefined_symbols( + self.undefined_symbols, + output_sections, + &self.resolved_groups, + symbol_db, + per_symbol_flags, + &mut syn, + ); + + self.resolved_groups.push(ResolvedGroup { + files: vec![ResolvedFile::SyntheticSymbols(syn)], + }); + + Ok(self.resolved_groups) + } +} + +fn resolve_symbols_and_select_archive_entries<'data, O: ObjectFile<'data>>( + resolver: &mut Resolver<'data, O>, + symbol_db: &mut SymbolDb<'data, O>, +) -> Result { + timing_phase!("Resolve symbols"); + + // Note, this is the total number of objects including those that we might have processed in + // previous calls. This is just an upper bound on how many objects might need to be loaded. We + // can't just count the objects in the new groups because we might end up loading some of the + // objects from earlier groups. 
+ let num_regular_objects = symbol_db.num_regular_objects(); + let num_lto_objects = symbol_db.num_lto_objects(); + if num_regular_objects == 0 && num_lto_objects == 0 { + bail!("no input files"); + } + + let mut symbol_definitions = symbol_db.take_definitions(); + let mut symbol_definitions_slice: &mut [SymbolId] = symbol_definitions.as_mut(); + + let mut definitions_per_group_and_file = Vec::new(); + definitions_per_group_and_file.resize_with(symbol_db.groups.len(), Vec::new); + + let outputs = { + verbose_timing_phase!("Allocate outputs store"); + Outputs::new(num_regular_objects, num_lto_objects) + }; + + let mut initial_work = Vec::new(); + + { + verbose_timing_phase!("Resolution setup"); + + let pre_existing_groups = resolver.resolved_groups.len(); + let new_groups = &symbol_db.groups[pre_existing_groups..]; + + for (group, definitions_out_per_file) in resolver + .resolved_groups + .iter() + .zip(&mut definitions_per_group_and_file) + { + *definitions_out_per_file = group + .files + .iter() + .map(|file| { + let definitions = symbol_definitions_slice + .split_off_mut(..file.symbol_id_range().len()) + .unwrap(); + + if matches!(file, ResolvedFile::NotLoaded(_)) { + AtomicTake::new(definitions) + } else { + AtomicTake::empty() + } + }) + .collect(); + } + + resolver.resolved_groups.extend( + new_groups + .iter() + .zip(&mut definitions_per_group_and_file[pre_existing_groups..]) + .map(|(group, definitions_out_per_file)| { + resolve_group( + group, + &mut initial_work, + definitions_out_per_file, + &mut symbol_definitions_slice, + symbol_db, + &outputs, + ) + }), + ); + }; + + let resources = ResolutionResources { + definitions_per_file: &definitions_per_group_and_file, + symbol_db, + outputs: &outputs, + }; + + rayon::in_place_scope(|scope| { + initial_work.into_par_iter().for_each(|work_item| { + process_object(work_item, &resources, scope); + }); + }); + + { + verbose_timing_phase!("Drop definitions_per_group_and_file"); + 
drop(definitions_per_group_and_file); + } + + symbol_db.restore_definitions(symbol_definitions); + + if let Some(e) = outputs.errors.pop() { + return Err(e); + } + + verbose_timing_phase!("Gather loaded objects"); + + for obj in outputs.loaded { + let file_id = match &obj { + ResolvedFile::Object(o) => o.common.file_id, + ResolvedFile::Dynamic(o) => o.common.file_id, + _ => unreachable!(), + }; + resolver.resolved_groups[file_id.group()].files[file_id.file()] = obj; + } + + #[cfg(feature = "plugins")] + for obj in outputs.loaded_lto_objects { + let file_id = obj.file_id; + resolver.resolved_groups[file_id.group()].files[file_id.file()] = + ResolvedFile::LtoInput(obj); + } + + resolver.undefined_symbols.extend(outputs.undefined_symbols); + + Ok(()) +} + +fn resolve_group<'data, 'definitions, O: ObjectFile<'data>>( + group: &Group<'data, O>, + initial_work_out: &mut Vec>, + definitions_out_per_file: &mut Vec>, + symbol_definitions_slice: &mut &'definitions mut [SymbolId], + symbol_db: &SymbolDb<'data, O>, + outputs: &Outputs<'data, O>, +) -> ResolvedGroup<'data, O> { + match group { + Group::Prelude(prelude) => { + let definitions_out = symbol_definitions_slice + .split_off_mut(..prelude.symbol_definitions.len()) + .unwrap(); + + work_items_do( + PRELUDE_FILE_ID, + definitions_out, + symbol_db, + outputs, + |work_item| { + initial_work_out.push(work_item); + }, + ); + + definitions_out_per_file.push(AtomicTake::empty()); + + ResolvedGroup { + files: vec![ResolvedFile::Prelude(ResolvedPrelude { + symbol_definitions: prelude.symbol_definitions.clone(), + })], + } + } + Group::Objects(parsed_input_objects) => { + definitions_out_per_file.reserve(parsed_input_objects.len()); + + let files = parsed_input_objects + .iter() + .map(|s| { + let definitions_out = symbol_definitions_slice + .split_off_mut(..s.symbol_id_range.len()) + .unwrap(); + + if s.is_optional() { + definitions_out_per_file.push(AtomicTake::new(definitions_out)); + } else { + work_items_do( + s.file_id, + 
definitions_out, + symbol_db, + outputs, + |work_item| { + initial_work_out.push(work_item); + }, + ); + definitions_out_per_file.push(AtomicTake::empty()); + } + + ResolvedFile::NotLoaded(NotLoaded { + symbol_id_range: s.symbol_id_range, + }) + }) + .collect(); + + ResolvedGroup { files } + } + Group::LinkerScripts(scripts) => { + let files = scripts + .iter() + .map(|s| { + definitions_out_per_file.push(AtomicTake::empty()); + + ResolvedFile::LinkerScript(ResolvedLinkerScript { + input: s.parsed.input, + file_id: s.file_id, + symbol_id_range: s.symbol_id_range, + // TODO: Consider alternative to cloning this. + symbol_definitions: s.parsed.symbol_defs.clone(), + }) + }) + .collect(); + + ResolvedGroup { files } + } + Group::SyntheticSymbols(syn) => { + definitions_out_per_file.push(AtomicTake::empty()); + + ResolvedGroup { + files: vec![ResolvedFile::SyntheticSymbols(ResolvedSyntheticSymbols { + file_id: syn.file_id, + start_symbol_id: syn.symbol_id_range.start(), + symbol_definitions: Vec::new(), + })], + } + } + #[cfg(feature = "plugins")] + Group::LtoInputs(lto_objects) => ResolvedGroup { + files: lto_objects + .iter() + .map(|o| { + let definitions_out = symbol_definitions_slice + .split_off_mut(..o.symbol_id_range.len()) + .unwrap(); + + if o.is_optional() { + definitions_out_per_file.push(AtomicTake::new(definitions_out)); + } else { + work_items_do( + o.file_id, + definitions_out, + symbol_db, + outputs, + |work_item| { + initial_work_out.push(work_item); + }, + ); + definitions_out_per_file.push(AtomicTake::empty()); + } + + ResolvedFile::NotLoaded(NotLoaded { + symbol_id_range: o.symbol_id_range, + }) + }) + .collect(), + }, + } +} + +fn resolve_sections<'data, O: ObjectFile<'data>>( + groups: &mut [ResolvedGroup<'data, O>], + symbol_db: &SymbolDb<'data, O>, + layout_rules: &LayoutRules<'data>, +) -> Result { + timing_phase!("Resolve sections"); + + let loaded_metrics: LoadedMetrics = Default::default(); + let herd = symbol_db.herd; + + 
groups.par_iter_mut().try_for_each_init( + || herd.get(), + |allocator, group| -> Result { + verbose_timing_phase!("Resolve group sections"); + + for file in &mut group.files { + let ResolvedFile::::Object(obj) = file else { + continue; + }; + + obj.sections = resolve_sections_for_object( + obj, + symbol_db.args, + allocator, + &loaded_metrics, + &layout_rules.section_rules, + )?; + + obj.relocations = obj.common.object.parse_relocations()?; + } + Ok(()) + }, + )?; + + loaded_metrics.log(); + + Ok(()) +} + +const MAX_SYMBOLS_PER_WORK_ITEM: usize = 5000; + +/// A request to load a chunk of symbols from an object. +struct LoadObjectSymbolsRequest<'definitions> { + /// The ID of the object to load. + file_id: FileId, + + symbol_start_offset: usize, + + /// The symbol resolutions for the object to be loaded that should be written to when we load + /// the object. + definitions_out: &'definitions mut [SymbolId], +} + +#[derive(Default)] +pub(crate) struct LoadedMetrics { + pub(crate) loaded_bytes: AtomicUsize, + pub(crate) loaded_compressed_bytes: AtomicUsize, + pub(crate) decompressed_bytes: AtomicUsize, +} + +impl LoadedMetrics { + fn log(&self) { + let loaded_bytes = self.loaded_bytes.load(Ordering::Relaxed); + let loaded_compressed_bytes = self.loaded_compressed_bytes.load(Ordering::Relaxed); + let decompressed_bytes = self.decompressed_bytes.load(Ordering::Relaxed); + tracing::debug!(target: "metrics", loaded_bytes, loaded_compressed_bytes, decompressed_bytes, "input_sections"); + } +} + +struct ResolutionResources<'data, 'scope, O: ObjectFile<'data>> { + definitions_per_file: &'scope Vec>>, + symbol_db: &'scope SymbolDb<'data, O>, + outputs: &'scope Outputs<'data, O>, +} + +impl<'scope, 'data, O: ObjectFile<'data>> ResolutionResources<'data, 'scope, O> { + /// Request loading of `file_id` if it hasn't already been requested. 
+ #[inline(always)] + fn try_request_file_id(&'scope self, file_id: FileId, scope: &Scope<'scope>) { + let definitions_group = &self.definitions_per_file[file_id.group()]; + + let Some(atomic_take) = &definitions_group.get(file_id.file()) else { + // A group from a previous resolution batch. Assume that the relevant file was already + // loaded. + return; + }; + + // Do a read before we call `take`. Reads are cheaper, so this is an optimisation that + // reduces the need for exclusive access to the cache line. + if atomic_take.is_taken() { + // The definitions have previously been taken indicating that this file has already been + // processed, nothing more to do. + return; + } + + let Some(definitions_out) = atomic_take.take() else { + // Another thread just beat us to it. + return; + }; + + work_items_do( + file_id, + definitions_out, + self.symbol_db, + self.outputs, + |work_item| { + scope.spawn(|scope| { + process_object(work_item, self, scope); + }); + }, + ); + } + + fn handle_result(&self, result: Result) { + if let Err(error) = result { + let _ = self.outputs.errors.push(error); + } + } +} + +fn work_items_do<'definitions, 'data, O: ObjectFile<'data>>( + file_id: FileId, + mut definitions_out: &'definitions mut [SymbolId], + symbol_db: &SymbolDb<'data, O>, + outputs: &Outputs<'data, O>, + mut request_callback: impl FnMut(LoadObjectSymbolsRequest<'definitions>), +) { + match &symbol_db.groups[file_id.group()] { + Group::Objects(parsed_input_objects) => { + let obj = &parsed_input_objects[file_id.file()]; + let common = ResolvedCommon::new(obj); + let resolved_object = + if let Some(dynamic_tag_values) = obj.parsed.object.dynamic_tag_values() { + ResolvedFile::Dynamic(ResolvedDynamic::new(common, dynamic_tag_values)) + } else { + ResolvedFile::Object(ResolvedObject::new(common)) + }; + // Push won't fail because we allocated enough space for all the objects. 
+ outputs.loaded.push(resolved_object).unwrap(); + } + #[cfg(feature = "plugins")] + Group::LtoInputs(lto_objects) => { + let obj = <o_objects[file_id.file()]; + // Push won't fail because we allocated enough space for all the LTO objects. + outputs + .loaded_lto_objects + .push(ResolvedLtoInput { + file_id: obj.file_id, + symbol_id_range: obj.symbol_id_range, + }) + .unwrap(); + + request_callback(LoadObjectSymbolsRequest { + file_id, + symbol_start_offset: 0, + definitions_out, + }); + return; + } + _ => {} + } + + let chunk_size = match &symbol_db.groups[file_id.group()] { + Group::Objects(_) => MAX_SYMBOLS_PER_WORK_ITEM, + _ => definitions_out.len(), + }; + + let mut symbol_start_offset = 0; + loop { + let len = chunk_size.min(definitions_out.len()); + let chunk_definitions_out = definitions_out.split_off_mut(..len).unwrap(); + + let work_item = LoadObjectSymbolsRequest { + file_id, + definitions_out: chunk_definitions_out, + symbol_start_offset, + }; + request_callback(work_item); + + symbol_start_offset += len; + if definitions_out.is_empty() { + break; + } + } +} + +#[derive(Debug)] +pub(crate) struct ResolvedGroup<'data, O: ObjectFile<'data>> { + pub(crate) files: Vec>, +} + +#[derive(Debug)] +pub(crate) enum ResolvedFile<'data, O: ObjectFile<'data>> { + NotLoaded(NotLoaded), + Prelude(ResolvedPrelude<'data>), + Object(ResolvedObject<'data, O>), + Dynamic(ResolvedDynamic<'data, O>), + LinkerScript(ResolvedLinkerScript<'data>), + SyntheticSymbols(ResolvedSyntheticSymbols<'data>), + #[cfg(feature = "plugins")] + LtoInput(ResolvedLtoInput), +} + +#[derive(Debug)] +pub(crate) struct NotLoaded { + pub(crate) symbol_id_range: SymbolIdRange, +} + +/// A section, but where we may or may not yet have decided to load it. +#[derive(Debug, Clone, Copy)] +pub(crate) enum SectionSlot { + /// We've decided that this section won't be loaded. + Discard, + + /// The section hasn't been loaded yet, but may be loaded if it's referenced. 
+ Unloaded(UnloadedSection), + + /// The section had the retain bit set, so must be loaded. + MustLoad(UnloadedSection), + + /// We've already loaded the section. + Loaded(crate::layout::Section), + + /// The section contains frame data, e.g. .eh_frame or equivalent. + FrameData(object::SectionIndex), + + /// The section is a string-merge section. + MergeStrings(StringMergeSectionSlot), + + // The section contains a debug info section that might be loaded. + UnloadedDebugInfo(PartId), + + // Loaded section with debug info content. + LoadedDebugInfo(crate::layout::Section), + + // GNU property section (.note.gnu.property) + NoteGnuProperty(object::SectionIndex), + + // RISC-V attributes section (.riscv.attributes) + RiscvVAttributes(object::SectionIndex), +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct UnloadedSection { + pub(crate) part_id: PartId, + + /// The index of the last FDE for this section. Previous FDEs will be linked from this. + pub(crate) last_frame_index: Option, + + /// Whether the section has a name that makes it eligible for generation of __start_ / __stop_ + /// symbols. In particular, the name of the section doesn't start with a ".". + pub(crate) start_stop_eligible: bool, +} + +impl UnloadedSection { + fn new(part_id: PartId) -> Self { + Self { + part_id, + last_frame_index: None, + start_stop_eligible: false, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct ResolvedPrelude<'data> { + pub(crate) symbol_definitions: Vec>, +} + +/// Resolved state common to dynamic and regular objects. 
+#[derive(Debug)] +pub(crate) struct ResolvedCommon<'data, O: ObjectFile<'data>> { + pub(crate) input: InputRef<'data>, + pub(crate) object: &'data O, + pub(crate) file_id: FileId, + pub(crate) symbol_id_range: SymbolIdRange, +} + +#[derive(Debug)] +pub(crate) struct ResolvedObject<'data, O: ObjectFile<'data>> { + pub(crate) common: ResolvedCommon<'data, O>, + + pub(crate) sections: Vec, + pub(crate) relocations: O::RelocationSections, + + pub(crate) string_merge_extras: Vec>, + + /// Details about each custom section that is defined in this object. + custom_sections: Vec>, + + init_fini_sections: Vec, +} + +#[derive(Debug)] +pub(crate) struct ResolvedDynamic<'data, O: ObjectFile<'data>> { + pub(crate) common: ResolvedCommon<'data, O>, + dynamic_tag_values: O::DynamicTagValues, +} + +#[derive(Debug)] +pub(crate) struct ResolvedLinkerScript<'data> { + pub(crate) input: InputRef<'data>, + pub(crate) file_id: FileId, + pub(crate) symbol_id_range: SymbolIdRange, + pub(crate) symbol_definitions: Vec>, +} + +#[derive(Debug, Clone)] +pub(crate) struct ResolvedSyntheticSymbols<'data> { + pub(crate) file_id: FileId, + pub(crate) start_symbol_id: SymbolId, + pub(crate) symbol_definitions: Vec>, +} + +#[cfg(feature = "plugins")] +#[derive(Debug, Clone)] +pub(crate) struct ResolvedLtoInput { + pub(crate) file_id: FileId, + pub(crate) symbol_id_range: SymbolIdRange, +} + +fn assign_section_ids<'data, O: ObjectFile<'data>>( + resolved: &mut [ResolvedGroup<'data, O>], + output_sections: &mut OutputSections<'data>, + args: &Args, +) { + timing_phase!("Assign section IDs"); + + for group in resolved { + for file in &mut group.files { + if let ResolvedFile::Object(s) = file { + output_sections.add_sections(&s.custom_sections, s.sections.as_mut_slice(), args); + apply_init_fini_secondaries( + &s.init_fini_sections, + s.sections.as_mut_slice(), + output_sections, + ); + } + } + } +} + +fn parse_priority_suffix(suffix: &[u8]) -> Option { + if suffix.is_empty() || !suffix.iter().all(|b| 
b.is_ascii_digit()) { + return None; + } + + let value = core::str::from_utf8(suffix).ok()?.parse::().ok()?; + Some(u16::try_from(value).unwrap_or(u16::MAX)) +} + +fn init_fini_priority(name: &[u8]) -> Option { + if name == secnames::INIT_ARRAY_SECTION_NAME || name == secnames::FINI_ARRAY_SECTION_NAME { + return Some(u16::MAX); + } + + if let Some(rest) = name.strip_prefix(b".init_array.") { + return parse_priority_suffix(rest); + } + + if let Some(rest) = name.strip_prefix(b".fini_array.") { + return parse_priority_suffix(rest); + } + + // .ctors and .dtors without suffix have the same priority as .init_array/.fini_array + if name == secnames::CTORS_SECTION_NAME || name == secnames::DTORS_SECTION_NAME { + return Some(u16::MAX); + } + + // .ctors uses descending order (65535 = lowest priority, 0 = highest) + // while .init_array uses ascending order (0 = highest priority, 65535 = lowest) + if let Some(rest) = name.strip_prefix(b".ctors.") { + return parse_priority_suffix(rest).map(|p| u16::MAX.saturating_sub(p)); + } + + if let Some(rest) = name.strip_prefix(b".dtors.") { + return parse_priority_suffix(rest).map(|p| u16::MAX.saturating_sub(p)); + } + + None +} + +struct Outputs<'data, O: ObjectFile<'data>> { + /// Where we put objects once we've loaded them. + loaded: ArrayQueue>, + + #[cfg(feature = "plugins")] + loaded_lto_objects: ArrayQueue, + + /// Any errors that we encountered. 
+ errors: ArrayQueue, + + undefined_symbols: SegQueue>, +} + +impl<'data, O: ObjectFile<'data>> Outputs<'data, O> { + #[allow(unused_variables)] + fn new(num_regular_objects: usize, num_lto_objects: usize) -> Self { + Self { + loaded: ArrayQueue::new(num_regular_objects.max(1)), + #[cfg(feature = "plugins")] + loaded_lto_objects: ArrayQueue::new(num_lto_objects.max(1)), + errors: ArrayQueue::new(1), + undefined_symbols: SegQueue::new(), + } + } +} + +fn process_object<'scope, 'data: 'scope, 'definitions, O: ObjectFile<'data>>( + work_item: LoadObjectSymbolsRequest<'definitions>, + resources: &'scope ResolutionResources<'data, 'scope, O>, + scope: &Scope<'scope>, +) { + let file_id = work_item.file_id; + let definitions_out = work_item.definitions_out; + + match &resources.symbol_db.groups[file_id.group()] { + Group::Prelude(prelude) => { + verbose_timing_phase!("Resolve prelude symbols"); + + load_prelude(prelude, definitions_out, resources, scope); + } + Group::Objects(parsed_input_objects) => { + verbose_timing_phase!("Resolve object symbols"); + + let obj = &parsed_input_objects[file_id.file()]; + + resources.handle_result( + resolve_symbols( + obj, + resources, + work_item.symbol_start_offset, + definitions_out, + scope, + ) + .with_context(|| format!("Failed to resolve symbols in {obj}")), + ); + } + Group::LinkerScripts(_) => {} + Group::SyntheticSymbols(_) => {} + #[cfg(feature = "plugins")] + Group::LtoInputs(objects) => { + let obj = &objects[file_id.file()]; + resources.handle_result( + resolve_lto_symbols(obj, resources, definitions_out, scope) + .with_context(|| format!("Failed to resolve symbols in {obj}")), + ); + } + } +} + +#[cfg(feature = "plugins")] +fn resolve_lto_symbols<'data, 'scope, O: ObjectFile<'data>>( + obj: &crate::linker_plugins::LtoInput<'data>, + resources: &'scope ResolutionResources<'data, 'scope, O>, + definitions_out: &mut [SymbolId], + scope: &Scope<'scope>, +) -> Result { + obj.symbols + .iter() + .enumerate() + 
.zip(definitions_out) + .try_for_each( + |((local_symbol_index, local_symbol), definition)| -> Result { + if !local_symbol.is_definition() { + let mut name_info = RawSymbolName::parse(local_symbol.name.bytes()); + if let Some(version) = local_symbol.version { + name_info.version_name = Some(version); + } + + let symbol_attributes = SymbolAttributes { + name_info, + is_local: false, + default_visibility: local_symbol.visibility == object::elf::STV_DEFAULT, + is_weak: local_symbol.kind + == Some(crate::linker_plugins::SymbolKind::WeakUndef), + }; + + resolve_symbol( + obj.symbol_id_range.offset_to_id(local_symbol_index), + &symbol_attributes, + definition, + resources, + false, + obj.file_id, + scope, + )?; + } + + Ok(()) + }, + ) +} + +struct UndefinedSymbol<'data> { + /// If we have a file ID here and that file is loaded, then the symbol is actually defined and + /// this record can be ignored. + ignore_if_loaded: Option, + name: PreHashedSymbolName<'data>, + symbol_id: SymbolId, +} + +fn load_prelude<'scope, 'data, O: ObjectFile<'data>>( + prelude: &crate::parsing::Prelude, + definitions_out: &mut [SymbolId], + resources: &'scope ResolutionResources<'data, 'scope, O>, + scope: &Scope<'scope>, +) { + // The start symbol could be defined within an archive entry. If it is, then we need to load + // it. We don't currently store the resulting SymbolId, but instead look it up again during + // layout. + load_symbol_named( + resources, + &mut SymbolId::undefined(), + resources.symbol_db.entry_symbol_name(), + scope, + ); + + // Try to resolve any symbols that the user requested be undefined (e.g. via --undefined). If an + // object defines such a symbol, request that the object be loaded. Also, point our undefined + // symbol record to the definition. 
+ for (def_info, definition_out) in prelude.symbol_definitions.iter().zip(definitions_out) { + match def_info.placement { + SymbolPlacement::ForceUndefined | SymbolPlacement::DefsymSymbol(_, _) => { + load_symbol_named(resources, definition_out, def_info.name, scope); + } + _ => {} + } + } +} + +fn load_symbol_named<'scope, 'data, O: ObjectFile<'data>>( + resources: &'scope ResolutionResources<'data, 'scope, O>, + definition_out: &mut SymbolId, + name: &[u8], + scope: &Scope<'scope>, +) { + if let Some(symbol_id) = resources + .symbol_db + .get_unversioned(&UnversionedSymbolName::prehashed(name)) + { + *definition_out = symbol_id; + + let symbol_file_id = resources.symbol_db.file_id_for_symbol(symbol_id); + resources.try_request_file_id(symbol_file_id, scope); + } +} + +/// Where there are multiple references to undefined symbols with the same name, pick one reference +/// as the canonical one to which we'll refer. Where undefined symbols can be resolved to +/// __start/__stop symbols that refer to the start or stop of a custom section, collect that +/// information up and put it into `custom_start_stop_defs`. +fn canonicalise_undefined_symbols<'data, O: ObjectFile<'data>>( + mut undefined_symbols: Vec>, + output_sections: &OutputSections, + groups: &[ResolvedGroup<'data, O>], + symbol_db: &mut SymbolDb<'data, O>, + per_symbol_flags: &mut PerSymbolFlags, + custom_start_stop_defs: &mut ResolvedSyntheticSymbols<'data>, +) { + timing_phase!("Canonicalise undefined symbols"); + + let mut name_to_id: PassThroughHashMap, SymbolId> = + Default::default(); + + let mut versioned_name_to_id: PassThroughHashMap, SymbolId> = + Default::default(); + + // Sort by symbol ID to ensure deterministic behaviour. We sort in reverse order so that LTO + // outputs get higher priority than LTO inputs. This means that the canonical symbol ID for any + // given name will be the one for the last file that refers to that symbol. 
+ undefined_symbols.sort_by_key(|u| usize::MAX - u.symbol_id.as_usize()); + + for undefined in undefined_symbols { + let is_defined = undefined.ignore_if_loaded.is_some_and(|file_id| { + !matches!( + groups[file_id.group()].files[file_id.file()], + ResolvedFile::NotLoaded(_) + ) + }); + + if is_defined { + // The archive entry that defined the symbol in question ended up being loaded, so the + // weak symbol is defined after all. + continue; + } + + match undefined.name { + PreHashedSymbolName::Unversioned(pre_hashed) => { + match name_to_id.entry(pre_hashed) { + hashbrown::hash_map::Entry::Vacant(entry) => { + let symbol_id = allocate_start_stop_symbol_id( + pre_hashed, + symbol_db, + per_symbol_flags, + custom_start_stop_defs, + output_sections, + ); + + // We either make our undefined symbol dynamic, allowing the possibility + // that it might end up being defined at runtime, or we make it + // non-interposable, which means it'll remain null and even if it ends up + // defined at runtime, we won't use that definition. If the symbol doesn't + // have default visibility, then we make it non-interposable. If we're + // building a shared object, we always make the symbol dynamic. If we're + // building a statically linked executable, then we always make it + // non-interposable. If we're building a regular, dynamically linked + // executable, then we make it dynamic if the symbol is weak and otherwise + // make it non-interposable. That last case, a non-weak, default-visibility, + // undefined symbol in an executable is generally a link error, however if + // the flag --warn-unresolved-symbols is passed, then it won't be. Linker + // behaviour differs in this case. GNU ld makes the symbol non-interposable, + // while lld makes it dynamic. We match GNU ld in this case. 
+ if symbol_id.is_none() { + let output_kind = symbol_db.output_kind; + let visibility = symbol_db.input_symbol_visibility(undefined.symbol_id); + + if visibility == Visibility::Default + && (output_kind.is_shared_object() + || (!output_kind.is_static_executable() + && symbol_db.symbol_strength(undefined.symbol_id, groups) + == SymbolStrength::Weak)) + { + per_symbol_flags.set_flag(undefined.symbol_id, ValueFlags::DYNAMIC); + } else { + per_symbol_flags + .set_flag(undefined.symbol_id, ValueFlags::NON_INTERPOSABLE); + } + } + + // If the symbol isn't a start/stop symbol, then assign responsibility for + // the symbol to the first object that referenced + // it. This lets us have PLT/GOT entries + // for the symbol if they're needed. + let symbol_id = symbol_id.unwrap_or(undefined.symbol_id); + entry.insert(symbol_id); + symbol_db.replace_definition(undefined.symbol_id, symbol_id); + } + hashbrown::hash_map::Entry::Occupied(entry) => { + symbol_db.replace_definition(undefined.symbol_id, *entry.get()); + } + } + } + PreHashedSymbolName::Versioned(pre_hashed) => { + match versioned_name_to_id.entry(pre_hashed) { + hashbrown::hash_map::Entry::Vacant(entry) => { + entry.insert(undefined.symbol_id); + } + hashbrown::hash_map::Entry::Occupied(entry) => { + symbol_db.replace_definition(undefined.symbol_id, *entry.get()); + } + } + } + } + } +} + +fn allocate_start_stop_symbol_id<'data, O: ObjectFile<'data>>( + name: PreHashed>, + symbol_db: &mut SymbolDb<'data, O>, + per_symbol_flags: &mut PerSymbolFlags, + custom_start_stop_defs: &mut ResolvedSyntheticSymbols<'data>, + output_sections: &OutputSections, +) -> Option { + let symbol_name_bytes = name.bytes(); + + let (section_name, is_start) = if let Some(s) = symbol_name_bytes.strip_prefix(b"__start_") { + (s, true) + } else if let Some(s) = symbol_name_bytes.strip_prefix(b"__stop_") { + (s, false) + } else { + return None; + }; + + let section_id = output_sections.custom_name_to_id(SectionName(section_name))?; + + let 
def_info = if is_start { + InternalSymDefInfo::new(SymbolPlacement::SectionStart(section_id), name.bytes()) + } else { + InternalSymDefInfo::new(SymbolPlacement::SectionEnd(section_id), name.bytes()) + }; + + let symbol_id = symbol_db.add_synthetic_symbol(per_symbol_flags, name, custom_start_stop_defs); + + custom_start_stop_defs.symbol_definitions.push(def_info); + + Some(symbol_id) +} + +impl<'data, O: ObjectFile<'data>> ResolvedCommon<'data, O> { + fn new(obj: &'data SequencedInputObject<'data, O>) -> Self { + Self { + input: obj.parsed.input, + object: &obj.parsed.object, + file_id: obj.file_id, + symbol_id_range: obj.symbol_id_range, + } + } + + pub(crate) fn symbol_strength(&self, symbol_id: SymbolId) -> SymbolStrength { + let local_index = symbol_id.to_input(self.symbol_id_range); + let Ok(obj_symbol) = self.object.symbol(local_index) else { + // Errors from this function should have been reported elsewhere. + return SymbolStrength::Undefined; + }; + SymbolStrength::of(obj_symbol) + } +} + +fn apply_init_fini_secondaries<'data>( + details: &[InitFiniSectionDetail], + sections: &mut [SectionSlot], + output_sections: &mut OutputSections<'data>, +) { + for d in details { + let Some(slot) = sections.get_mut(d.index as usize) else { + continue; + }; + + let unloaded = match slot { + SectionSlot::Unloaded(u) | SectionSlot::MustLoad(u) => u, + _ => continue, + }; + + let sid = + output_sections.get_or_create_init_fini_secondary(d.primary, d.priority, d.alignment); + unloaded.part_id = sid.part_id_with_alignment(d.alignment); + } +} + +impl<'data, O: ObjectFile<'data>> ResolvedObject<'data, O> { + fn new(common: ResolvedCommon<'data, O>) -> Self { + Self { + common, + // We'll fill this the rest during section resolution. 
+ sections: Default::default(), + relocations: Default::default(), + string_merge_extras: Default::default(), + custom_sections: Default::default(), + init_fini_sections: Default::default(), + } + } +} + +impl<'data, O: ObjectFile<'data>> ResolvedDynamic<'data, O> { + fn new(common: ResolvedCommon<'data, O>, dynamic_tag_values: O::DynamicTagValues) -> Self { + Self { + common, + dynamic_tag_values, + } + } + + pub(crate) fn lib_name(&self) -> &'data [u8] { + self.dynamic_tag_values.lib_name(&self.common.input) + } +} + +fn resolve_sections_for_object<'data, O: ObjectFile<'data>>( + obj: &mut ResolvedObject<'data, O>, + args: &Args, + allocator: &bumpalo_herd::Member<'data>, + loaded_metrics: &LoadedMetrics, + rules: &SectionRules, +) -> Result> { + // Note, we build up the collection with push rather than collect because at the time of + // writing, object's `SectionTable::enumerate` isn't an exact-size iterator, so using collect + // would result in resizing. + let mut sections = Vec::with_capacity(obj.common.object.num_sections()); + for (input_section_index, input_section) in obj.common.object.enumerate_sections() { + sections.push(resolve_section( + input_section_index, + input_section, + obj, + args, + allocator, + loaded_metrics, + rules, + )?); + } + Ok(sections) +} + +#[inline(always)] +fn resolve_section<'data, O: ObjectFile<'data>>( + input_section_index: SectionIndex, + input_section: &O::SectionHeader, + obj: &mut ResolvedObject<'data, O>, + args: &Args, + allocator: &bumpalo_herd::Member<'data>, + loaded_metrics: &LoadedMetrics, + rules: &SectionRules, +) -> Result { + let section_name = obj + .common + .object + .section_name(input_section) + .unwrap_or_default(); + + if section_name.starts_with(secnames::GNU_LTO_SYMTAB_PREFIX.as_bytes()) { + if cfg!(feature = "plugins") { + bail!("Found GCC LTO input that we didn't supply to linker plugin"); + } + return Err(symbol_db::linker_plugin_disabled_error()); + } + + let section_flags = 
input_section.flags(); + let raw_alignment = obj.common.object.section_alignment(input_section)?; + let alignment = Alignment::new(raw_alignment.max(1))?; + let should_merge_sections = part_id::should_merge_sections(section_flags, raw_alignment, args); + + let mut unloaded_section; + let mut is_debug_info = false; + let section_type = input_section.section_type(); + let mut must_load = section_flags.should_retain() || section_type.is_note(); + + let file_name = if let Some(entry) = &obj.common.input.entry { + // For archive members, match against the member name (e.g., "app.o"), + // not the archive filename (e.g., "libfoo.a"). + Some(entry.identifier.as_slice()) + } else { + obj.common + .input + .file + .filename + .file_name() + .map(|n| n.as_encoded_bytes()) + }; + + match rules.lookup(section_name, file_name, section_flags, section_type) { + SectionRuleOutcome::Section(output_info) => { + let part_id = if output_info.section_id.is_regular() { + output_info.section_id.part_id_with_alignment(alignment) + } else { + output_info.section_id.base_part_id() + }; + + must_load |= output_info.must_keep; + + unloaded_section = UnloadedSection::new(part_id); + } + SectionRuleOutcome::SortedSection(output_info) => { + let part_id = if output_info.section_id.is_regular() { + output_info.section_id.part_id_with_alignment(alignment) + } else { + output_info.section_id.base_part_id() + }; + if let Some(priority) = init_fini_priority(section_name) { + obj.init_fini_sections.push(InitFiniSectionDetail { + index: input_section_index.0 as u32, + primary: output_info.section_id, + priority, + alignment, + }); + } + + must_load |= output_info.must_keep; + + unloaded_section = UnloadedSection::new(part_id); + } + SectionRuleOutcome::Discard => return Ok(SectionSlot::Discard), + SectionRuleOutcome::EhFrame => { + return Ok(SectionSlot::FrameData(input_section_index)); + } + SectionRuleOutcome::NoteGnuProperty => { + return Ok(SectionSlot::NoteGnuProperty(input_section_index)); + } + 
SectionRuleOutcome::Debug => { + if args.strip_debug() && !section_flags.is_alloc() { + return Ok(SectionSlot::Discard); + } + + is_debug_info = !section_flags.is_alloc(); + + unloaded_section = UnloadedSection::new(part_id::CUSTOM_PLACEHOLDER); + } + SectionRuleOutcome::Custom => { + unloaded_section = UnloadedSection::new(part_id::CUSTOM_PLACEHOLDER); + unloaded_section.start_stop_eligible = !section_name.starts_with(b"."); + } + SectionRuleOutcome::RiscVAttribute => { + return Ok(SectionSlot::RiscvVAttributes(input_section_index)); + } + }; + + if unloaded_section.part_id == part_id::CUSTOM_PLACEHOLDER { + let custom_section = CustomSectionDetails { + name: SectionName(section_name), + alignment, + index: input_section_index, + }; + + obj.custom_sections.push(custom_section); + } + + let slot = if should_merge_sections { + let section_data = + obj.common + .object + .section_data(input_section, allocator, loaded_metrics)?; + let section_flags = input_section.flags(); + + if section_data.is_empty() { + SectionSlot::Discard + } else { + obj.string_merge_extras.push(StringMergeSectionExtra { + index: input_section_index, + section_data, + section_flags, + }); + + SectionSlot::MergeStrings(StringMergeSectionSlot::new(unloaded_section.part_id)) + } + } else if is_debug_info { + SectionSlot::UnloadedDebugInfo(part_id::CUSTOM_PLACEHOLDER) + } else if must_load { + SectionSlot::MustLoad(unloaded_section) + } else { + SectionSlot::Unloaded(unloaded_section) + }; + + Ok(slot) +} + +fn resolve_symbols<'data, 'scope, O: ObjectFile<'data>>( + obj: &SequencedInputObject<'data, O>, + resources: &'scope ResolutionResources<'data, 'scope, O>, + start_symbol_offset: usize, + definitions_out: &mut [SymbolId], + scope: &Scope<'scope>, +) -> Result { + let verneed = obj.parsed.object.verneed_table()?; + + obj.parsed.object.symbols()[start_symbol_offset..] 
+ .iter() + .enumerate() + .zip(definitions_out) + .try_for_each( + |((local_symbol_index, local_symbol), definition)| -> Result { + // Don't try to resolve symbols that are already defined, e.g. locals and globals + // that we define. Also don't try to resolve symbol zero - the undefined symbol. + // Hidden symbols exported from shared objects don't make sense, so we skip + // resolving them as well. + if !definition.is_undefined() + || start_symbol_offset + local_symbol_index == 0 + || (obj.is_dynamic() && local_symbol.is_hidden()) + { + return Ok(()); + } + + let name_bytes = obj.parsed.object.symbol_name(local_symbol)?; + + let name_info = if let Some(version_name) = + verneed.version_name(object::SymbolIndex(local_symbol_index)) + { + RawSymbolName { + name: name_bytes, + version_name: Some(version_name), + is_default: false, + } + } else { + RawSymbolName::parse(name_bytes) + }; + + let symbol_attributes = SymbolAttributes { + name_info, + is_local: local_symbol.is_local(), + default_visibility: local_symbol.is_interposable(), + is_weak: local_symbol.is_weak(), + }; + + resolve_symbol( + obj.symbol_id_range + .offset_to_id(start_symbol_offset + local_symbol_index), + &symbol_attributes, + definition, + resources, + obj.is_dynamic(), + obj.file_id, + scope, + ) + }, + ) +} + +#[derive(Debug)] +struct SymbolAttributes<'data> { + is_local: bool, + default_visibility: bool, + is_weak: bool, + name_info: RawSymbolName<'data>, +} + +#[inline(always)] +fn resolve_symbol<'data, 'scope, O: ObjectFile<'data>>( + local_symbol_id: SymbolId, + local_symbol_attributes: &SymbolAttributes<'data>, + definition_out: &mut SymbolId, + resources: &'scope ResolutionResources<'data, 'scope, O>, + is_dynamic: bool, + file_id: FileId, + scope: &Scope<'scope>, +) -> Result { + debug_assert_bail!( + !local_symbol_attributes.is_local, + "Only globals should be undefined, found symbol `{}` ({local_symbol_id})", + local_symbol_attributes.name_info, + ); + + let prehashed_name = 
PreHashedSymbolName::from_raw(&local_symbol_attributes.name_info); + + // Only default-visibility symbols can reference symbols from shared objects. + let allow_dynamic = local_symbol_attributes.default_visibility; + + match resources.symbol_db.get(&prehashed_name, allow_dynamic) { + Some(symbol_id) => { + *definition_out = symbol_id; + let symbol_file_id = resources.symbol_db.file_id_for_symbol(symbol_id); + + if symbol_file_id != file_id && !local_symbol_attributes.is_weak { + // Undefined symbols in shared objects should actually activate as-needed shared + // objects, however the rules for whether this should result in a DT_NEEDED entry + // are kind of subtle, so for now, we don't activate shared objects from shared + // objects. See + // https://github.com/wild-linker/wild/issues/930#issuecomment-3007027924 for + // more details. TODO: Fix this. + if !is_dynamic || !resources.symbol_db.file(symbol_file_id).is_dynamic() { + resources.try_request_file_id(symbol_file_id, scope); + } + } else if symbol_file_id != PRELUDE_FILE_ID { + // The symbol is weak and we can't be sure that the file that defined it will end up + // being loaded, so the symbol might actually be undefined. Register it as an + // undefined symbol then later when we handle undefined symbols, we'll check if the + // file got loaded. TODO: If the file is a non-archived object, or possibly even if + // it's an archived object that we've already decided to load, then we could skip + // this. 
+ resources.outputs.undefined_symbols.push(UndefinedSymbol { + ignore_if_loaded: Some(symbol_file_id), + name: prehashed_name, + symbol_id: local_symbol_id, + }); + } + } + None => { + resources.outputs.undefined_symbols.push(UndefinedSymbol { + ignore_if_loaded: None, + name: prehashed_name, + symbol_id: local_symbol_id, + }); + } + } + Ok(()) +} + +impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedObject<'data, O> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.common.input, f) + } +} + +impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedDynamic<'data, O> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.common.input, f) + } +} + +impl<'data> std::fmt::Display for ResolvedLinkerScript<'data> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.input, f) + } +} + +impl<'data, O: ObjectFile<'data>> std::fmt::Display for ResolvedFile<'data, O> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ResolvedFile::NotLoaded(_) => std::fmt::Display::fmt("", f), + ResolvedFile::Prelude(_) => std::fmt::Display::fmt("", f), + ResolvedFile::Object(o) => std::fmt::Display::fmt(o, f), + ResolvedFile::Dynamic(o) => std::fmt::Display::fmt(o, f), + ResolvedFile::LinkerScript(o) => std::fmt::Display::fmt(o, f), + ResolvedFile::SyntheticSymbols(_) => std::fmt::Display::fmt("", f), + #[cfg(feature = "plugins")] + ResolvedFile::LtoInput(_) => std::fmt::Display::fmt("", f), + } + } +} + +impl SectionSlot { + pub(crate) fn is_loaded(&self) -> bool { + !matches!(self, SectionSlot::Discard | SectionSlot::Unloaded(..)) + } + + pub(crate) fn set_part_id(&mut self, part_id: PartId) { + match self { + SectionSlot::Unloaded(section) => section.part_id = part_id, + SectionSlot::MustLoad(section) => section.part_id = part_id, + SectionSlot::Loaded(section) => section.part_id 
= part_id, + SectionSlot::MergeStrings(section) => section.part_id = part_id, + SectionSlot::UnloadedDebugInfo(out) => *out = part_id, + SectionSlot::LoadedDebugInfo(section) => section.part_id = part_id, + SectionSlot::Discard + | SectionSlot::FrameData(_) + | SectionSlot::NoteGnuProperty(_) + | SectionSlot::RiscvVAttributes(_) => {} + } + } + + pub(crate) fn unloaded_mut(&mut self) -> Option<&mut UnloadedSection> { + match self { + SectionSlot::Unloaded(unloaded) | SectionSlot::MustLoad(unloaded) => Some(unloaded), + _ => None, + } + } +} + +impl<'data, O: ObjectFile<'data>> ResolvedFile<'data, O> { + fn symbol_id_range(&self) -> SymbolIdRange { + match self { + ResolvedFile::NotLoaded(s) => s.symbol_id_range, + ResolvedFile::Prelude(s) => s.symbol_id_range(), + ResolvedFile::Object(s) => s.common.symbol_id_range, + ResolvedFile::Dynamic(s) => s.common.symbol_id_range, + ResolvedFile::LinkerScript(s) => s.symbol_id_range, + ResolvedFile::SyntheticSymbols(s) => s.symbol_id_range(), + #[cfg(feature = "plugins")] + ResolvedFile::LtoInput(s) => s.symbol_id_range, + } + } +} + +impl ResolvedPrelude<'_> { + fn symbol_id_range(&self) -> SymbolIdRange { + SymbolIdRange::input(SymbolId::undefined(), self.symbol_definitions.len()) + } +} + +impl ResolvedSyntheticSymbols<'_> { + fn symbol_id_range(&self) -> SymbolIdRange { + SymbolIdRange::input(self.start_symbol_id, self.symbol_definitions.len()) + } +} + +impl<'data, O: ObjectFile<'data>> Default for Resolver<'data, O> { + fn default() -> Self { + Self { + undefined_symbols: Default::default(), + resolved_groups: Default::default(), + } + } +} + +// We create quite a lot of `SectionSlot`s. We don't generally copy them, however we do need to +// eventually drop the Vecs that contain them. Dropping those Vecs is a lot cheaper if the slots +// don't need to have run Drop. 
We check for this, by making sure the type implements `Copy` +#[test] +fn section_slot_is_copy() { + fn assert_copy(_v: T) {} + + assert_copy(SectionSlot::Discard); +} diff --git a/libwild/src/save_dir.rs b/libwild/src/save_dir.rs index 32d9bebc2..cfcddba5b 100644 --- a/libwild/src/save_dir.rs +++ b/libwild/src/save_dir.rs @@ -1,6 +1,6 @@ //! Support for saving inputs for later use. -use crate::Args; +use crate::args::Args; use crate::archive::ArchiveEntry; use crate::archive::ArchiveIterator; use crate::args::Modifiers; @@ -48,7 +48,7 @@ impl SaveDir { )))) } - pub(crate) fn finish(&self, input_data: &FileLoader, parsed_args: &Args) -> Result { + pub(crate) fn finish(&self, input_data: &FileLoader, parsed_args: &Args) -> Result { if let Some(state) = self.0.as_ref() { let mut files_to_copy = state.files_to_copy.clone(); files_to_copy.extend( @@ -129,10 +129,10 @@ impl SaveDirState { /// Finalise the save directory. Makes sure that all `filenames` have been copied, writes the /// `run-with` file and if the environment variable is set to indicate that we should skip /// linking, then exit. - fn finish<'a, I: Iterator>( + fn finish<'a, T, I: Iterator>( &self, filenames: I, - parsed_args: &Args, + parsed_args: &Args, ) -> Result { for filename in filenames { self.copy_file(&std::path::absolute(filename)?, parsed_args)?; @@ -148,7 +148,7 @@ impl SaveDirState { Ok(()) } - fn write_args_file(&self, run_file: &Path, args: &Args) -> Result { + fn write_args_file(&self, run_file: &Path, args: &Args) -> Result { let mut file = std::fs::File::create(run_file)?; let mut out = BufWriter::new(&mut file); out.write_all(PRELUDE.as_bytes())?; @@ -229,7 +229,7 @@ impl SaveDirState { } /// Copies `source_path` to our output directory. 
- fn copy_file(&self, source_path: &Path, parsed_args: &Args) -> Result { + fn copy_file(&self, source_path: &Path, parsed_args: &Args) -> Result { let dest_path = self.output_path(source_path); if dest_path.exists() || !source_path.exists() { @@ -273,6 +273,15 @@ impl SaveDirState { self.copy_file(&absolute_target, parsed_args)?; } + #[cfg(windows)] + std::os::windows::fs::symlink_file(&target, &dest_path).with_context(|| { + format!( + "Failed to symlink {} to {}", + dest_path.display(), + target.display() + ) + })?; + #[cfg(unix)] std::os::unix::fs::symlink(&target, &dest_path).with_context(|| { format!( "Failed to symlink {} to {}", @@ -331,7 +340,7 @@ impl SaveDirState { } /// Copies the files listed by the thin archive. - fn handle_thin_archive(&self, path: &Path, parsed_args: &Args) -> Result { + fn handle_thin_archive(&self, path: &Path, parsed_args: &Args) -> Result { let file_bytes = std::fs::read(path)?; let parent_path = path.parent().unwrap(); @@ -459,7 +468,7 @@ fn to_output_relative_path(path: &Path) -> PathBuf { /// Saves certain environment variables into the script. We only propagate environment variables /// that are known to be used for communication between the compiler and say linker plugins. -fn write_env(out: &mut BufWriter<&mut std::fs::File>, args: &Args) -> Result { +fn write_env(out: &mut BufWriter<&mut std::fs::File>, args: &Args) -> Result { for var in &["COLLECT_GCC", "COLLECT_GCC_OPTIONS"] { if let Ok(mut value) = std::env::var(var) { // COLLECT_GCC_OPTIONS has things like "-o /path/to/output-file" in it. Update these so diff --git a/libwild/src/string_merging.rs b/libwild/src/string_merging.rs index e5e550d48..ddb70b34f 100644 --- a/libwild/src/string_merging.rs +++ b/libwild/src/string_merging.rs @@ -212,10 +212,10 @@ pub(crate) struct MergeStringsSectionBucket<'data> { /// Merges identical strings from all loaded objects where those strings are from input sections /// that are marked with both the SHF_MERGE and SHF_STRINGS flags. 
-pub(crate) fn merge_strings<'data>( +pub(crate) fn merge_strings<'data, T>( inputs: &StringMergeInputs<'data>, output_sections: &OutputSections, - args: &Args, + args: &Args, ) -> Result>> { timing_phase!("Merge strings"); @@ -422,11 +422,11 @@ fn process_input_section<'data, 'offsets>( } impl<'data> MergedStringsSection<'data> { - fn add_input_sections( + fn add_input_sections( &mut self, input_sections: &[StringMergeInputSection<'data>], reuse_pool: &ReusePool, - args: &Args, + args: &Args, ) -> Result { let mut resources = create_split_resources(&mut self.string_offsets, input_sections, reuse_pool, args); @@ -583,11 +583,11 @@ enum StringsSlot<'data, 'offsets> { Strings(Vec>), } -fn create_split_resources<'data, 'offsets, 'scope>( +fn create_split_resources<'data, 'offsets, 'scope, T>( string_offsets: &'offsets mut OffsetMap, input_sections: &'scope [StringMergeInputSection<'data>], reuse_pool: &'scope ReusePool, - args: &Args, + args: &Args, ) -> SplitResources<'data, 'offsets, 'scope> { verbose_timing_phase!("Create input section groups"); diff --git a/libwild/src/subprocess.rs b/libwild/src/subprocess/linux.rs similarity index 93% rename from libwild/src/subprocess.rs rename to libwild/src/subprocess/linux.rs index 61ed90e48..a797cb3b0 100644 --- a/libwild/src/subprocess.rs +++ b/libwild/src/subprocess/linux.rs @@ -1,117 +1,119 @@ -use crate::Args; -use crate::bail; -use crate::error::Context as _; -use crate::error::Result; -use libc::c_char; -use libc::fork; -use libc::pid_t; -use std::ffi::c_int; -use std::ffi::c_void; - -/// Runs the linker, in a subprocess if possible, prints any errors, then exits. -/// -/// This is done by forking a sub-process which runs the linker and waits for communication back -/// from the sub-process (via a pipe) when the main link task is done (the output file has been -/// written, but some shutdown tasks remain. 
-/// -/// Don't call `setup_tracing` or `setup_thread_pool` if using this function, these will be called -/// for you in the subprocess. -/// -/// # Safety -/// Must not be called once threads have been spawned. Calling this function from main is generally -/// the best way to ensure this. -pub unsafe fn run_in_subprocess(args: Args) -> ! { - let exit_code = match subprocess_result(args) { - Ok(code) => code, - Err(error) => crate::error::report_error_and_exit(&error), - }; - std::process::exit(exit_code); -} - -fn subprocess_result(args: Args) -> Result { - let mut fds: [c_int; 2] = [0; 2]; - // create the pipe used to communicate between the parent and child processes - exit on failure - make_pipe(&mut fds).context("make_pipe")?; - - // Safety: The function we're in is private to this module and is only called from - // run_in_subprocess, which imposed the requirement that threads have not yet been started on - // its caller. - match unsafe { fork() } { - 0 => { - // Fork success in child - Run linker in this process. - - crate::setup_tracing(&args)?; - let args = args.activate_thread_pool()?; - let linker = crate::Linker::new(); - let _outputs = linker.run(&args)?; - crate::timing::finalise_perfetto_trace()?; - inform_parent_done(&fds); - Ok(0) - } - -1 => { - // Fork failure in the parent - Fallback to running linker in this process - - crate::run(args)?; - Ok(0) - } - pid => { - // Fork success in the parent - wait for the child to "signal" us it's done - let exit_status = wait_for_child_done(&fds, pid); - Ok(exit_status) - } - } -} - -/// Inform the parent process that work of linker is done and that it succeeded. 
-fn inform_parent_done(fds: &[c_int]) { - unsafe { - libc::close(fds[0]); - let stream = libc::fdopen(fds[1], "w".as_ptr() as *const c_char); - let bytes: [u8; 1] = [b'X']; - libc::fwrite(bytes.as_ptr() as *const c_void, 1, 1, stream); - libc::fclose(stream); - libc::close(libc::STDOUT_FILENO); - libc::close(libc::STDERR_FILENO); - } -} - -/// Wait for the child process to signal it is done, by sending a byte on the pipe. In the case the -/// child crashes, or exits via some path that doesn't send a byte, then the pipe will be closed and -/// we'll then wait for the subprocess to exit, returning its exit code. -fn wait_for_child_done(fds: &[c_int], child_pid: pid_t) -> i32 { - unsafe { - // close our sending end of the pipe - libc::close(fds[1]); - // open the other end of the pipe for reading - let stream = libc::fdopen(fds[0], "r".as_ptr() as *const c_char); - - // Wait for child to send a byte via the pipe or for the pipe to be closed. - let mut response: [u8; 1] = [0u8; 1]; - match libc::fread(response.as_mut_ptr() as *mut c_void, 1, 1, stream) { - 1 => { - // Child sent a byte, which indicates that it succeeded and is now shutting down in - // the background. - 0 - } - _ => { - // Child closed pipe without sending a byte - get the process exit_status - let mut status: libc::c_int = -1i32; - libc::waitpid(child_pid, &mut status, 0); - libc::WEXITSTATUS(status) - } - } - } -} - -/// Create a pipe for communication between parent and child processes. -/// If successful it will return Ok and `fds` will have file descriptors for reading and writing -/// If errors it will return an error message with the errno set, if it can be read or -1 if not -fn make_pipe(fds: &mut [c_int; 2]) -> Result { - match unsafe { libc::pipe(fds.as_mut_ptr()) } { - 0 => Ok(()), - _ => bail!( - "Error creating pipe. 
Errno = {:?}", - std::io::Error::last_os_error().raw_os_error().unwrap_or(-1) - ), - } -} +use crate::args::Args; +use crate::bail; +use crate::error::Context as _; +use crate::error::Result; +use libc::c_char; +use libc::fork; +use libc::pid_t; +use std::ffi::c_int; +use std::ffi::c_void; + +/// Runs the linker, in a subprocess if possible, prints any errors, then exits. +/// +/// This is done by forking a sub-process which runs the linker and waits for communication back +/// from the sub-process (via a pipe) when the main link task is done (the output file has been +/// written, but some shutdown tasks remain. +/// +/// Don't call `setup_tracing` or `setup_thread_pool` if using this function, these will be called +/// for you in the subprocess. +/// +/// # Safety +/// Must not be called once threads have been spawned. Calling this function from main is generally +/// the best way to ensure this. +pub unsafe fn run_in_subprocess(args: Args) -> ! { + let exit_code = match subprocess_result(args) { + Ok(code) => code, + Err(error) => crate::error::report_error_and_exit(&error), + }; + std::process::exit(exit_code); +} + +fn subprocess_result(args: Args) -> Result { + let mut fds: [c_int; 2] = [0; 2]; + // create the pipe used to communicate between the parent and child processes - exit on failure + make_pipe(&mut fds).context("make_pipe")?; + + // Safety: The function we're in is private to this module and is only called from + // run_in_subprocess, which imposed the requirement that threads have not yet been started on + // its caller. + match unsafe { fork() } { + 0 => { + // Fork success in child - Run linker in this process. 
+ + crate::setup_tracing(&args)?; + let args = args.activate_thread_pool()?; + let linker = crate::Linker::new(); + crate::linker_run!(linker, args, |_outputs| { + crate::timing::finalise_perfetto_trace()?; + inform_parent_done(&fds); + }); + + Ok(0) + } + -1 => { + // Fork failure in the parent - Fallback to running linker in this process + + crate::run(args)?; + Ok(0) + } + pid => { + // Fork success in the parent - wait for the child to "signal" us it's done + let exit_status = wait_for_child_done(&fds, pid); + Ok(exit_status) + } + } +} + +/// Inform the parent process that work of linker is done and that it succeeded. +fn inform_parent_done(fds: &[c_int]) { + unsafe { + libc::close(fds[0]); + let stream = libc::fdopen(fds[1], "w".as_ptr() as *const c_char); + let bytes: [u8; 1] = [b'X']; + libc::fwrite(bytes.as_ptr() as *const c_void, 1, 1, stream); + libc::fclose(stream); + libc::close(libc::STDOUT_FILENO); + libc::close(libc::STDERR_FILENO); + } +} + +/// Wait for the child process to signal it is done, by sending a byte on the pipe. In the case the +/// child crashes, or exits via some path that doesn't send a byte, then the pipe will be closed and +/// we'll then wait for the subprocess to exit, returning its exit code. +fn wait_for_child_done(fds: &[c_int], child_pid: pid_t) -> i32 { + unsafe { + // close our sending end of the pipe + libc::close(fds[1]); + // open the other end of the pipe for reading + let stream = libc::fdopen(fds[0], "r".as_ptr() as *const c_char); + + // Wait for child to send a byte via the pipe or for the pipe to be closed. + let mut response: [u8; 1] = [0u8; 1]; + match libc::fread(response.as_mut_ptr() as *mut c_void, 1, 1, stream) { + 1 => { + // Child sent a byte, which indicates that it succeeded and is now shutting down in + // the background. 
+ 0 + } + _ => { + // Child closed pipe without sending a byte - get the process exit_status + let mut status: libc::c_int = -1i32; + libc::waitpid(child_pid, &mut status, 0); + libc::WEXITSTATUS(status) + } + } + } +} + +/// Create a pipe for communication between parent and child processes. +/// If successful it will return Ok and `fds` will have file descriptors for reading and writing +/// If errors it will return an error message with the errno set, if it can be read or -1 if not +fn make_pipe(fds: &mut [c_int; 2]) -> Result { + match unsafe { libc::pipe(fds.as_mut_ptr()) } { + 0 => Ok(()), + _ => bail!( + "Error creating pipe. Errno = {:?}", + std::io::Error::last_os_error().raw_os_error().unwrap_or(-1) + ), + } +} diff --git a/libwild/src/subprocess/mod.rs b/libwild/src/subprocess/mod.rs new file mode 100644 index 000000000..8517e9d37 --- /dev/null +++ b/libwild/src/subprocess/mod.rs @@ -0,0 +1,18 @@ +#[cfg(target_os = "linux")] +mod linux; +#[cfg(target_os = "windows")] +mod windows; + +#[cfg(target_os = "linux")] +pub use linux::run_in_subprocess; +#[cfg(target_os = "windows")] +pub use windows::run_in_subprocess; + +#[cfg(not(any(target_os = "linux", target_os = "windows")))] +pub unsafe fn run_in_subprocess(args: crate::args::Args) -> ! 
{ + let exit_code = match crate::run(args) { + Ok(()) => 0, + Err(error) => crate::error::report_error_and_exit(&error), + }; + std::process::exit(exit_code); +} diff --git a/libwild/src/subprocess/windows.rs b/libwild/src/subprocess/windows.rs new file mode 100644 index 000000000..ab0914dca --- /dev/null +++ b/libwild/src/subprocess/windows.rs @@ -0,0 +1,170 @@ +use crate::args::Args; +use crate::bail; +use crate::error::Result; +use phnt::ffi::HANDLE; +use phnt::ffi::NtClose; +use phnt::ffi::NtCreateUserProcess; +use phnt::ffi::NtTerminateProcess; +use phnt::ffi::NtWaitForSingleObject; +use phnt::ffi::PROCESS_CREATE_FLAGS_INHERIT_HANDLES; +use phnt::ffi::PS_CREATE_INFO; +use std::ptr; +use windows_sys::Win32::Foundation::CloseHandle; +use windows_sys::Win32::Foundation::FALSE; +use windows_sys::Win32::Foundation::STATUS_PROCESS_CLONED; +use windows_sys::Win32::Foundation::TRUE; +use windows_sys::Win32::Security::SECURITY_ATTRIBUTES; +use windows_sys::Win32::Storage::FileSystem::ReadFile; +use windows_sys::Win32::Storage::FileSystem::WriteFile; +use windows_sys::Win32::System::Console::ATTACH_PARENT_PROCESS; +use windows_sys::Win32::System::Console::AttachConsole; +use windows_sys::Win32::System::Console::FreeConsole; +use windows_sys::Win32::System::Pipes::CreatePipe; +use windows_sys::Win32::System::Threading::PROCESS_ALL_ACCESS; +use windows_sys::Win32::System::Threading::THREAD_ALL_ACCESS; + +/// Runs the linker, in a subprocess if possible, prints any errors, then exits. +/// +/// This is done by forking a sub-process which runs the linker and waits for communication back +/// from the sub-process (via a pipe) when the main link task is done (the output file has been +/// written, but some shutdown tasks remain. +/// +/// Don't call `setup_tracing` or `setup_thread_pool` if using this function, these will be called +/// for you in the subprocess. +/// +/// # Safety +/// Must not be called once threads have been spawned. 
Calling this function from main is generally +/// the best way to ensure this. +pub unsafe fn run_in_subprocess(args: Args) -> ! { + let exit_code = match subprocess_result(args) { + Ok(code) => code, + Err(error) => crate::error::report_error_and_exit(&error), + }; + std::process::exit(exit_code); +} + +#[allow(non_upper_case_globals)] +pub const NtCurrentProcess: HANDLE = -1isize as *mut std::ffi::c_void; + +fn subprocess_result(args: Args) -> Result { + let (read_end, write_end) = make_pipe()?; + + let mut hprocess: HANDLE = std::ptr::null_mut(); + let mut hthread: HANDLE = std::ptr::null_mut(); + + match unsafe { fork(&mut hprocess, &mut hthread) } { + STATUS_PROCESS_CLONED => { + // executing inside the clone + + // re attach to the parent's console to be able to write to it + unsafe { + FreeConsole(); + AttachConsole(ATTACH_PARENT_PROCESS); + }; + + crate::setup_tracing(&args)?; + let args = args.activate_thread_pool()?; + let linker = crate::Linker::new(); + crate::linker_run!(linker, args, |_outputs| { + inform_parent_done(write_end); + unsafe { NtTerminateProcess(NtCurrentProcess, STATUS_PROCESS_CLONED) }; + }); + + Ok(0) + } + 0 => { + let exit_status = wait_for_child_done(read_end, hprocess, hthread); + Ok(exit_status) + } + _ => { + // Fork failure in the parent - Fallback to running linker in this process + crate::run(args)?; + Ok(0) + } + } +} + +fn inform_parent_done(write_end: HANDLE) { + let mut bytes_written = 0; + + unsafe { + WriteFile( + write_end, + "X".as_ptr(), + 1, + &mut bytes_written, + std::ptr::null_mut(), + ); + CloseHandle(write_end); + FreeConsole(); + } +} + +fn wait_for_child_done(read_end: HANDLE, hprocess: HANDLE, hthread: HANDLE) -> i32 { + let mut response: [u8; 1] = [0u8; 1]; + let mut bytes_read = 0; + match unsafe { + ReadFile( + read_end, + response.as_mut_ptr(), + 1, + &mut bytes_read, + std::ptr::null_mut(), + ) + } { + TRUE => { + // Child sent a byte, which indicates that it succeeded and is now shutting down in + // 
the background. + 0 + } + _ => { + // Child closed pipe without sending a byte - get the process exit_status + let status = unsafe { NtWaitForSingleObject(hprocess, FALSE as _, ptr::null_mut()) }; + unsafe { + NtClose(hprocess); + NtClose(hthread); + }; + status + } + } +} + +unsafe fn fork(hprocess: &mut HANDLE, hthread: &mut HANDLE) -> i32 { + let mut create_info: PS_CREATE_INFO = unsafe { std::mem::zeroed() }; + create_info.Size = std::mem::size_of::() as _; + + unsafe { + NtCreateUserProcess( + hprocess, + hthread, + PROCESS_ALL_ACCESS, + THREAD_ALL_ACCESS, + std::ptr::null_mut(), + std::ptr::null_mut(), + PROCESS_CREATE_FLAGS_INHERIT_HANDLES, + 0, + std::ptr::null_mut(), + &mut create_info, + std::ptr::null_mut(), + ) + } +} + +fn make_pipe() -> Result<(HANDLE, HANDLE)> { + let mut read_end: HANDLE = std::ptr::null_mut(); + let mut write_end: HANDLE = std::ptr::null_mut(); + + let security_attributes = SECURITY_ATTRIBUTES { + nLength: std::mem::size_of::() as u32, + lpSecurityDescriptor: std::ptr::null_mut(), + bInheritHandle: TRUE, // The crucial part! + }; + + match unsafe { CreatePipe(&mut read_end, &mut write_end, &security_attributes, 0) } { + TRUE => Ok((read_end, write_end)), + _ => bail!( + "Error creating pipe. Errno = {:?}", + std::io::Error::last_os_error().raw_os_error().unwrap_or(-1) + ), + } +} diff --git a/libwild/src/subprocess_unsupported.rs b/libwild/src/subprocess_unsupported.rs index 4ee4862f7..80e8086f2 100644 --- a/libwild/src/subprocess_unsupported.rs +++ b/libwild/src/subprocess_unsupported.rs @@ -1,6 +1,6 @@ /// # Safety /// See function of the same name in `subprocess.rs` -pub unsafe fn run_in_subprocess(args: crate::Args) -> ! { +pub unsafe fn run_in_subprocess(args: crate::args::Args) -> ! 
{ let exit_code = match crate::run(args) { Ok(()) => 0, Err(error) => { diff --git a/libwild/src/symbol_db.rs b/libwild/src/symbol_db.rs index 699bb56aa..6edc6a19d 100644 --- a/libwild/src/symbol_db.rs +++ b/libwild/src/symbol_db.rs @@ -1,7 +1,7 @@ //! Reads global symbols for each input file and builds a map from symbol names to IDs together with //! information about where each symbol can be obtained. -use crate::InputLinkerScript; +use crate::input_data::InputLinkerScript; use crate::OutputKind; use crate::args; use crate::args::Args; @@ -71,7 +71,7 @@ use symbolic_demangle::demangle; #[derive(Debug)] pub struct SymbolDb<'data, O: ObjectFile<'data>> { - pub(crate) args: &'data Args, + pub(crate) args: &'data Args, pub(crate) groups: Vec>, @@ -321,7 +321,7 @@ impl<'data, O: ObjectFile<'data>> SymbolDb<'data, O> { } pub(crate) fn new( - args: &'data Args, + args: &'data Args, output_kind: OutputKind, auxiliary: &AuxiliaryFiles<'data>, herd: &'data bumpalo_herd::Herd, @@ -1393,7 +1393,7 @@ pub(crate) fn is_mapping_symbol_name(name: &[u8]) -> bool { fn read_symbols<'data, O: ObjectFile<'data>>( version_script: &VersionScript, shards: &mut [SymbolWriterShard<'_, '_, 'data, O>], - args: &Args, + args: &Args, export_list: &Option>, output_kind: OutputKind, ) -> Result>> { @@ -1421,7 +1421,7 @@ fn read_symbols_for_group<'data, O: ObjectFile<'data>>( version_script: &VersionScript, export_list: &Option>, num_buckets: usize, - args: &Args, + args: &Args, output_kind: OutputKind, ) -> Result> { verbose_timing_phase!( @@ -1558,7 +1558,7 @@ fn load_symbols_from_file<'data, O: ObjectFile<'data>>( version_script: &VersionScript, symbols_out: &mut SymbolWriterShard<'_, '_, 'data, O>, outputs: &mut SymbolLoadOutputs<'data>, - args: &Args, + args: &Args, export_list: &Option>, output_kind: OutputKind, ) -> Result { @@ -1685,7 +1685,7 @@ trait SymbolLoader<'data, O: ObjectFile<'data>> { struct RegularObjectSymbolLoader<'a, 'data, O: ObjectFile<'data>> { object: &'a O, - args: &'a 
Args, + args: &'a Args, version_script: &'a VersionScript<'a>, archive_semantics: bool, lib_name: &'data [u8], @@ -2073,7 +2073,7 @@ impl<'data> PendingVersionedSymbol<'data> { } /// Decides how many buckets we should use for symbol names. -fn num_symbol_hash_buckets(args: &Args) -> usize { +fn num_symbol_hash_buckets(args: &Args) -> usize { args.available_threads.get() } diff --git a/libwild/src/target_os.rs b/libwild/src/target_os.rs new file mode 100644 index 000000000..39d0f44d1 --- /dev/null +++ b/libwild/src/target_os.rs @@ -0,0 +1,57 @@ +use std::{fmt::Display, str::FromStr}; + +use crate::bail; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum Os { + Linux, + Windows, + MacOS, +} + +impl Os { + pub const DEFAULT: Self = const { + #[cfg(target_os = "linux")] + { + Os::Linux + } + #[cfg(target_os = "windows")] + { + Os::Windows + } + #[cfg(target_os = "macos")] + { + Os::MacOS + } + }; +} + +impl Default for Os { + fn default() -> Self { + Os::DEFAULT + } +} + +impl FromStr for Os { + type Err = crate::error::Error; + + fn from_str(s: &str) -> Result { + match s { + "linux" => Ok(Os::Linux), + "windows" => Ok(Os::Windows), + "macos" => Ok(Os::MacOS), + _ => bail!("-m {s} is not yet supported"), + } + } +} + +impl Display for Os { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let os = match self { + Os::Linux => "linux", + Os::Windows => "windows", + Os::MacOS => "macos", + }; + write!(f, "{os}") + } +} diff --git a/test_pe/test.c b/test_pe/test.c new file mode 100644 index 000000000..e82db2eb9 --- /dev/null +++ b/test_pe/test.c @@ -0,0 +1,4 @@ +// Minimal PE entry point — no CRT, just return an exit code which apparently works without c runtime on windows +int entry(void) { + return 42; +} diff --git a/wild/src/main.rs b/wild/src/main.rs index 4095d4610..01789fcf7 100644 --- a/wild/src/main.rs +++ b/wild/src/main.rs @@ -20,7 +20,7 @@ fn run() -> libwild::error::Result { let args = libwild::Args::parse(|| 
std::env::args().skip(1))?; - if args.should_fork() { + if args.should_fork { // Safety: We haven't spawned any threads yet. unsafe { libwild::run_in_subprocess(args) }; } else {