From df2265783e56da1c3fb7e051606d38cd90f1fa97 Mon Sep 17 00:00:00 2001 From: Chris Palmer <palmer@google.com> Date: Wed, 28 Aug 2024 12:08:15 -0700 Subject: [PATCH 1/5] A variety of minor cleanups Fix some typos, Markdown, `rustfmt`, and use a (necessary) double dash for `--version`. --- ci/dma/src/lib.rs | 6 ++-- ci/singleton/app/src/main.rs | 5 +-- src/asm.md | 4 +-- src/compiler-support.md | 4 +-- src/custom-target.md | 14 ++++---- src/dma.md | 63 +++++++++++++++++++----------------- src/exceptions.md | 2 +- src/logging.md | 19 +++++------ src/singleton.md | 8 ++--- 9 files changed, 66 insertions(+), 59 deletions(-) diff --git a/ci/dma/src/lib.rs b/ci/dma/src/lib.rs index 8ea59c6..9531d36 100644 --- a/ci/dma/src/lib.rs +++ b/ci/dma/src/lib.rs @@ -13,7 +13,8 @@ pub struct Dma1Channel1 { impl Dma1Channel1 { /// Data will be written to this `address` /// - /// `inc` indicates whether the address will be incremented after every byte transfer + /// `inc` indicates whether the address will be incremented after every byte + /// transfer /// /// NOTE this performs a volatile write pub fn set_destination_address(&mut self, address: usize, inc: bool) { @@ -22,7 +23,8 @@ impl Dma1Channel1 { /// Data will be read from this `address` /// - /// `inc` indicates whether the address will be incremented after every byte transfer + /// `inc` indicates whether the address will be incremented after every byte + /// transfer /// /// NOTE this performs a volatile write pub fn set_source_address(&mut self, address: usize, inc: bool) { diff --git a/ci/singleton/app/src/main.rs b/ci/singleton/app/src/main.rs index 747df62..2abfbd3 100644 --- a/ci/singleton/app/src/main.rs +++ b/ci/singleton/app/src/main.rs @@ -29,7 +29,7 @@ fn main() -> ! { impl GlobalLog for Logger { fn log(&self, address: u8) { // we use a critical section (`interrupt::free`) to make the access to the - // `static mut` variable interrupt safe which is required for memory safety + // `static mut` variable interrupt-safe which is required for memory safety interrupt::free(|_| unsafe { static mut HSTDOUT: Option<HStdout> = None; @@ -41,6 +41,7 @@ impl GlobalLog for Logger { let hstdout = HSTDOUT.as_mut().unwrap(); hstdout.write_all(&[address]) - }).ok(); // `.ok()` = ignore errors + }) + .ok(); // `.ok()` = ignore errors } } diff --git a/src/asm.md b/src/asm.md index 11517c0..ec0a930 100644 --- a/src/asm.md +++ b/src/asm.md @@ -9,7 +9,7 @@ So far we have managed to boot the device and handle interrupts without a single line of assembly. That's quite a feat! But depending on the architecture you are targeting you may need some assembly to get to this point. There are also some -operations like context switching that require assembly, etc. +operations like context switching, etc., that require assembly. The problem is that both *inline* assembly (`asm!`) and *free form* assembly (`global_asm!`) are unstable, and there's no estimate for when they'll be @@ -97,7 +97,7 @@ $ cargo objdump --bin app --release -- -d --no-show-raw-insn --print-imm-hex ``` > **NOTE:** To make this disassembly smaller I commented out the initialization -> of RAM +> of RAM. Now look at the vector table. The 4th entry should be the address of `HardFaultTrampoline` plus one. diff --git a/src/compiler-support.md b/src/compiler-support.md index 91162e1..80df531 100644 --- a/src/compiler-support.md +++ b/src/compiler-support.md @@ -20,7 +20,7 @@ following command: ``` console $ # you need to have `cargo-binutils` installed to run this command -$ cargo objdump -- -version +$ cargo objdump -- --version LLVM (http://llvm.org/): LLVM version 7.0.0svn Optimized build. @@ -68,7 +68,7 @@ to replace the original version of LLVM with the fork before building `rustc`. T allows this and in principle it should just require changing the `llvm` submodule to point to the fork. -If your target architecture is only supported by some vendor provided GCC, you have the option of +If your target architecture is only supported by some vendor-provided GCC, you have the option of using [`mrustc`], an unofficial Rust compiler, to translate your Rust program into C code and then compile that using GCC. diff --git a/src/custom-target.md b/src/custom-target.md index 6bb47bb..5363077 100644 --- a/src/custom-target.md +++ b/src/custom-target.md @@ -1,10 +1,10 @@ # Creating a custom target If a custom target triple is not available for your platform, you must create a custom target file -that describes your target to rustc. +that describes your target to `rustc`. Keep in mind that it is required to use a nightly compiler to build the core library, which must be -done for a target unknown to rustc. +done for a target unknown to `rustc`. ## Deciding on a target triple @@ -65,7 +65,7 @@ You can pretty much copy that output into your file. Start with a few modificati - I have no native atomic operations, but I can emulate them myself: set `max-atomic-width` to the highest number of bits that you can emulate up to 128, then implement all of the [atomic][libcalls-atomic] and [sync][libcalls-sync] functions expected by LLVM as - `#[no_mangle] unsafe extern "C"`. These functions have been standardized by gcc, so the [gcc + `#[no_mangle] unsafe extern "C"`. These functions have been standardized by GCC, so the [GCC documentation][gcc-sync] may have more notes. Missing functions will cause a linker error, while incorrectly implemented functions will possibly cause UB. For example, if you have a single-core, single-thread processor with interrupts, you can implement these functions to @@ -73,7 +73,7 @@ You can pretty much copy that output into your file. Start with a few modificati - I have no native atomic operations: you'll have to do some unsafe work to manually ensure synchronization in your code. You must set `"max-atomic-width": 0`. - Change the linker if integrating with an existing toolchain. For example, if you're using a - toolchain that uses a custom build of gcc, set `"linker-flavor": "gcc"` and `linker` to the + toolchain that uses a custom build of GCC, set `"linker-flavor": "gcc"` and `linker` to the command name of your linker. If you require additional linker arguments, use `pre-link-args` and `post-link-args` as so: ``` json @@ -96,8 +96,8 @@ You can pretty much copy that output into your file. Start with a few modificati (not including the version in the case of ARM) to list the available features and their descriptions. **If your target requires strict memory alignment access (e.g. `armv5te`), make sure that you enable `strict-align`**. To enable a feature, place a plus before it. Likewise, to - disable a feature, place a minus before it. Features should be comma separated like so: - `"features": "+soft-float,+neon`. Note that this may not be necessary if LLVM knows enough about + disable a feature, place a minus before it. Features should be comma-separated like so: + `"features": "+soft-float,+neon"`. Note that this may not be necessary if LLVM knows enough about your target based on the provided triple and CPU. - Configure the CPU that LLVM uses if you know it. This will enable CPU-specific optimizations and features. At the top of the output of the command in the last step, there is a list of known CPUs. @@ -121,7 +121,7 @@ You can pretty much copy that output into your file. Start with a few modificati Once you have a target specification file, you may refer to it by its path or by its name (i.e. excluding `.json`) if it is in the current directory or in `$RUST_TARGET_PATH`. -Verify that it is readable by rustc: +Verify that it is readable by `rustc`: ``` sh ❱ rustc --print cfg --target foo.json # or just foo if in the current directory diff --git a/src/dma.md b/src/dma.md index ab5078a..c5f13d6 100644 --- a/src/dma.md +++ b/src/dma.md @@ -6,7 +6,7 @@ DMA transfers. The DMA peripheral is used to perform memory transfers in parallel to the work of the processor (the execution of the main program). A DMA transfer is more or less equivalent to spawning a thread (see [`thread::spawn`]) to do a `memcpy`. -We'll use the fork-join model to illustrate the requirements of a memory safe +We'll use the fork-join model to illustrate the requirements of a memory-safe API. [`thread::spawn`]: https://doc.rust-lang.org/std/thread/fn.spawn.html @@ -28,11 +28,11 @@ Assume that the `Dma1Channel1` is statically configured to work with serial port {{#include ../ci/dma/src/lib.rs:82:83}} ``` -Let's say we want to extend `Serial1` API to (a) asynchronously send out a +Let's say we want to extend the `Serial1` API to (a) asynchronously send out a buffer and (b) asynchronously fill a buffer. -We'll start with a memory unsafe API and we'll iterate on it until it's -completely memory safe. On each step we'll show you how the API can be broken to +We'll start with a memory-unsafe API and we'll iterate on it until it's +completely memory-safe. At each step we'll show you how the API can be broken to make you aware of the issues that need to be addressed when dealing with asynchronous memory operations. @@ -47,7 +47,7 @@ keep things simple let's ignore all error handling. {{#include ../ci/dma/examples/one.rs:7:47}} ``` -> **NOTE:** `Transfer` could expose a futures or generator based API instead of +> **NOTE:** `Transfer` could expose a futures- or generator-based API instead of > the API shown above. That's an API design question that has little bearing on > the memory safety of the overall API so we won't delve into it in this text. @@ -95,11 +95,13 @@ variables `x` and `y` changing their value at random times. The DMA transfer could also overwrite the state (e.g. link register) pushed onto the stack by the prologue of function `bar`. -Note that if we had not use `mem::forget`, but `mem::drop`, it would have been -possible to make `Transfer`'s destructor stop the DMA transfer and then the -program would have been safe. But one can *not* rely on destructors running to -enforce memory safety because `mem::forget` and memory leaks (see RC cycles) are -safe in Rust. +Note that if we had used `mem::drop` instead of `mem::forget`, it would have +been possible to make `Transfer`'s destructor stop the DMA transfer and then the +program would have been safe. But one *cannot* rely on destructors running to +enforce memory safety because `mem::forget` and memory leaks (see `Rc` cycles) +are safe in Rust. (Refer to [`mem::forget` safety].) + +[`mem::forget` safety]: https://doc.rust-lang.org/std/mem/fn.forget.html#safety We can fix this particular problem by changing the lifetime of the buffer from `'a` to `'static` in both APIs. @@ -164,7 +166,7 @@ result in a data race: both the processor and the DMA would end up modifying `buf` at the same time. Similarly the compiler can move the zeroing operation to after `read_exact`, which would also result in a data race. -To prevent these problematic reorderings we can use a [`compiler_fence`] +To prevent these problematic reorderings we can use a [`compiler_fence`]: [`compiler_fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.compiler_fence.html @@ -188,8 +190,8 @@ orderings in the comments. {{#include ../ci/dma/examples/four.rs:68:87}} ``` -The zeroing operation can *not* be moved *after* `read_exact` due to the -`Release` fence. Similarly, the `reverse` operation can *not* be moved *before* +The zeroing operation *cannot* be moved *after* `read_exact` due to the +`Release` fence. Similarly, the `reverse` operation *cannot* be moved *before* `wait` due to the `Acquire` fence. The memory operations *between* both fences *can* be freely reordered across the fences but none of those operations involves `buf` so such reorderings do *not* result in undefined behavior. @@ -197,20 +199,20 @@ involves `buf` so such reorderings do *not* result in undefined behavior. Note that `compiler_fence` is a bit stronger than what's required. For example, the fences will prevent the operations on `x` from being merged even though we know that `buf` doesn't overlap with `x` (due to Rust aliasing rules). However, -there exist no intrinsic that's more fine grained than `compiler_fence`. +there exists no intrinsic that's more fine grained than `compiler_fence`. ### Don't we need a memory barrier? That depends on the target architecture. In the case of Cortex M0 to M4F cores, [AN321] says: -[AN321]: https://static.docs.arm.com/dai0321/a/DAI0321A_programming_guide_memory_barriers_for_m_profile.pdf +[AN321]: https://documentation-service.arm.com/static/5efefb97dbdee951c1cd5aaf > 3.2 Typical usages > > (..) > -> The use of DMB is rarely needed in Cortex-M processors because they do not +> The use of `DMB` is rarely needed in Cortex-M processors because they do not > reorder memory transactions. However, it is needed if the software is to be > reused on other ARM processors, especially multi-master systems. For example: > @@ -223,25 +225,26 @@ That depends on the target architecture. In the case of Cortex M0 to M4F cores, > > (..) > -> Omitting the DMB or DSB instruction in the examples in Figure 41 on page 47 -> and Figure 42 would not cause any error because the Cortex-M processors: +> Omitting the `DMB` or `DSB` instruction in the examples in Figure 41 on page +> 47 and Figure 42 would not cause any error because the Cortex-M processors: > > - do not re-order memory transfers > - do not permit two write transfers to be overlapped. -Where Figure 41 shows a DMB (memory barrier) instruction being used before +Where Figure 41 shows a `DMB` (memory barrier) instruction being used before starting a DMA transaction. -In the case of Cortex-M7 cores you'll need memory barriers (DMB/DSB) if you are -using the data cache (DCache), unless you manually invalidate the buffer used by -the DMA. Even with the data cache disabled, memory barriers might still be -required to avoid reordering in the store buffer. +In the case of Cortex-M7 cores you'll need memory barriers (`DMB`/`DSB`) if you +are using the data cache (DCache), unless you manually invalidate the buffer +used by the DMA. Even with the data cache disabled, memory barriers might still +be required to avoid reordering in the store buffer. If your target is a multi-core system then it's very likely that you'll need memory barriers. If you do need the memory barrier then you need to use [`atomic::fence`] instead -of `compiler_fence`. That should generate a DMB instruction on Cortex-M devices. +of `compiler_fence`. That should generate a `DMB` instruction on Cortex-M +devices. [`atomic::fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.fence.html @@ -282,7 +285,7 @@ pointer used in `read_exact` will become invalidated. You'll end up with a situation similar to the [`unsound`](#dealing-with-memforget) example. To avoid this problem we require that the buffer used with our API retains its -memory location even when it's moved. The [`Pin`] newtype provides such +memory location even when it's moved. The [`Pin`] newtype provides such a guarantee. We can update our API to required that all buffers are "pinned" first. @@ -347,7 +350,7 @@ over. For example, dropping a `Transfer<Box<[u8]>>` value will cause the buffer to be deallocated. This can result in undefined behavior if the transfer is still in progress as the DMA would end up writing to deallocated memory. -In such scenario one option is to make `Transfer.drop` stop the DMA transfer. +In such a scenario one option is to make `Transfer.drop` stop the DMA transfer. The other option is to make `Transfer.drop` wait for the transfer to finish. We'll pick the former option as it's cheaper. @@ -365,8 +368,8 @@ Now the DMA transfer will be stopped before the buffer is deallocated. ## Summary -To sum it up, we need to consider all the following points to achieve memory -safe DMA transfers: +To sum it up, we need to consider all the following points to achieve +memory-safe DMA transfers: - Use immovable buffers plus indirection: `Pin<B>`. Alternatively, you can use the `StableDeref` trait. @@ -381,8 +384,8 @@ safe DMA transfers: --- -This text leaves out up several details required to build a production grade -DMA abstraction, like configuring the DMA channels (e.g. streams, circular vs +This text leaves out several details required to build a production-grade DMA +abstraction, like configuring the DMA channels (e.g. streams, circular vs one-shot mode, etc.), alignment of buffers, error handling, how to make the abstraction device-agnostic, etc. All those aspects are left as an exercise for the reader / community (`:P`). diff --git a/src/exceptions.md b/src/exceptions.md index 89f0a4e..e986827 100644 --- a/src/exceptions.md +++ b/src/exceptions.md @@ -200,7 +200,7 @@ matches the name we used in `EXCEPTIONS`. {{#include ../ci/exceptions/app2/src/main.rs}} ``` -You can test it in QEMU +You can test it in QEMU: ``` console (gdb) target remote :3333 diff --git a/src/logging.md b/src/logging.md index 1832960..9d37a60 100644 --- a/src/logging.md +++ b/src/logging.md @@ -52,7 +52,7 @@ the `static` variables but their addresses. As long as the `static` variables are not zero sized each one will have a different address. What we're doing here is effectively encoding each message into a unique identifier, which happens to be the variable address. Some part of -the log system will have to decode this id back into the message. +the log system will have to decode this ID back into the message. Let's write some code to illustrate the idea. @@ -104,8 +104,8 @@ $ qemu-system-arm \ {{#include ../ci/logging/app/dev.out}} ``` -> **NOTE**: These addresses may not be the ones you get locally because -> addresses of `static` variable are not guaranteed to remain the same when the +> **NOTE**: These addresses may not be the ones you get locally because the +> addresses of `static` variables are not guaranteed to remain the same when the > toolchain is changed (e.g. optimizations may have improved). Now we have two addresses printed to the console. @@ -128,12 +128,12 @@ $ # first column is the symbol address; last column is the symbol name we are only looking for the ones in the `.rodata` section and whose size is one byte (our variables have type `u8`). -It's important to note that the address of the symbols will likely change when +It's important to note that the addresses of the symbols will likely change when optimizing the program. Let's check that. > **PROTIP** You can set `target.thumbv7m-none-eabi.runner` to the long QEMU -> command from before (`qemu-system-arm -cpu (..) -kernel`) in the Cargo -> configuration file (`.cargo/conifg`) to have `cargo run` use that *runner* to +> command from before (`qemu-system-arm -cpu ... -kernel ...`) in the Cargo +> configuration file (`.cargo/config`) to have `cargo run` use that *runner* to > execute the output binary. ``` console @@ -173,8 +173,9 @@ an exercise for the reader. Can we do better? Yes, we can! The current implementation places the `static` variables in `.rodata`, which -means they occupy size in Flash even though we never use their contents. Using a -little bit of linker script magic we can make them occupy *zero* space in Flash. +means they occupy space in flash even though we never use their contents. Using +a little bit of linker script magic we can make them occupy *zero* space in +flash. ``` console $ cat log.x @@ -199,7 +200,7 @@ We also specified the start address of this output section: the `0` in `.log 0 (INFO)`. The other improvement we can do is switch from formatted I/O (`fmt::Write`) to -binary I/O, that is send the addresses to the host as bytes rather than as +binary I/O; that is, send the addresses to the host as bytes rather than as strings. Binary serialization can be hard but we'll keep things super simple by diff --git a/src/singleton.md b/src/singleton.md index 5b31784..10142d3 100644 --- a/src/singleton.md +++ b/src/singleton.md @@ -4,7 +4,7 @@ In this section we'll cover how to implement a global, shared singleton. The embedded Rust book covered local, owned singletons which are pretty much unique to Rust. Global singletons are essentially the singleton pattern you see in C and C++; they are not specific to embedded development but since they involve -symbols they seemed a good fit for the embedonomicon. +symbols they seemed a good fit for the Embedonomicon. > **TODO**(resources team) link "the embedded Rust book" to the singletons > section when it's up @@ -16,7 +16,7 @@ section to support global logging. The result will be very similar to the > **TODO**(resources team) link `#[global_allocator]` to the collections chapter > of the book when it's in a more stable location. -Here's the summary of what we want to: +Here's the summary of what we want to do: In the last section we created a `log!` macro to log messages through a specific logger, a value that implements the `Log` trait. The syntax of the `log!` macro @@ -26,9 +26,9 @@ message through a global logger; this is how `std::println!` works. We'll also need a mechanism to declare what the global logger is; this is the part that's similar to `#[global_allocator]`. -It could be that the global logger is declared in the top crate and it could +It could be that the global logger is declared in the top crate, and it could also be that the type of the global logger is defined in the top crate. In this -scenario the dependencies can *not* know the exact type of the global logger. To +scenario the dependencies *cannot* know the exact type of the global logger. To support this scenario we'll need some indirection. Instead of hardcoding the type of the global logger in the `log` crate we'll From eee1b95e5ace3b204616733df783a8c13dc8ff74 Mon Sep 17 00:00:00 2001 From: Chris Palmer <chris@noncombatant.org> Date: Wed, 28 Aug 2024 14:07:50 -0700 Subject: [PATCH 2/5] Update asm.md --- src/asm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/asm.md b/src/asm.md index ec0a930..5c02370 100644 --- a/src/asm.md +++ b/src/asm.md @@ -9,7 +9,7 @@ So far we have managed to boot the device and handle interrupts without a single line of assembly. That's quite a feat! But depending on the architecture you are targeting you may need some assembly to get to this point. There are also some -operations like context switching, etc., that require assembly. +operations, for example context switching, that require assembly. The problem is that both *inline* assembly (`asm!`) and *free form* assembly (`global_asm!`) are unstable, and there's no estimate for when they'll be From 438ad11908fd46f7177398c94b4ccc091996d593 Mon Sep 17 00:00:00 2001 From: Chris Palmer <chris@noncombatant.org> Date: Wed, 28 Aug 2024 14:09:04 -0700 Subject: [PATCH 3/5] Update dma.md --- src/dma.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dma.md b/src/dma.md index c5f13d6..fa092cb 100644 --- a/src/dma.md +++ b/src/dma.md @@ -166,7 +166,7 @@ result in a data race: both the processor and the DMA would end up modifying `buf` at the same time. Similarly the compiler can move the zeroing operation to after `read_exact`, which would also result in a data race. -To prevent these problematic reorderings we can use a [`compiler_fence`]: +To prevent these problematic reorderings we can use a [`compiler_fence`]. [`compiler_fence`]: https://doc.rust-lang.org/core/sync/atomic/fn.compiler_fence.html From db8d53dd25761d7c91c9f3df118aa2dd1937bc21 Mon Sep 17 00:00:00 2001 From: Chris Palmer <chris@noncombatant.org> Date: Wed, 28 Aug 2024 14:10:12 -0700 Subject: [PATCH 4/5] Update logging.md --- src/logging.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/logging.md b/src/logging.md index 9d37a60..ec96ce5 100644 --- a/src/logging.md +++ b/src/logging.md @@ -132,7 +132,7 @@ It's important to note that the addresses of the symbols will likely change when optimizing the program. Let's check that. > **PROTIP** You can set `target.thumbv7m-none-eabi.runner` to the long QEMU -> command from before (`qemu-system-arm -cpu ... -kernel ...`) in the Cargo +> command from before (`qemu-system-arm -cpu … -kernel …`) in the Cargo > configuration file (`.cargo/config`) to have `cargo run` use that *runner* to > execute the output binary. From 433582821ffc38502d28a4127ce16d74beabe9f5 Mon Sep 17 00:00:00 2001 From: Chris Palmer <chris@noncombatant.org> Date: Wed, 28 Aug 2024 14:11:05 -0700 Subject: [PATCH 5/5] Update logging.md --- src/logging.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/logging.md b/src/logging.md index ec96ce5..70c58fe 100644 --- a/src/logging.md +++ b/src/logging.md @@ -200,8 +200,7 @@ We also specified the start address of this output section: the `0` in `.log 0 (INFO)`. The other improvement we can do is switch from formatted I/O (`fmt::Write`) to -binary I/O; that is, send the addresses to the host as bytes rather than as -strings. +binary I/O, sending the addresses to the host as bytes rather than as strings. Binary serialization can be hard but we'll keep things super simple by serializing each address as a single byte. With this approach we don't have to