From 4505b859b8331616ca32ed9887fd0c5bb822b9dc Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Mon, 16 Dec 2024 20:19:39 -0300 Subject: [PATCH 01/15] docs: refactor README --- LICENSES.md | 29 ++++ README.md | 428 +++++++++++++++++++++++----------------------------- 2 files changed, 220 insertions(+), 237 deletions(-) create mode 100644 LICENSES.md diff --git a/LICENSES.md b/LICENSES.md new file mode 100644 index 000000000..a6c9df24c --- /dev/null +++ b/LICENSES.md @@ -0,0 +1,29 @@ +# License + +This project is licensed under the [LGPL 3.0](https://www.gnu.org/licenses/lgpl-3.0.html) license. See the license terms in [COPYING](COPYING). + +## Submodules and Dependencies + +This project includes several submodules and dependencies, each with its own licensing: + +- `tests/machine`: Licensed under the Apache License 2.0. See the license terms in [tests/machine/LICENSE](tests/machine/LICENSE). +- `tests/uarch`: Licensed under the Apache License 2.0. Licensing details are available in [tests/uarch/LICENSE](tests/uarch/LICENSE). +- `third-party/llvm-flang-uint128`: Licensed under the Apache License 2.0 with LLVM exceptions. The license can be found at [third-party/llvm-flang-uint128/LICENSE](third-party/llvm-flang-uint128/LICENSE). +- `third-party/riscv-arch-test`: Source code licensed under the Apache 2.0 and BSD 3-Clause licenses. Documentation under `CC-BY-4.0`. License information is provided in README.md and other COPYING.* files like [third-party/riscv-arch-test/COPYING.APACHE](third-party/riscv-arch-test/COPYING.APACHE). +- `third-party/riscv-tests`: Licensed under the BSD 3-Clause "New" or "Revised" License. See [third-party/riscv-tests/LICENSE](third-party/riscv-tests/LICENSE) for license details. +- `third-party/riscv-tests/env`: Licensed under the BSD 3-Clause "New" or "Revised" License. License details are in [third-party/riscv-tests/env/LICENSE](third-party/riscv-tests/env/LICENSE). +- `third-party/tiny_sha3`: Licensed under the MIT License. 
The license can be found at [third-party/tiny_sha3/LICENSE](third-party/tiny_sha3/LICENSE). +- `third-party/nlohmann-json`: Licensed under the MIT License. The license can be found at [third-party/nlohmann-json/LICENSE.MIT](third-party/nlohmann-json/LICENSE.MIT). + +## Debian Packages + +The project releases several Debian packages, each subject to its specific licensing terms: + +- `cartesi-machine-[VERSION]_[ARCHITECTURE].deb` and `cartesi-machine-tests-[VERSION]_[ARCHITECTURE].deb` packages are licensed under LGPL v3.0 and may include or link to other software components with different licenses. +- `cartesi-machine-tests-data-[VERSION].deb`: This package contains files that are individually licensed under various terms, including but not limited to Apache-2.0, BSD-3-Clause-Regents, BSD-3-Clause, and GPL-2.0-only. For a comprehensive overview of the licenses applicable to specific files within this package, please refer to its copyright file, e.g., [tools/template/tests-data-copyright.template](tools/template/tests-data-copyright.template). + +For detailed licensing information of each Debian package, please refer to the copyright file included within the package. + +## Additional Notes + +This project may include or link to other software components with different licenses. Contributors and users are responsible for ensuring compliance with each component's licensing terms. For detailed information, please refer to the individual LICENSE files within each directory or submodule, and for the Debian packages, please review the respective copyright and licensing details as mentioned above. diff --git a/README.md b/README.md index 58e83ca64..79e131342 100644 --- a/README.md +++ b/README.md @@ -1,324 +1,278 @@ # Cartesi Machine Emulator -The Cartesi Machine Emulator is the reference off-chain implementation of the Cartesi Machine Specification. It's written in C/C++ with POSIX dependencies restricted to the terminal, process, and memory-mapping facilities. 
It is distributed as a library and scriptable in the Lua programming language. - -The emulator implements RISC-V's RV64IMASU ISA. The letters after RV specify the extension set. This selection corresponds to a 64-bit machine, Integer arithmetic with Multiplication and division, Atomic operations, as well as the optional Supervisor and User privilege levels. In addition, Cartesi Machines support the Sv48 mode of address translation and memory protection. +[![Latest Release](https://img.shields.io/github/v/release/cartesi/machine-emulator?label=version)](https://github.com/cartesi/machine-emulator/releases) +[![Build Status](https://img.shields.io/github/actions/workflow/status/cartesi/machine-emulator/build.yml?branch=main)](https://github.com/cartesi/machine-emulator/actions) +[![License](https://img.shields.io/github/license/cartesi/machine-emulator)](COPYING) + +The Cartesi Machine Emulator is the basis of Cartesi's verifiable computation framework. +It is a portable, deterministic, high-performance RISC-V emulator (a.k.a. a virtual machine) that can run complex computations off-chain but supports on-chain verification via fraud proofs. + +Under the hood, the emulator implements the RISC-V RV64GC ISA (including the unprivileged and privileged specifications). +This allows it to boot Linux, which, in turn, gives creators access to traditional software development stacks when developing and running their applications. + +Written in C++, the Cartesi Machine Emulator is available as a standalone CLI application or as a library for embedding into other applications. +It can be controlled via a well-defined C API that can be easily accessed from multiple programming languages. +In particular, it can be scripted in Lua, for fast prototyping and testing. 
+ +*TL;DR:* +> I can use the Cartesi Machine to disprove a dishonest result of a computation `M' = F(M)`, where `F` is a deterministic state transition function that corresponds to running an application on top of the Linux operating system to process some input, `M = (S, I)` is the initial state `S` of the machine and the input `I`, and `M' = (S', O')` is the final state `S'` of the machine and its output `O'`. + +## Features + +- **Powerful** + - **High-performance RISC-V emulation**, delivering high execution speed for demanding applications. + - **Complete RISC-V RV64GC ISA support**, covering both privileged and unprivileged specifications. + - **Linux kernel execution**, enabling running of standard Linux distributions (e.g., Ubuntu). + - **Full-featured Linux environment**, enabling applications to use traditional software stacks. + - **Large state address space**, enabling applications to utilize gigabytes of data. + - **Forking support**, enabling parallel execution and efficient rollback of state transitions. + - **State inspection capabilities**, enabling examination of the entire address space and processor. +- **Developer Friendly** + - **Simple C API**, facilitating integration with various languages (e.g., C++, Rust, Go, Python, JavaScript). + - **Lua scripting interface**, for rapid prototyping and testing. + - **JSON-RPC API endpoint**, enabling remote machine control. + - **Interactive CLI application**, for prototyping in the terminal. + - **VirtIO network and shared filesystem devices**, allowing access to host state during prototyping. + - **State serialization**, for storing and loading of machine snapshots. +- **Verifiable** + - **Deterministic execution**, ensuring every instruction is reproducible (including floating-point). + - **State Merkle tree computation**, for generating cryptographic proofs. + - **State transition access logging**, enabling on-chain verification of state transitions. 
+ - **Cycle-level execution control**, for interactive fraud-proof bisection. + - **Microarchitecture-based emulation** of its interpreter for simplifying on-chain verification. + - **Generic I/O interface**, enabling handling of data input/output through state transitions. +- **Portable** + - **Cross-platform compatibility**, including Linux, macOS and Windows. + - **WebAssembly compatibility**, bringing all capabilities to browser environments. + - **Freestanding compilation**, suitable for embedding in other applications (e.g., in a zkVM). + - **Minimal runtime dependencies**, ensuring easy installation and integration. + +## Overview + +For a comprehensive technical overview of the Cartesi Machine emulator and its blockchain use cases, +you can watch this detailed presentation by Diego Nehab, +the principal architect of the Cartesi Machine, at the Ethereum Engineering Group: + +[![Cartesi Machine Overview](https://img.youtube.com/vi/ofb7MJ8dK0U/0.jpg)](https://www.youtube.com/watch?v=ofb7MJ8dK0U) + +In addition, you can watch an insightful interview with Diego Nehab about the Cartesi Machine on Cartesi's YouTube channel: + +[![Cartesi Machine Deep Dive](https://img.youtube.com/vi/uUzn_vdWyDM/0.jpg)](https://www.youtube.com/watch?v=uUzn_vdWyDM) ## Getting Started -Run `make help` for a list of target options. Here are some of them: - -``` -Main targets: -* all - Build the src/ code. 
To build from a clean clone, run: make submodules all - uarch - Build microarchitecture (requires riscv64-cartesi-linux-gnu-* toolchain) - uarch-with-toolchain - Build microarchitecture using the toolchain docker image - build-tests-all - Build all tests (machine, uarch and misc) - build-tests-machine - Build machine emulator tests (requires rv64gc-lp64d riscv64-cartesi-linux-gnu-* toolchain) - build-tests-machine-with-toolchain - Build machine emulator tests using the rv64gc-lp64d toolchain docker image - build-tests-uarch - Build microarchitecture rv64i instruction tests (requires rv64ima-lp64 riscv64-cartesi-linux-gnu-* toolchain) - build-tests-uarch-with-toolchain - Build microarchitecture rv64i instruction tests using the rv64ima-lp64 toolchain docker image - build-tests-misc - Build miscellaneous tests - build-tests-misc-with-builder-image - Build miscellaneous tests using the cartesi/machine-emulator:builder image - test-machine - Run machine emulator tests - test-uarch - Run uarch tests - test-misc - Run miscellaneous tests - test - Run all tests - doc - Build the doxygen documentation (requires doxygen) -Docker images targets: - build-emulator-image - Build the machine-emulator debian based docker image - build-debian-package - Build the cartesi-machine.deb package from image - build-toolchain - Build the emulator toolchain docker image - create-generated-files-patch - Create patch that adds generated files to source tree -Cleaning targets: - clean - Clean the src/ artifacts - depclean - Clean + dependencies - distclean - Depclean + profile information and downloads -``` - -### Requirements - -- C++ Compiler with support for C++20 (tested with GCC >= 8+ and Clang >= 8.x). -- GNU Make >= 3.81 -- Lua >= 5.4.4 -- Libslirp >= 4.6.0 -- Boost >= 1.81 +### Installation -Obs: Please note that Apple Clang Version number does not follow upstream LLVM/Clang. +We provide official packages for some distributions, but you can also build from source. 
-#### Debian Bookworm +#### Debian or Ubuntu -```bash -sudo apt-get install build-essential wget git clang-tidy-16 clang-format-16 \ - libboost1.81-dev libssl-dev libslirp-dev \ - ca-certificates pkg-config lua5.4 liblua5.4-dev \ - luarocks +We maintain an APT package repository containing binary packages for *amd64*, *arm64* and *riscv64*, you can install with: -sudo luarocks install --lua-version=5.4 luasocket -sudo luarocks install --lua-version=5.4 luasec -sudo luarocks install --lua-version=5.4 luaposix +```sh +# Add package repository +wget -qO - https://dist.cartesi.io/apt/keys/cartesi-deb-key.gpg | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/cartesi-deb-key.gpg +echo "deb https://dist.cartesi.io/apt stable/" | sudo tee /etc/apt/sources.list.d/cartesi-deb-apt.list +sudo apt-get update +# Install cartesi-machine +sudo apt-get install cartesi-machine ``` -For more information, see the [Configuring Lua 5.4](#configuring-lua-54) section. - -#### MacOS +The packages provided in this APT repository are known to work with **Debian 12** (Bookworm) and **Ubuntu 24.04** (Noble). -##### MacPorts +#### Alpine Linux -```bash -sudo port install clang-16 boost181 wget pkgconfig lua54 lua-luarocks libslirp +We maintain an APK package repository containing binary packages for *amd64*, *arm64* and *riscv64*, you can install with: -sudo luarocks install --lua-version=5.4 luasocket -sudo luarocks install --lua-version=5.4 luasec -sudo luarocks install --lua-version=5.4 luaposix +```sh +# Add package repository +wget -qO /etc/apk/keys/cartesi-apk-key.rsa.pub https://dist.cartesi.io/apk/keys/cartesi-apk-key.rsa.pub +echo "https://dist.cartesi.io/apk/stable" >> /etc/apk/repositories +apk update +# Install cartesi-machine +apk add cartesi-machine ``` -For more information, see the [Configuring Lua 5.4](#configuring-lua-54) section. 
+#### Arch Linux -##### Homebrew +We maintain an official Arch Linux package in [AUR](https://aur.archlinux.org/packages/cartesi-machine), you can install with: -```bash -brew install llvm@16 boost wget pkg-config openssl lua luarocks libslirp - -luarocks --lua-dir=$(brew --prefix)/opt/lua install luasocket -luarocks --lua-dir=$(brew --prefix)/opt/lua install luasec -luarocks --lua-dir=$(brew --prefix)/opt/lua install luaposix +```sh +yay -S cartesi-machine ``` -For more information, see the [Configuring Lua 5.4](#configuring-lua-54) section. - -##### Configuring Lua 5.4 +#### Homebrew -For emulator scripts to function properly, it is necessary for the lua5.4 binary to be available in the system PATH. If your operating system or package manager provides a Lua binary under a different name (e.g., lua instead of lua5.4, which is common on Homebrew), you will need to create a symbolic link or an alias named lua5.4. This can be done as follows: +We maintain a Homebrew tap for macOS, you can install with: -```bash -ln -s $(which lua) /usr/local/bin/lua5.4 # Create a symbolic link (adjust as needed for your system) -# or -alias lua5.4='lua' # Create an alias (add this line to your shell profile file like .bashrc or .zshrc) +```sh +brew tap cartesi/tap +brew install cartesi-machine ``` -###### Setting Up LuaRocks Modules +#### From Sources -To use features that require LuaRocks modules, you must ensure your environment is configured to find these modules. Export the output of `luarocks path --lua-version=5.4` to your environment by executing them or adding it to your .bashrc or .zshrc file. E.g.: +##### System Requirements -```bash -eval "$(luarocks path --lua-version=5.4)" -``` - -This command adjusts the environment variables for your shell sessions, ensuring LuaRocks-installed modules are correctly discovered by Lua scripts. +- C++ Compiler with support for C++20 (tested with GCC >= 11.x and Clang >= 14.x). 
+- GNU Make >= 3.81 +- Boost >= 1.81 +- Lua >= 5.4.4 (optional, required for scripting support and interactive terminal) +- Libslirp >= 4.6.0 (optional, required for networking support) -### Build +###### Debian Requirements -```bash -git clone --recurse-submodules -j3 https://github.com/cartesi/machine-emulator.git -make +```sh +sudo apt-get install build-essential git wget libboost1.81-dev liblua5.4-dev libslirp-dev lua5.4 ``` -Cleaning: +###### MacPorts Requirements -```bash -make clean +```sh +sudo port install clang boost181 wget pkgconfig lua54 libslirp ``` -Microarchitecture: +###### Homebrew Requirements -If you want to use a pre-built uarch RAM image instead of building one, use the variable `UARCH_RAM_IMAGE` to specify the path to the desired image file. - -```bash -$ make UARCH_RAM_IMAGE= +```sh +brew install llvm boost wget pkg-config lua libslirp ``` -### Install +#### Build -```bash -sudo make install PREFIX=/usr/local -``` +First, make sure to have all the system requirements, then run the following to build and install a stable release of the machine: -### Build C libraries in standalone +```sh +# clone a stable branch of the emulator +git clone --branch v0.19.0 https://github.com/cartesi/machine-emulator.git +cd machine-emulator -Both `libcartesi` and `libcartes_jsonrpc` C libraries can be compiled in standalone, either as static or shared library: +# patch the sources with required generated files +wget https://github.com/cartesi/machine-emulator/releases/download/v0.19.0/add-generated-files.diff +git apply add-generated-files.diff -```bash -make bundle-boost -make -C src libcartesi.a libcartesi_jsonrpc.a libcartesi.so libcartesi_jsonrpc.so +# compile +make ``` -The `.a` and `.so` files will be available in `src` directory, you can use any of them to link your application. - -You can even use other toolchains to cross compile targeting other platforms: +*Note*: We recommend running only stable releases. 
If you want to build the `main` development branch, you will need to regenerate files instead of patching the sources, which will require Docker on your system. For more details, please check our [development guide](https://github.com/cartesi/machine-emulator/wiki/Development-Guide). -```bash -# Target WASM with Emscripten toolchain -make -C src \ - CC=emcc CXX=em++ AR="emar rcs" \ - libcartesi.a +Finally, you can install it in your system in any path you would like with: -# Target WASM with WASI SDK toolchain -make -C src \ - CC=/opt/wasi-sdk/bin/clang CXX=/opt/wasi-sdk/bin/clang++ AR="/opt/wasi-sdk/bin/llvm-ar rcs" \ - libcartesi.a - -# Target Windows with mingw-w64 toolchain -make -C src \ - CC=x86_64-w64-mingw32-gcc \ - CXX=x86_64-w64-mingw32-g++ \ - AR="x86_64-w64-mingw32-ar rcs" \ - libcartesi.a +```sh +# install the emulator +sudo make install PREFIX=/usr/local ``` -## Running Tests +After installation, to boot a Linux system with the `cartesi-machine` command, you will need to also download: -To build and execute the all tests run: +- Guest [Linux image](https://github.com/cartesi/machine-linux-image) and place it at `$PREFIX/cartesi/images/linux.bin` +- Guest [rootfs image](https://github.com/cartesi/machine-rootfs-image) and place it at `$PREFIX/cartesi/images/rootfs.ext2`. -```bash -make build-tests-all -make test -``` +### Usage -To execute the machine test suite run: +Once you have the emulator, guest Linux image, and guest rootfs images installed, you can boot a Linux operating system by running: -```bash -make build-tests-machine-with-toolchain -make test-machine +```sh +cartesi-machine ``` -To execute the uarch test suite run: - -```bash -make build-tests-uarch-with-toolchain -make test-uarch +It should output something similar to: ``` -## Linter - -We use clang-tidy 15 as the linter. - -### Install + . + / \ + / \ +\---/---\ /----\ + \ X \ + \----/ \---/---\ + \ / CARTESI + \ / MACHINE + ' -#### Debian Bookworm +Nothing to do. 
+ -You need to install the package clang-tidy-16 and set it as the default executable with update-alternatives. - -```bash -apt install clang-tidy-16 -update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-16 120 +Halted +Cycles: 48415113 ``` -### Running Lint +You can start an interactive terminal to play around with: -```bash -make lint -j$(nproc) +```sh +cartesi-machine -it bash ``` -## Code format +And there you have a full Linux running in a RISC-V emulated CPU that you can interact with. +You can check `cartesi-machine --help` for more information on how to use the CLI application. -We use clang-format to format the code base. +### Library -### Install +You can use the emulator as a library in other applications; its `libcartesi` library provides a [C API](https://github.com/cartesi/machine-emulator/blob/main/src/machine-c-api.h) that is very simple to use. -#### Debian Bookworm +Check the following wiki guides on how to use it with different languages: +- [C/C++](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API) +- [Rust](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API-with-Rust) +- [Go](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API-with-Go) +- [JavaScript](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API-with-JavaScript) +- [Python](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API-with-Python) +- [Lua](https://github.com/cartesi/machine-emulator/wiki/Using-the-Lua-API) +- [WebAssembly](https://github.com/cartesi/machine-emulator/wiki/Using-the-C-API-with-WebAssembly) -You need to install the package clang-format-16 and set is as the default executable with update-alternatives. 
+ +## Use Cases -```bash -apt install clang-format-16 -update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-16 120 -``` +The following projects have been using the emulator: +- [Cartesi Rollups Node](https://github.com/cartesi/rollups-node) - Uses the emulator's library in Go for Layer 2 rollups on Ethereum. +- [Cartesi Dave](https://github.com/cartesi/dave) - Uses the emulator's library in Rust for on-chain fraud-proofs validation. +- [Cartesi CLI](https://github.com/cartesi/cli) - Uses the emulator's CLI in TypeScript for DApp development. -### Formatting code +## Related Projects -```bash -make format -``` +The Cartesi Machine emulator is directly related to the following important projects that are also maintained by us: +- [Cartesi Machine Guest Tools](https://github.com/cartesi/machine-guest-tools) - System utilities used inside guest machines. +- [Cartesi Machine Linux Image](https://github.com/cartesi/machine-linux-image) - Linux kernel image used by guest machines. +- [Cartesi Machine Rootfs Image](https://github.com/cartesi/machine-rootfs-image) - Root filesystem image used by guest machines. +- [Cartesi Machine Solidity Step](https://github.com/cartesi/machine-solidity-step) - Solidity smart contracts of machine microarchitecture step for on-chain fraud-proofs validation. -### Checking whether the code is formatted +## Benchmarks -```bash -make check-format -``` +The emulator's RISC-V interpreter is optimized for high performance given the requirements of on-chain verification. +For detailed performance metrics comparing the emulator against bare-metal execution and other virtual machines, +please see our [benchmarks](https://github.com/cartesi/machine-emulator/wiki/Benchmarks) page. -## Coverage +## Documentation -### Dependencies +The Cartesi Machine emulator documentation is undergoing a comprehensive update. 
+While the full documentation is being refreshed, you can find guides and tutorials in our [wiki](https://github.com/cartesi/machine-emulator/wiki). -#### Debian Bookworm - -If you want to run the GCC-based coverage, you should install the lcov package with the following command. - -```bash -sudo apt install lcov -``` +## Change Log -If you want to run the clang-based coverage, you should install the clang package with the following command. - -```bash -sudo apt install clang llvm -``` - -### Compilation - -Before running the coverage, you should build the emulator with the flag coverage-toolchain=gcc or coverage-toolchain=clang. -Make sure you run `make clean` to clean up any previous compilation. -For GCC-based coverage run the following command. - -```bash -make coverage=yes COVERAGE_TOOLCHAIN=gcc -j$(nproc) -make build-tests-all coverage=yes COVERAGE_TOOLCHAIN=gcc -j$(nproc) -``` +Changes between emulator releases are documented in [CHANGELOG](CHANGELOG.md). -For clang-based coverage run the following command. +## Roadmap -```bash -make coverage=yes COVERAGE_TOOLCHAIN=clang -j$(nproc) -make build-tests-all coverage=yes COVERAGE_TOOLCHAIN=clang -j$(nproc) -``` +We are continually improving the emulator with new features and enhancements. +Check out our roadmap at [GitHub Projects](https://github.com/cartesi/machine-emulator/projects) to see what's coming in the future. -### Running coverage +## Community & Support -After building the emulator with coverage enable, you should run the following command. -For instance: +- Join our [Discord](https://discord.gg/cartesi) `#cartesi-machine` channel to engage with the emulator users and developers. +- Report issues on our [GitHub Issues](https://github.com/cartesi/machine-emulator/issues). -```bash -make test coverage-report coverage=yes COVERAGE_TOOLCHAIN=gcc -``` +## Developing -This command will generate a coverage report in the src directory. 
-For clang coverage, repeat the same command but with the flag coverage-toolchain=clang. +For more detailed information about developing the emulator, including instructions for running tests, using the linter, and code formatting, please refer to our [development guide](https://github.com/cartesi/machine-emulator/wiki/Development-Guide) in the wiki. ## Contributing -Thank you for your interest in Cartesi! Head over to our [Contributing Guidelines](CONTRIBUTING.md) for instructions on how to sign our Contributors Agreement and get started with -Cartesi! - -Please note we have a [Code of Conduct](CODE_OF_CONDUCT.md), please follow it in all your interactions with the project. - -## License - -The `machine-emulator` repository and all contributions to it are licensed under the [LGPL 3.0](https://www.gnu.org/licenses/lgpl-3.0.html), unless otherwise specified below or in subdirectory LICENSE / COPYING files. Please review our [COPYING](COPYING) file for the LGPL 3.0 license. - -### Submodules and Dependencies +Please see our [contributing guidelines](CONTRIBUTING.md) for instructions on how to start contributing to the project. +Note we have a [code of conduct](CODE_OF_CONDUCT.md), please follow it in all your interactions with the project. -This project includes several submodules and dependencies, each with its own licensing: +## Authors -- `tests/machine`: Licensed under the Apache License 2.0. See the license terms in [tests/machine/LICENSE](tests/machine/LICENSE). -- `tests/uarch`: Licensed under the Apache License 2.0. Licensing details are available in [tests/uarch/LICENSE](tests/uarch/LICENSE). -- `third-party/llvm-flang-uint128`: Licensed under the Apache License 2.0 with LLVM exceptions. The license can be found at [third-party/llvm-flang-uint128/LICENSE](third-party/llvm-flang-uint128/LICENSE). -- `third-party/riscv-arch-test`: Source code licensed under the Apache 2.0 and BSD 3-Clause licenses. Documentation under `CC-BY-4.0`. 
License information is provided in README.md and other COPYING.* files like [third-party/riscv-arch-test/COPYING.APACHE](third-party/riscv-arch-test/COPYING.APACHE). -- `third-party/riscv-tests`: Licensed under the BSD 3-Clause "New" or "Revised" License. See [third-party/riscv-tests/LICENSE](third-party/riscv-tests/LICENSE) for license details. -- `third-party/riscv-tests/env`: Licensed under the BSD 3-Clause "New" or "Revised" License. License details are in [third-party/riscv-tests/env/LICENSE](third-party/riscv-tests/env/LICENSE). -- `third-party/tiny_sha3`: Licensed under the MIT License. The license can be found at [third-party/tiny_sha3/LICENSE](third-party/tiny_sha3/LICENSE). -- `third-party/nlohmann-json`: Licensed under the MIT License. The license can be found at [third-party/nlohmann-json/LICENSE.MIT](third-party/nlohmann-json/LICENSE.MIT). +The Cartesi Machine emulator is actively developed by [Cartesi](https://cartesi.io/)'s Machine Reference Unit, with significant contributions from many open-source developers. +For a complete list of authors, see the [AUTHORS](AUTHORS) file. -### Debian Packages - -The project releases several Debian packages, each subject to its specific licensing terms: - -- `cartesi-machine-[VERSION]_[ARCHITECTURE].deb` and `cartesi-machine-tests-[VERSION]_[ARCHITECTURE].deb` packages are licensed under LGPL v3.0 and may include or link to other software components with different licenses. -- `cartesi-machine-tests-data-[VERSION].deb`: This package contains files that are individually licensed under various terms, including but not limited to Apache-2.0, BSD-3-Clause-Regents, BSD-3-Clause, and GPL-2.0-only. For a comprehensive overview of the licenses applicable to specific files within this package, please refer to its copyright file, e.g., [tools/template/tests-data-copyright.template](tools/template/tests-data-copyright.template). 
- -For detailed licensing information of each Debian package, please refer to the copyright file included within the package. - -### Additional Notes - -This project may include or link to other software components with different licenses. Contributors and users are responsible for ensuring compliance with each component's licensing terms. For detailed information, please refer to the individual LICENSE files within each directory or submodule, and for the Debian packages, please review the respective copyright and licensing details as mentioned above. +## License +The repository and all contributions to it are licensed under the [LGPL 3.0](https://www.gnu.org/licenses/lgpl-3.0.html), unless otherwise specified below or in subdirectory LICENSE / COPYING files. +Please review our [COPYING](COPYING) file for the LGPL 3.0 license and also [LICENSES](LICENSES.md) file for additional information on third-party software licenses. From 37be57ebde84b71b4eea993dc57a7c1b4b41c3e5 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Wed, 22 Jan 2025 17:33:29 -0300 Subject: [PATCH 02/15] fix: minor changes to allow running tests with Alpine guests --- tests/lua/create-machines.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lua/create-machines.lua b/tests/lua/create-machines.lua index 58c155193..10175d2bd 100755 --- a/tests/lua/create-machines.lua +++ b/tests/lua/create-machines.lua @@ -197,7 +197,7 @@ echo ' curl -vv -H "Content-Type: application/json" -d "{\"status\":\"accept\"}" http://127.0.0.1:5004/finish; exit 2' > /home/dapp/s.sh; chmod +x /home/dapp/s.sh; -rollup-init bash /home/dapp/s.sh +rollup-init sh /home/dapp/s.sh ]] ) @@ -208,7 +208,7 @@ echo 'curl -vv -H "Content-Type: application/json" -d "{\"status\":\"accept\"}" killall rollup-http-server; sleep 86400' > /home/dapp/s.sh; chmod +x /home/dapp/s.sh; -rollup-init bash /home/dapp/s.sh +rollup-init sh /home/dapp/s.sh ]] ) From 4e3ed68903344a96ebca35c498afaeb64a51633e Mon Sep 17 00:00:00 
2001 From: Eduardo Bart Date: Thu, 23 Jan 2025 08:45:34 -0300 Subject: [PATCH 03/15] chore: update debian base image --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a689aec07..3da60b59d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM debian:bookworm-20241016 AS toolchain +FROM debian:bookworm-20250113 AS toolchain RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ @@ -80,7 +80,7 @@ FROM builder AS debian-packager RUN make install-uarch debian-package DESTDIR=$PWD/_install #################################################################################################### -FROM debian:bookworm-20241016-slim +FROM debian:bookworm-20250113-slim ARG MACHINE_EMULATOR_VERSION=0.0.0 ARG TARGETARCH From 478a3d88a91b519ad754e157d630f42346ad10a6 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Thu, 23 Jan 2025 08:48:09 -0300 Subject: [PATCH 04/15] chore: fix broken Boost link `make bundle-boost` --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29baef663..613400a8f 100644 --- a/Makefile +++ b/Makefile @@ -206,7 +206,7 @@ doc: bundle-boost: $(DEPDIR)/downloads/boost $(DEPDIR)/downloads/boost: mkdir -p $(DOWNLOADDIR) - wget -O $(DEPDIR)/downloads/boost_1_81_0.tar.gz https://boostorg.jfrog.io/artifactory/main/release/1.81.0/source/boost_1_81_0.tar.gz + wget -O $(DEPDIR)/downloads/boost_1_81_0.tar.gz https://archives.boost.io/release/1.81.0/source/boost_1_81_0.tar.gz tar -C $(DEPDIR)/downloads -xzf $(DEPDIR)/downloads/boost_1_81_0.tar.gz boost_1_81_0/boost mv $(DEPDIR)/downloads/boost_1_81_0/boost $(DEPDIR)/downloads/boost rm -rf $(DEPDIR)/downloads/boost_1_81_0.tar.gz $(DEPDIR)/downloads/boost_1_81_0 From cf874cda6f5f1b6dea349e6e49c070e336e270f0 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 24 Jan 2025 11:24:05 -0300 Subject: [PATCH 05/15] fix: fix inconsistencies in 
jsonrpc-discover.json --- src/jsonrpc-discover.json | 41 ++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/jsonrpc-discover.json b/src/jsonrpc-discover.json index 05b4e9fdc..24a90b15e 100644 --- a/src/jsonrpc-discover.json +++ b/src/jsonrpc-discover.json @@ -1389,13 +1389,17 @@ "$ref": "#/components/schemas/VirtIOHostfwd" } }, + "VirtIODeviceType": { + "title": "VirtIODeviceType", + "enum": ["console", "p9fs", "net-user", "net-tuntap"] + }, "VirtIODeviceConfig": { "title": "VirtIODeviceConfig", "type": "object", "required": ["type"], "properties": { "type": { - "type": "string" + "$ref": "#/components/schemas/VirtIODeviceType" }, "tag": { "type": "string" @@ -1660,7 +1664,8 @@ "virtio": { "$ref": "#/components/schemas/VirtIOConfigs" } - } + }, + "required": ["ram"] }, "InterpreterBreakReason": { "title": "InterpreterBreakReason", @@ -1685,6 +1690,7 @@ "Base64Hash": { "title": "Base64Hash", "type": "string", + "description": "32-byte hash encoded in base64", "contentEncoding": "base64", "minLength": 45, "maxLength": 45 @@ -1776,13 +1782,13 @@ "$ref": "#/components/schemas/UnsignedInteger" }, "read_hash": { - "$ref": "#/components/schemas/Base64String" + "$ref": "#/components/schemas/Base64Hash" }, "read": { "$ref": "#/components/schemas/Base64String" }, "written_hash": { - "$ref": "#/components/schemas/Base64String" + "$ref": "#/components/schemas/Base64Hash" }, "written": { "$ref": "#/components/schemas/Base64String" @@ -1932,13 +1938,24 @@ "scounteren", "senvcfg", "ilrsc", - "iflags", + "iprv", + "iflags_X", + "iflags_Y", + "iflags_H", "iunrep", "clint_mtimecmp", "plic_girqpend", "plic_girqsrvd", "htif_tohost", + "htif_tohost_dev", + "htif_tohost_cmd", + "htif_tohost_reason", + "htif_tohost_data", "htif_fromhost", + "htif_fromhost_dev", + "htif_fromhost_cmd", + "htif_fromhost_reason", + "htif_fromhost_data", "htif_ihalt", "htif_iconsole", "htif_iyield", @@ -1976,19 +1993,7 @@ "uarch_x31", "uarch_pc", 
"uarch_cycle", - "uarch_halt_flag", - "iflags_prv", - "iflags_x", - "iflags_y", - "iflags_h", - "htif_tohost_dev", - "htif_tohost_cmd", - "htif_tohost_reason", - "htif_tohost_data", - "htif_fromhost_dev", - "htif_fromhost_cmd", - "htif_fromhost_reason", - "htif_fromhost_data" + "uarch_halt_flag" ] }, "MemoryRangeDescription": { From c1880ca1dbd051feb9d32daa914332091bb93e75 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 24 Jan 2025 16:40:54 -0300 Subject: [PATCH 06/15] chore: update changelog --- CHANGELOG.md | 80 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d109b477c..d1294b4aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,18 +5,76 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -### Added -- Added the "log_step" method -- Added the "verify_step" method -- Added the "--log-step" option to "cartesi-machine.lua" - -### Changed -- Added a "--jobs" option to "uarch-riscv-tests.lua" test -- add-created-files.diff should now be applied with `-p1` -- Improved send_cmio_response bounds checking +### Added +- Added `libluacartesi.a` and `libluacartesi_jsonrpc.a` static libraries when installing to allow packaging statically linked Lua programs +- Added `--remote-spawn` command line option to spawn remote machines +- Added `--remote-health-check` command line option to check availability of remote machines +- Added `--store-json-config` command line option to store machine configs to JSON files +- Added `--load-json-config` command line option to load machine configs from JSON files +- Added `--server-fd` command line option to JSON-RPC remote machine +- Added `--setpgid` command line option to JSON-RPC remote machine +- Added `cartesi.tobase64`, `cartesi.frombase64`, `cartesi.tojson`, and `cartesi.fromjson` Lua functions
+- Added various new methods to JSON-RPC remote machine interface to allow more precise management of remote machine processes +- Added timeout support to JSON-RPC remote machine requests +- Added capability to retrieve PID of spawned remote machines +- Added support for VirtIO devices in JSON configs +- Added the `--log-step` command line option to log multiple machine cycles +- Added the `cm_log_step` and `cm_verify_step` methods to log multiple machine cycles and verify with ZK VMs +- Added the `cm_receive_cmio_request` method, the counterpart of `cm_send_cmio_response` method + +### Fixed +- Fixed various linting errors with the latest Clang static analyzer +- Fixed broken link when running `make bundle-boost` +- Fixed many broken links in the README and markdown files +- Fixed issues when using `--network` option in Alpine guests +- Fixed issue when using `--assert-rolling-template` +- Fixed `--skip-root-hash-store` not skipping root hash computation +- Fixed kernel panic bug when feeding 100k stdin inputs into VirtIO console +- Fixed bug when trying to advance multiple inputs through the `cartesi-machine` CLI +- Fixed compile errors when targeting Windows, Alpine Linux, and WebAssembly + +### Changed +- Optimized RISC-V instruction decoder to use token threading, computed goto, and big jump tables +- Optimized RISC-V instruction fetcher to cache latest accessed pages +- Refactored the code to use C++20 standard, which is now a minimum requirement +- Refactored the public C API to be simpler and easier to use with other programming languages +- Refactored the Lua API to make remote and local use the same API interface +- Refactored and simplified many portions of the code, improving maintainability of the project +- Changed default GCC compiler flags to better optimize the RISC-V interpreter +- Changed flash drive start address configuration to be non-mandatory (it will be automatically chosen) +- Changed sibling hashes ordering in JSON logs to match the
expected ordering in Lua test suite +- Changed rebind method of remote machines to support binding on port 0 and return the bound address +- Changed to Debian RISC-V toolchain when compiling microarchitecture +- Changed `--remote-fork` command line option to accept an address +- Changed `add-generated-files.diff` to be applied with `patch -Np1` to make packaging easier +- Changed default machine config to include bootargs +- Changed machine configs and access logs to be represented as JSON strings in the C API +- Renamed `--log-uarch-step` command line option to `--log-step-uarch` +- Renamed `--log-uarch-reset` command line option to `--log-reset-uarch` +- Renamed `--auto-uarch-reset` command line option to `--auto-reset-uarch` +- Renamed various C API functions, structs, and enumerations +- Revamped project README with more up-to-date explanations and simplified instructions +- Changed help and configs to be printed to `stdout` instead of `stderr` +- Changed the public C API to require less manual memory management +- Improved documentation in the public C API header +- Improved CI to use more parallel jobs when testing +- Improved `send_cmio_response` bounds checking +- Split `iflags` CSR into multiple CSRs + +### Removed +- Removed publishing of Debian package artifacts in favor of official Linux package repositories +- Removed use of `cartesi/toolchain` image +- Removed automatic strip of binaries when performing `make install` (stripping should now be done when packaging) +- Removed `no4lvl` from bootargs (Sv57 address translation is now disabled at interpreter level to fix NodeJS JIT issues) +- Removed `rootfstype=ext2` from bootargs, allowing use of root filesystems in other formats, such as Ext4 and SquashFS +- Removed all `*-defines.h` headers +- Removed all specific read/write of machine registers from the public API +- Removed various C API functions +- Removed `dkjson` Lua library (not needed since we have our own JSON encoding functions now) +- Removed
GDB documentation from the repository (moved to the wiki) +- Removed internal development documentation from the README (moved to the wiki) +- Removed many dead code segments after the refactoring -### Fixed -- Fixed --skip-root-hash-store not skipping root hash computation when using the cli ## [0.18.1] - 2024-08-12 ### Changed From 2c4ca6d2bf1d94b0c24bfa29e7c1ac1e901a6fa8 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 24 Jan 2025 16:51:32 -0300 Subject: [PATCH 07/15] chore: bump guest tools --- tests/Makefile | 2 +- tests/dependencies | 4 ++-- tests/dependencies.sha256 | 2 +- tools/template/tests-data-copyright.template | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index b1b49575c..f28c6ba92 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -120,7 +120,7 @@ $(BUILDDIR)/%: $(CARTESI_IMAGES): | $(CARTESI_IMAGES_PATH) @wget -nc -i dependencies -P $(CARTESI_IMAGES_PATH) @shasum -ca 256 dependencies.sha256 - @cd $(CARTESI_IMAGES_PATH) && ln -s rootfs-tools-v0.16.1.ext2 rootfs.ext2 + @cd $(CARTESI_IMAGES_PATH) && ln -s rootfs-tools-v0.17.0-test2.ext2 rootfs.ext2 @cd $(CARTESI_IMAGES_PATH) && ln -s linux-6.5.13-ctsi-1-v0.20.0.bin linux.bin images: | $(CARTESI_IMAGES) diff --git a/tests/dependencies b/tests/dependencies index 7ee2ffd9b..d2a85cd10 100644 --- a/tests/dependencies +++ b/tests/dependencies @@ -1,2 +1,2 @@ -https://github.com/cartesi/machine-emulator-tools/releases/download/v0.16.1/rootfs-tools-v0.16.1.ext2 -https://github.com/cartesi/image-kernel/releases/download/v0.20.0/linux-6.5.13-ctsi-1-v0.20.0.bin +https://github.com/cartesi/machine-guest-tools/releases/download/v0.17.0-test2/rootfs-tools-v0.17.0-test2.ext2 +https://github.com/cartesi/machine-linux-image/releases/download/v0.20.0/linux-6.5.13-ctsi-1-v0.20.0.bin diff --git a/tests/dependencies.sha256 b/tests/dependencies.sha256 index 799320614..7242eebb4 100644 --- a/tests/dependencies.sha256 +++ b/tests/dependencies.sha256 @@ -1,2 +1,2 
@@ 65dd100ff6204346ac2f50f772721358b5c1451450ceb39a154542ee27b4c947 build/images/linux-6.5.13-ctsi-1-v0.20.0.bin -4db885fdb4f013922d8ea8474768148ac4d45460a4ef30aea823836ea72ffed9 build/images/rootfs-tools-v0.16.1.ext2 +293f377b0cb32cc477ef2c71be9430bab3a25d54eb0ab9aff07a4e6fac6aa829 build/images/rootfs-tools-v0.17.0-test2.ext2 diff --git a/tools/template/tests-data-copyright.template b/tools/template/tests-data-copyright.template index 6b0e7c420..465351a23 100644 --- a/tools/template/tests-data-copyright.template +++ b/tools/template/tests-data-copyright.template @@ -55,9 +55,9 @@ Copyright: Various authors, see below Source: https://github.com/cartesi/linux/ License: GPL-2.0-only -Files: /usr/share/cartesi-machine/tests/data/images/rootfs-tools-v0.15.0.ext2 +Files: /usr/share/cartesi-machine/tests/data/images/rootfs-tools-v0.17.0-test2.ext2 Copyright: Various authors, see below -Source: https://github.com/cartesi/machine-emulator-tools/ +Source: https://github.com/cartesi/machine-guest-tools/ License: Various, see below License: Apache-2.0 From 9571afc21a7945aaf2738bff7f50c2c1b37d9854 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 24 Jan 2025 16:54:23 -0300 Subject: [PATCH 08/15] chore: bump emulator release number --- src/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 2e167b768..135ea5ea9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,12 +14,12 @@ # with this program (see COPYING). If not, see . 
# -EMULATOR_MARCHID=18 +EMULATOR_MARCHID=19 # Every new emulator release should bump these constants EMULATOR_VERSION_MAJOR=0 -EMULATOR_VERSION_MINOR=18 -EMULATOR_VERSION_PATCH=1 +EMULATOR_VERSION_MINOR=19 +EMULATOR_VERSION_PATCH=0 EMULATOR_VERSION_LABEL= TARGET_OS?=$(shell uname) From e659541ca816e923a68a9a0f8be2b173a78736b1 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Tue, 28 Jan 2025 16:47:57 -0300 Subject: [PATCH 09/15] docs: fix documentation of cm_verify_step --- src/machine-c-api.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/machine-c-api.h b/src/machine-c-api.h index a3e904d9f..650a5cd60 100644 --- a/src/machine-c-api.h +++ b/src/machine-c-api.h @@ -338,7 +338,7 @@ CM_API cm_error cm_get_reg_address(const cm_machine *m, cm_reg reg, uint64_t *va /// \brief Creates a new local machine object. /// \param new_m Receives the pointer to the new machine object. Set to NULL on failure. /// \returns 0 for success, non zero code for error. -/// \detail A newly created object is empty (does not hold a machine instance). +/// \details A newly created object is empty (does not hold a machine instance). /// Use cm_create() or cm_load() to instantiate a machine into the object. /// Use cm_create_new() or cm_load_new() as single-call shortcuts. /// Use cm_delete() to delete the object. @@ -454,7 +454,7 @@ CM_API cm_error cm_get_runtime_config(const cm_machine *m, const char **runtime_ /// \param m Pointer to a non-empty machine object (holds a machine instance). /// \param start Range start physical address. /// \param length Range length in bytes. -/// \param shared[ni] If true, changes to the range from inside the machine will be +/// \param shared If true, changes to the range from inside the machine will be /// written to the associated image file in the host. /// \param image_filename Image file name to load into the range. If NULL, entire /// range is cleared with zeros. 
@@ -547,7 +547,7 @@ CM_API cm_error cm_write_memory(cm_machine *m, uint64_t address, const uint8_t * /// \param data Receives chunk of memory. /// \param length Size of chunk in bytes. /// \returns 0 for success, non zero code for error. -/// \detail The translation is based on the current mapping, as defined in CM_REG_SATP. +/// \details The translation is based on the current mapping, as defined in CM_REG_SATP. CM_API cm_error cm_read_virtual_memory(cm_machine *m, uint64_t address, uint8_t *data, uint64_t length); /// \brief Writes a chunk of data to a machine memory range, by its virtual address. @@ -556,7 +556,7 @@ CM_API cm_error cm_read_virtual_memory(cm_machine *m, uint64_t address, uint8_t /// \param data Source for chunk of data. /// \param length Size of chunk in bytes. /// \returns 0 for success, non zero code for error. -/// \detail The translation is based on the current mapping, as defined in CM_REG_SATP. +/// \details The translation is based on the current mapping, as defined in CM_REG_SATP. CM_API cm_error cm_write_virtual_memory(cm_machine *m, uint64_t address, const uint8_t *data, uint64_t length); /// \brief Translates a virtual memory address to its corresponding physical memory address. @@ -564,7 +564,7 @@ CM_API cm_error cm_write_virtual_memory(cm_machine *m, uint64_t address, const u /// \param vaddr Virtual address to translate. /// \param paddr Receives the physical memory address. /// \returns 0 for success, non zero code for error. -/// \detail The translation is based on the current mapping, as defined in CM_REG_SATP. +/// \details The translation is based on the current mapping, as defined in CM_REG_SATP. 
CM_API cm_error cm_translate_virtual_address(cm_machine *m, uint64_t vaddr, uint64_t *paddr); // ------------------------------------ @@ -623,8 +623,8 @@ CM_API cm_error cm_send_cmio_response(cm_machine *m, uint16_t reason, const uint /// \brief Runs the machine for the given mcycle count and generates a log of accessed pages and proof data. /// \param m Pointer to a non-empty machine object (holds a machine instance). -/// \param mcycle_count Number of mcycles to run -/// \param log_filename Name of the log file to be generated +/// \param mcycle_count Number of mcycles to run. +/// \param log_filename Name of the log file to be generated. /// \param break_reason Receives reason for returning (can be NULL). Set to CM_BREAK_REASON_FAILED on failure. /// \returns 0 for success, non zero code for error. CM_API cm_error cm_log_step(cm_machine *m, uint64_t mcycle_count, const char *log_filename, @@ -663,13 +663,13 @@ CM_API cm_error cm_log_send_cmio_response(cm_machine *m, uint16_t reason, const // ------------------------------------ /// \brief Checks the validity of a step log file. -/// \param m Pointer to a non-empty machine object (holds a machine instance). -/// \param root_hash_before State hash before step -/// \param log_filename Path to the step log file to be verified -/// \param mcycle_count Number of mcycles in the step -/// \param root_hash_after State hash after step +/// \param m Pointer to a machine object. Can be NULL (for local machines). +/// \param root_hash_before State hash before step. +/// \param log_filename Path to the step log file to be verified. +/// \param mcycle_count Number of mcycles in the step. +/// \param root_hash_after State hash after step. /// \param break_reason Receives reason for returning (can be NULL). Set to CM_BREAK_REASON_FAILED on failure. -/// \returns 0 for success, non zero code for error +/// \returns 0 for success, non zero code for error. 
CM_API cm_error cm_verify_step(const cm_machine *m, const cm_hash *root_hash_before, const char *log_filename, uint64_t mcycle_count, const cm_hash *root_hash_after, cm_break_reason *break_reason); From 491367fa01f83632b757210526d783cd0121086b Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Wed, 29 Jan 2025 10:59:04 -0300 Subject: [PATCH 10/15] fix: thread_local logic was inverted --- src/os-features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/os-features.h b/src/os-features.h index 62b631ff0..f853fd256 100644 --- a/src/os-features.h +++ b/src/os-features.h @@ -23,9 +23,9 @@ #if !defined(NO_THREADS) #define HAVE_THREADS -#define THREAD_LOCAL -#else #define THREAD_LOCAL thread_local +#else +#define THREAD_LOCAL #endif #if !defined(NO_TERMIOS) && !defined(_WIN32) && !defined(__wasi__) From 14dee0f889b877400e793d165846e66f72765193 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Mon, 3 Feb 2025 19:20:43 -0300 Subject: [PATCH 11/15] fix: forward raised interrupts to WFI instruction and fix os_now_us --- src/os.cpp | 2 +- src/state-access.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/os.cpp b/src/os.cpp index f9325e6ca..cd2ef02a0 100644 --- a/src/os.cpp +++ b/src/os.cpp @@ -700,7 +700,7 @@ void os_unmap_file(unsigned char *host_memory, [[maybe_unused]] uint64_t length) } int64_t os_now_us() { - std::chrono::time_point start{}; + static std::chrono::time_point start{}; static bool started = false; if (!started) { started = true; diff --git a/src/state-access.h b/src/state-access.h index 589144c5b..83e837c27 100644 --- a/src/state-access.h +++ b/src/state-access.h @@ -410,7 +410,7 @@ class state_access : public i_state_access { } NO_INLINE std::pair do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { - const bool interrupt_raised = false; + bool interrupt_raised = false; // Only poll external interrupts if we are in unreproducible mode if (unlikely(do_read_iunrep())) { // Convert the 
relative interval of cycles we can wait to the interval of host time we can wait @@ -423,16 +423,16 @@ class state_access : public i_state_access { // Poll virtio for events (e.g console stdin, network sockets) // Timeout may be decremented in case a device has deadline timers (e.g network device) if (m_m.has_virtio_devices() && m_m.has_virtio_console()) { // VirtIO + VirtIO console - m_m.poll_virtio_devices(&timeout_us, &da); + interrupt_raised |= m_m.poll_virtio_devices(&timeout_us, &da); // VirtIO console device will poll TTY } else if (m_m.has_virtio_devices()) { // VirtIO without a console - m_m.poll_virtio_devices(&timeout_us, &da); + interrupt_raised |= m_m.poll_virtio_devices(&timeout_us, &da); if (m_m.has_htif_console()) { // VirtIO + HTIF console // Poll tty without waiting more time, because the pool above should have waited enough time - os_poll_tty(0); + interrupt_raised |= os_poll_tty(0); } } else if (m_m.has_htif_console()) { // Only HTIF console - os_poll_tty(timeout_us); + interrupt_raised |= os_poll_tty(timeout_us); } else if (timeout_us > 0) { // No interrupts to check, just keep the CPU idle os_sleep_us(timeout_us); } From 8b2f26cae826efd676161bf91b367335df7ae6bc Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Mon, 10 Feb 2025 17:47:42 -0300 Subject: [PATCH 12/15] fix: compile errors with Boost 1.87 --- src/jsonrpc-virtual-machine.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jsonrpc-virtual-machine.cpp b/src/jsonrpc-virtual-machine.cpp index e31e60e42..03e56b354 100644 --- a/src/jsonrpc-virtual-machine.cpp +++ b/src/jsonrpc-virtual-machine.cpp @@ -40,6 +40,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include +#include #include #include #include @@ -95,7 +96,7 @@ static asio::ip::tcp::endpoint parse_endpoint(const std::string &address) { if (port <= 0 || port >= 65536) { throw std::runtime_error("invalid port number"s); } - return 
{asio::ip::address::from_string(host), static_cast(port)}; + return {asio::ip::make_address(host), static_cast(port)}; } catch (std::exception &e) { throw std::runtime_error("failed to parse endpoint from address \""s + address + "\": "s + e.what()); } From 509183d8adf1a84e9177384e3802ef2141638776 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 31 Jan 2025 10:18:59 -0300 Subject: [PATCH 13/15] feat: add mtime CSR --- src/clua-i-virtual-machine.cpp | 1 + src/i-state-access.h | 12 +++++ src/json-util.cpp | 21 +++++--- src/jsonrpc-discover.json | 4 ++ src/machine-c-api.cpp | 2 + src/machine-c-api.h | 1 + src/machine-config.h | 1 + src/machine-reg.h | 1 + src/machine-state.h | 1 + src/machine.cpp | 7 +++ src/record-step-state-access.h | 10 ++++ src/replay-step-state-access.h | 8 +++ src/riscv-constants.h | 1 + src/shadow-state-factory.cpp | 1 + src/shadow-state.h | 1 + src/state-access.h | 8 +++ src/uarch-bridge.h | 7 +++ tests/lua/machine-bind.lua | 78 +++++++++++++++--------------- uarch/uarch-machine-state-access.h | 8 +++ 19 files changed, 127 insertions(+), 46 deletions(-) diff --git a/src/clua-i-virtual-machine.cpp b/src/clua-i-virtual-machine.cpp index 8ffbb30b5..2c94c202e 100644 --- a/src/clua-i-virtual-machine.cpp +++ b/src/clua-i-virtual-machine.cpp @@ -127,6 +127,7 @@ cm_reg clua_check_cm_proc_reg(lua_State *L, int idx) try { {"mimpid", CM_REG_MIMPID}, {"mcycle", CM_REG_MCYCLE}, {"icycleinstret", CM_REG_ICYCLEINSTRET}, + {"mtime", CM_REG_MTIME}, {"mstatus", CM_REG_MSTATUS}, {"mtvec", CM_REG_MTVEC}, {"mscratch", CM_REG_MSCRATCH}, diff --git a/src/i-state-access.h b/src/i-state-access.h index 694d728ad..2d4c4ffab 100644 --- a/src/i-state-access.h +++ b/src/i-state-access.h @@ -155,6 +155,18 @@ class i_state_access { // CRTP return derived().do_write_icycleinstret(val); } + /// \brief Reads CSR mtime. + /// \returns Register value. + uint64_t read_mtime() { + return derived().do_read_mtime(); + } + + /// \brief Writes CSR mtime. 
+ /// \param val New register value. + void write_mtime(uint64_t val) { + return derived().do_write_mtime(val); + } + /// \brief Reads CSR mvendorid. /// \returns Register value. uint64_t read_mvendorid() { diff --git a/src/json-util.cpp b/src/json-util.cpp index a80e85e07..46ecd9536 100644 --- a/src/json-util.cpp +++ b/src/json-util.cpp @@ -144,6 +144,7 @@ static auto reg_from_name(const std::string &name) { {"mimpid", reg::mimpid}, {"mcycle", reg::mcycle}, {"icycleinstret", reg::icycleinstret}, + {"mtime", reg::mtime}, {"mstatus", reg::mstatus}, {"mtvec", reg::mtvec}, {"mscratch", reg::mscratch}, @@ -375,6 +376,8 @@ static auto reg_to_name(machine::reg r) { return "mcycle"; case reg::icycleinstret: return "icycleinstret"; + case reg::mtime: + return "mtime"; case reg::mstatus: return "mstatus"; case reg::mtvec: @@ -1236,6 +1239,7 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, processor_config &v ju_get_opt_field(jconfig, "mimpid"s, value.mimpid, new_path); ju_get_opt_field(jconfig, "mcycle"s, value.mcycle, new_path); ju_get_opt_field(jconfig, "icycleinstret"s, value.icycleinstret, new_path); + ju_get_opt_field(jconfig, "mtime"s, value.mtime, new_path); ju_get_opt_field(jconfig, "mstatus"s, value.mstatus, new_path); ju_get_opt_field(jconfig, "mtvec"s, value.mtvec, new_path); ju_get_opt_field(jconfig, "mscratch"s, value.mscratch, new_path); @@ -1797,14 +1801,15 @@ void to_json(nlohmann::json &j, const processor_config &config) { {"f25", config.f[25]}, {"f26", config.f[26]}, {"f27", config.f[27]}, {"f28", config.f[28]}, {"f29", config.f[29]}, {"f30", config.f[30]}, {"f31", config.f[31]}, {"pc", config.pc}, {"fcsr", config.fcsr}, {"mvendorid", config.mvendorid}, {"marchid", config.marchid}, {"mimpid", config.mimpid}, - {"mcycle", config.mcycle}, {"icycleinstret", config.icycleinstret}, {"mstatus", config.mstatus}, - {"mtvec", config.mtvec}, {"mscratch", config.mscratch}, {"mepc", config.mepc}, {"mcause", config.mcause}, - {"mtval", config.mtval}, 
{"misa", config.misa}, {"mie", config.mie}, {"mip", config.mip}, - {"medeleg", config.medeleg}, {"mideleg", config.mideleg}, {"mcounteren", config.mcounteren}, - {"menvcfg", config.menvcfg}, {"stvec", config.stvec}, {"sscratch", config.sscratch}, {"sepc", config.sepc}, - {"scause", config.scause}, {"stval", config.stval}, {"satp", config.satp}, {"scounteren", config.scounteren}, - {"senvcfg", config.senvcfg}, {"ilrsc", config.ilrsc}, {"iprv", config.iprv}, {"iflags_X", config.iflags_X}, - {"iflags_Y", config.iflags_Y}, {"iflags_H", config.iflags_H}, {"iunrep", config.iunrep}}; + {"mcycle", config.mcycle}, {"icycleinstret", config.icycleinstret}, {"mtime", config.mtime}, + {"mstatus", config.mstatus}, {"mtvec", config.mtvec}, {"mscratch", config.mscratch}, {"mepc", config.mepc}, + {"mcause", config.mcause}, {"mtval", config.mtval}, {"misa", config.misa}, {"mie", config.mie}, + {"mip", config.mip}, {"medeleg", config.medeleg}, {"mideleg", config.mideleg}, + {"mcounteren", config.mcounteren}, {"menvcfg", config.menvcfg}, {"stvec", config.stvec}, + {"sscratch", config.sscratch}, {"sepc", config.sepc}, {"scause", config.scause}, {"stval", config.stval}, + {"satp", config.satp}, {"scounteren", config.scounteren}, {"senvcfg", config.senvcfg}, {"ilrsc", config.ilrsc}, + {"iprv", config.iprv}, {"iflags_X", config.iflags_X}, {"iflags_Y", config.iflags_Y}, + {"iflags_H", config.iflags_H}, {"iunrep", config.iunrep}}; } void to_json(nlohmann::json &j, const flash_drive_configs &fs) { diff --git a/src/jsonrpc-discover.json b/src/jsonrpc-discover.json index 24a90b15e..b9bbde9f2 100644 --- a/src/jsonrpc-discover.json +++ b/src/jsonrpc-discover.json @@ -1217,6 +1217,9 @@ "icycleinstret": { "$ref": "#/components/schemas/UnsignedInteger" }, + "mtime": { + "$ref": "#/components/schemas/UnsignedInteger" + }, "mstatus": { "$ref": "#/components/schemas/UnsignedInteger" }, @@ -1916,6 +1919,7 @@ "mimpid", "mcycle", "icycleinstret", + "mtime", "mstatus", "mtvec", "mscratch", diff --git 
a/src/machine-c-api.cpp b/src/machine-c-api.cpp index 12268da54..2faea10ab 100644 --- a/src/machine-c-api.cpp +++ b/src/machine-c-api.cpp @@ -271,6 +271,8 @@ static cartesi::machine_reg convert_from_c(cm_reg r) { return reg::mcycle; case CM_REG_ICYCLEINSTRET: return reg::icycleinstret; + case CM_REG_MTIME: + return reg::mtime; case CM_REG_MSTATUS: return reg::mstatus; case CM_REG_MTVEC: diff --git a/src/machine-c-api.h b/src/machine-c-api.h index 650a5cd60..a5a1047d2 100644 --- a/src/machine-c-api.h +++ b/src/machine-c-api.h @@ -204,6 +204,7 @@ typedef enum cm_reg { CM_REG_MIMPID, CM_REG_MCYCLE, CM_REG_ICYCLEINSTRET, + CM_REG_MTIME, CM_REG_MSTATUS, CM_REG_MTVEC, CM_REG_MSCRATCH, diff --git a/src/machine-config.h b/src/machine-config.h index 30beab497..79aa5b834 100644 --- a/src/machine-config.h +++ b/src/machine-config.h @@ -50,6 +50,7 @@ struct processor_config final { uint64_t mimpid{MIMPID_INIT}; ///< Value of mimpid CSR uint64_t mcycle{MCYCLE_INIT}; ///< Value of mcycle CSR uint64_t icycleinstret{ICYCLEINSTRET_INIT}; ///< Value of icycleinstret CSR + uint64_t mtime{MTIME_INIT}; ///< Value of mtime CSR uint64_t mstatus{MSTATUS_INIT}; ///< Value of mstatus CSR uint64_t mtvec{MTVEC_INIT}; ///< Value of mtvec CSR uint64_t mscratch{MSCRATCH_INIT}; ///< Value of mscratch CSR diff --git a/src/machine-reg.h b/src/machine-reg.h index 0fc8e77bf..43b7c5471 100644 --- a/src/machine-reg.h +++ b/src/machine-reg.h @@ -100,6 +100,7 @@ enum class machine_reg : uint64_t { mimpid = PMA_SHADOW_STATE_START + offsetof(shadow_state, mimpid), mcycle = PMA_SHADOW_STATE_START + offsetof(shadow_state, mcycle), icycleinstret = PMA_SHADOW_STATE_START + offsetof(shadow_state, icycleinstret), + mtime = PMA_SHADOW_STATE_START + offsetof(shadow_state, mtime), mstatus = PMA_SHADOW_STATE_START + offsetof(shadow_state, mstatus), mtvec = PMA_SHADOW_STATE_START + offsetof(shadow_state, mtvec), mscratch = PMA_SHADOW_STATE_START + offsetof(shadow_state, mscratch), diff --git a/src/machine-state.h 
b/src/machine-state.h index e13edea6d..317ac22f3 100644 --- a/src/machine-state.h +++ b/src/machine-state.h @@ -87,6 +87,7 @@ struct machine_state { // Cartesi-specific state uint64_t ilrsc{}; ///< For LR/SC instructions (Cartesi-specific). uint64_t icycleinstret{}; ///< CSR icycleinstret (Cartesi-specific). + uint64_t mtime{}; ///< Clock time base (Cartesi-specific). struct { uint64_t X{}; ///< CPU has yielded with automatic reset (Cartesi-specific). uint64_t Y{}; ///< CPU has yielded with manual reset (Cartesi-specific). diff --git a/src/machine.cpp b/src/machine.cpp index 7d8275e60..9ea10d12e 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -310,6 +310,7 @@ machine::machine(const machine_config &c, const machine_runtime_config &r) : m_c write_reg(reg::fcsr, m_c.processor.fcsr); write_reg(reg::mcycle, m_c.processor.mcycle); write_reg(reg::icycleinstret, m_c.processor.icycleinstret); + write_reg(reg::mtime, m_c.processor.mtime); write_reg(reg::mstatus, m_c.processor.mstatus); write_reg(reg::mtvec, m_c.processor.mtvec); write_reg(reg::mscratch, m_c.processor.mscratch); @@ -656,6 +657,7 @@ machine_config machine::get_serialization_config() const { c.processor.mimpid = read_reg(reg::mimpid); c.processor.mcycle = read_reg(reg::mcycle); c.processor.icycleinstret = read_reg(reg::icycleinstret); + c.processor.mtime = read_reg(reg::mtime); c.processor.mstatus = read_reg(reg::mstatus); c.processor.mtvec = read_reg(reg::mtvec); c.processor.mscratch = read_reg(reg::mscratch); @@ -1039,6 +1041,8 @@ uint64_t machine::read_reg(reg r) const { return m_s.mcycle; case reg::icycleinstret: return m_s.icycleinstret; + case reg::mtime: + return m_s.mtime; case reg::mstatus: return m_s.mstatus; case reg::mtvec: @@ -1412,6 +1416,9 @@ void machine::write_reg(reg w, uint64_t value) { case reg::icycleinstret: m_s.icycleinstret = value; break; + case reg::mtime: + m_s.mtime = value; + break; case reg::mstatus: m_s.mstatus = value; break; diff --git a/src/record-step-state-access.h 
b/src/record-step-state-access.h index b1b443d78..77983f569 100644 --- a/src/record-step-state-access.h +++ b/src/record-step-state-access.h @@ -230,6 +230,16 @@ class record_step_state_access : public i_state_access(machine_reg_address(machine_reg::mtime)); + } + + void do_write_mtime(uint64_t val) { + raw_write_memory(machine_reg_address(machine_reg::mtime), val); + } + uint64_t do_read_mvendorid() { return raw_read_memory(machine_reg_address(machine_reg::mvendorid)); } diff --git a/src/riscv-constants.h b/src/riscv-constants.h index c298ac510..9c82269cf 100644 --- a/src/riscv-constants.h +++ b/src/riscv-constants.h @@ -432,6 +432,7 @@ enum CARTESI_init : uint64_t { MIMPID_INIT = CM_MIMPID, ///< Initial value for mimpid MCYCLE_INIT = UINT64_C(0), ///< Initial value for mcycle ICYCLEINSTRET_INIT = UINT64_C(0), ///< Initial value for icycleinstret + MTIME_INIT = UINT64_C(0), ///< Initial value for mtime MSTATUS_INIT = (MISA_MXL_VALUE << MSTATUS_UXL_SHIFT) | (MISA_MXL_VALUE << MSTATUS_SXL_SHIFT), ///< Initial value for mstatus MTVEC_INIT = UINT64_C(0), ///< Initial value for mtvec diff --git a/src/shadow-state-factory.cpp b/src/shadow-state-factory.cpp index d055a63da..695777197 100644 --- a/src/shadow-state-factory.cpp +++ b/src/shadow-state-factory.cpp @@ -62,6 +62,7 @@ static bool shadow_state_peek(const pma_entry & /*pma*/, const machine &m, uint6 s->mimpid = m.read_reg(machine_reg::mimpid); s->mcycle = m.read_reg(machine_reg::mcycle); s->icycleinstret = m.read_reg(machine_reg::icycleinstret); + s->mtime = m.read_reg(machine_reg::mtime); s->mstatus = m.read_reg(machine_reg::mstatus); s->mtvec = m.read_reg(machine_reg::mtvec); s->mscratch = m.read_reg(machine_reg::mscratch); diff --git a/src/shadow-state.h b/src/shadow-state.h index fd35759f1..f7eaf929f 100644 --- a/src/shadow-state.h +++ b/src/shadow-state.h @@ -41,6 +41,7 @@ struct PACKED shadow_state { uint64_t mimpid; uint64_t mcycle; uint64_t icycleinstret; + uint64_t mtime; uint64_t mstatus; uint64_t mtvec; 
uint64_t mscratch; diff --git a/src/state-access.h b/src/state-access.h index 83e837c27..b3c2e71ff 100644 --- a/src/state-access.h +++ b/src/state-access.h @@ -121,6 +121,14 @@ class state_access : public i_state_access { m_m.get_state().icycleinstret = val; } + uint64_t do_read_mtime() const { + return m_m.get_state().mtime; + } + + void do_write_mtime(uint64_t val) { + m_m.get_state().mtime = val; + } + uint64_t do_read_mvendorid() const { // NOLINT(readability-convert-member-functions-to-static) return MVENDORID_INIT; } diff --git a/src/uarch-bridge.h b/src/uarch-bridge.h index b09ea5a39..7ff307f67 100644 --- a/src/uarch-bridge.h +++ b/src/uarch-bridge.h @@ -250,6 +250,9 @@ class uarch_bridge { case reg::icycleinstret: s.icycleinstret = data; return; + case reg::mtime: + s.mtime = data; + return; case reg::mstatus: s.mstatus = data; return; @@ -504,6 +507,8 @@ class uarch_bridge { return s.mcycle; case reg::icycleinstret: return s.icycleinstret; + case reg::mtime: + return s.mtime; case reg::mstatus: return s.mstatus; case reg::mtvec: @@ -733,6 +738,8 @@ class uarch_bridge { return "mcycle"; case reg::icycleinstret: return "icycleinstret"; + case reg::mtime: + return "mtime"; case reg::mstatus: return "mstatus"; case reg::mtvec: diff --git a/tests/lua/machine-bind.lua b/tests/lua/machine-bind.lua index a8c9d87ce..ba94a4f41 100755 --- a/tests/lua/machine-bind.lua +++ b/tests/lua/machine-bind.lua @@ -155,41 +155,42 @@ local cpu_reg_addr = { mimpid = 544, mcycle = 552, icycleinstret = 560, - mstatus = 568, - mtvec = 576, - mscratch = 584, - mepc = 592, - mcause = 600, - mtval = 608, - misa = 616, - mie = 624, - mip = 632, - medeleg = 640, - mideleg = 648, - mcounteren = 656, - menvcfg = 664, - stvec = 672, - sscratch = 680, - sepc = 688, - scause = 696, - stval = 704, - satp = 712, - scounteren = 720, - senvcfg = 728, - ilrsc = 736, - iprv = 744, - iflags_X = 752, - iflags_Y = 760, - iflags_H = 768, - iunrep = 776, - clint_mtimecmp = 784, - plic_girqpend = 792, - 
plic_girqsrvd = 800, - htif_tohost = 808, - htif_fromhost = 816, - htif_ihalt = 824, - htif_iconsole = 832, - htif_iyield = 840, + mtime = 568, + mstatus = 576, + mtvec = 584, + mscratch = 592, + mepc = 600, + mcause = 608, + mtval = 616, + misa = 624, + mie = 632, + mip = 640, + medeleg = 648, + mideleg = 656, + mcounteren = 664, + menvcfg = 672, + stvec = 680, + sscratch = 688, + sepc = 696, + scause = 704, + stval = 712, + satp = 720, + scounteren = 728, + senvcfg = 736, + ilrsc = 744, + iprv = 752, + iflags_X = 760, + iflags_Y = 768, + iflags_H = 776, + iunrep = 784, + clint_mtimecmp = 792, + plic_girqpend = 800, + plic_girqsrvd = 808, + htif_tohost = 816, + htif_fromhost = 824, + htif_ihalt = 832, + htif_iconsole = 840, + htif_iyield = 848, } for i = 0, 31 do cpu_reg_addr["x" .. i] = i * 8 @@ -211,6 +212,7 @@ local function get_cpu_reg_test_values() mimpid = -1, mcycle = 0x220, icycleinstret = 0x228, + mtime = 0x228, mstatus = 0x230, mtvec = 0x238, mscratch = 0x240, @@ -1262,10 +1264,10 @@ do_test("Dump of log produced by send_cmio_response should match", function(mach local log = machine:log_send_cmio_response(reason, data, cartesi.ACCESS_LOG_TYPE_ANNOTATIONS) -- luacheck: push no max line length local expected_dump = "begin send cmio response\n" - .. " 1: read iflags.Y@0x2f8(760): 0x1(1)\n" + .. " 1: read iflags.Y@0x300(768): 0x1(1)\n" .. ' 2: write cmio rx buffer@0x60000000(1610612736): hash:"290decd9"(2^5 bytes) -> hash:"555b1f6d"(2^5 bytes)\n' - .. " 3: write htif.fromhost@0x330(816): 0x0(0) -> 0x70000000a(30064771082)\n" - .. " 4: write iflags.Y@0x2f8(760): 0x1(1) -> 0x0(0)\n" + .. " 3: write htif.fromhost@0x338(824): 0x0(0) -> 0x70000000a(30064771082)\n" + .. " 4: write iflags.Y@0x300(768): 0x1(1) -> 0x0(0)\n" .. 
"end send cmio response\n" -- luacheck: pop local temp_file = test_util.new_temp_file() diff --git a/uarch/uarch-machine-state-access.h b/uarch/uarch-machine-state-access.h index aa9346eb9..d90634044 100644 --- a/uarch/uarch-machine-state-access.h +++ b/uarch/uarch-machine-state-access.h @@ -247,6 +247,14 @@ class uarch_machine_state_access : public i_state_access(machine_reg_address(machine_reg::mtime)); + } + + void do_write_mtime(uint64_t val) { + raw_write_memory(machine_reg_address(machine_reg::mtime), val); + } + uint64_t do_read_mvendorid() { return raw_read_memory(machine_reg_address(machine_reg::mvendorid)); } From 0a6aa111f257ee8d3681697246d354ce8a487fd8 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Fri, 31 Jan 2025 11:12:42 -0300 Subject: [PATCH 14/15] feat: use mtime for RTC tick --- src/clint.cpp | 2 +- src/clua-cartesi.cpp | 5 + src/device-state-access.h | 7 +- src/i-device-state-access.h | 8 +- src/i-state-access.h | 15 +- src/interpret.cpp | 552 ++++++++++++++-------------- src/record-step-state-access.h | 5 +- src/replay-step-state-access.h | 5 +- src/rtc-defines.h | 4 +- src/rtc.h | 20 +- src/state-access.h | 17 +- tests/lua/cartesi-machine-tests.lua | 4 +- tests/lua/mtime-interrupt.lua | 2 +- uarch/uarch-machine-state-access.h | 4 +- 14 files changed, 324 insertions(+), 326 deletions(-) diff --git a/src/clint.cpp b/src/clint.cpp index 1e04a8b8f..10343753b 100644 --- a/src/clint.cpp +++ b/src/clint.cpp @@ -46,7 +46,7 @@ static bool clint_read_msip(i_device_state_access *a, uint64_t *val, int log2_si static bool clint_read_mtime(i_device_state_access *a, uint64_t *val, int log2_size) { if (log2_size == 3) { - *val = rtc_cycle_to_time(a->read_mcycle()); + *val = a->read_mtime(); return true; } return false; diff --git a/src/clua-cartesi.cpp b/src/clua-cartesi.cpp index 9d513edaf..fee2fe30f 100644 --- a/src/clua-cartesi.cpp +++ b/src/clua-cartesi.cpp @@ -29,6 +29,7 @@ #include "machine-c-api.h" #include "machine-c-version.h" #include 
"riscv-constants.h" +#include "rtc.h" #include "uarch-constants.h" #include "uarch-pristine.h" @@ -235,6 +236,10 @@ CM_API int luaopen_cartesi(lua_State *L) { clua_setintegerfield(L, MVENDORID_INIT, "MVENDORID", -1); clua_setintegerfield(L, MARCHID_INIT, "MARCHID", -1); clua_setintegerfield(L, MIMPID_INIT, "MIMPID", -1); + // RTC constants + clua_setintegerfield(L, RTC_FREQ_DIV, "RTC_FREQ_DIV", -1); + clua_setintegerfield(L, RTC_CLOCK_FREQ, "RTC_CLOCK_FREQ", -1); + clua_setintegerfield(L, RTC_US_PER_TICK, "RTC_US_PER_TICK", -1); // Build-related constants clua_setstringfield(L, BOOST_COMPILER, "COMPILER", -1); clua_setstringfield(L, BOOST_PLATFORM, "PLATFORM", -1); diff --git a/src/device-state-access.h b/src/device-state-access.h index 97dcad701..4eca972d3 100644 --- a/src/device-state-access.h +++ b/src/device-state-access.h @@ -36,7 +36,7 @@ namespace cartesi { template class device_state_access : public i_device_state_access { public: - explicit device_state_access(STATE_ACCESS a, uint64_t mcycle) : m_a(a), m_mcycle(mcycle) { + explicit device_state_access(STATE_ACCESS a) : m_a(a) { static_assert(is_an_i_state_access::value, "not an i_state_access"); } @@ -53,7 +53,6 @@ class device_state_access : public i_device_state_access { private: STATE_ACCESS m_a; - uint64_t m_mcycle; void do_set_mip(uint64_t mask) override { uint64_t mip = m_a.read_mip(); @@ -71,8 +70,8 @@ class device_state_access : public i_device_state_access { return m_a.read_mip(); } - uint64_t do_read_mcycle() override { - return m_mcycle; + uint64_t do_read_mtime() override { + return m_a.read_mtime(); } void do_write_iflags_H(uint64_t val) override { diff --git a/src/i-device-state-access.h b/src/i-device-state-access.h index efdfaa06e..726006881 100644 --- a/src/i-device-state-access.h +++ b/src/i-device-state-access.h @@ -66,10 +66,10 @@ class i_device_state_access { return do_read_mip(); } - /// \brief Reads CSR mcycle. + /// \brief Reads CSR mtime. /// \returns Register value. 
- uint64_t read_mcycle() { - return do_read_mcycle(); + uint64_t read_mtime() { + return do_read_mtime(); } /// \brief Sets the iflags_H flag. @@ -197,7 +197,7 @@ class i_device_state_access { virtual void do_set_mip(uint64_t mask) = 0; virtual void do_reset_mip(uint64_t mask) = 0; virtual uint64_t do_read_mip() = 0; - virtual uint64_t do_read_mcycle() = 0; + virtual uint64_t do_read_mtime() = 0; virtual void do_write_iflags_H(uint64_t val) = 0; virtual void do_write_iflags_Y(uint64_t val) = 0; virtual void do_write_iflags_X(uint64_t val) = 0; diff --git a/src/i-state-access.h b/src/i-state-access.h index 2d4c4ffab..56d997287 100644 --- a/src/i-state-access.h +++ b/src/i-state-access.h @@ -604,15 +604,14 @@ class i_state_access { // CRTP } /// \brief Poll for external interrupts. - /// \param mcycle Current machine mcycle. - /// \param mcycle_max Maximum mcycle to wait for interrupts. - /// \returns A pair, the first value is the new machine mcycle advanced by the relative elapsed time while - /// polling, the second value is a boolean that is true when the poll is stopped due do an external interrupt + /// \param timeout_us Maximum time in microseconds to poll. + /// \returns A pair, the first value is the elapsed time in microseconds while polling, + /// the second value is a boolean that is true when the poll is stopped due to an external interrupt /// request. - /// \details When mcycle_max is greater than mcycle, this function will sleep until an external interrupt - /// is triggered or mcycle_max relative elapsed time is reached. - std::pair poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { - return derived().do_poll_external_interrupts(mcycle, mcycle_max); + /// \details When timeout_us is greater than 0, this function will sleep until an external interrupt + /// is triggered or timeout_us relative elapsed time is reached.
+ std::pair poll_external_interrupts(uint64_t timeout_us) { + return derived().do_poll_external_interrupts(timeout_us); } /// \brief Reads PMA entry at a given index. diff --git a/src/interpret.cpp b/src/interpret.cpp index 231f8bb3b..7458879eb 100644 --- a/src/interpret.cpp +++ b/src/interpret.cpp @@ -551,11 +551,17 @@ static inline uint64_t raise_interrupt_if_any(STATE_ACCESS a, uint64_t pc) { /// \brief At every tick, set interrupt as pending if the timer is expired /// \param a Machine state accessor object. -/// \param mcycle Machine current cycle. template -static inline void set_rtc_interrupt(STATE_ACCESS a, uint64_t mcycle) { - const uint64_t timecmp_cycle = rtc_time_to_cycle(a.read_clint_mtimecmp()); - if (timecmp_cycle <= mcycle && timecmp_cycle != 0) { +static inline void rtc_advance_tick(STATE_ACCESS a) { + uint64_t mtime = a.read_mtime(); + // Increment time taking care with overflow + if (likely(mtime < UINT64_MAX)) { + ++mtime; + a.write_mtime(mtime); + } + // Raise time interrupts if any + const uint64_t mtimecmp = a.read_clint_mtimecmp(); + if (mtimecmp <= mtime && mtimecmp != 0) { const uint64_t mip = a.read_mip(); a.write_mip(mip | MIP_MTIP_MASK); } @@ -839,8 +845,8 @@ static FORCE_INLINE int32_t insn_get_C_SWSP_imm(uint32_t insn) { /// is outlined, and taking PC by reference would cause the compiler to store it in a stack variable /// instead of always storing it in register (this is an optimization). 
template -static NO_INLINE std::pair read_virtual_memory_slow(STATE_ACCESS a, uint64_t pc, uint64_t mcycle, - uint64_t vaddr, T *pval) { +static NO_INLINE std::pair read_virtual_memory_slow(STATE_ACCESS a, uint64_t pc, uint64_t vaddr, + T *pval) { using U = std::make_unsigned_t; // No support for misaligned accesses: They are handled by a trap in BBL if (unlikely(vaddr & (sizeof(T) - 1))) { @@ -866,7 +872,7 @@ static NO_INLINE std::pair read_virtual_memory_slow(STATE_ACCESS if (likely(pma.get_istart_IO())) { const uint64_t offset = paddr - pma.get_start(); uint64_t val{}; - device_state_access da(a, mcycle); + device_state_access da(a); // If we do not know how to read, we treat this as a PMA violation const bool status = pma.get_device_noexcept().get_driver()->read(pma.get_device_noexcept().get_context(), &da, offset, &val, log2_size::value); @@ -891,14 +897,13 @@ static NO_INLINE std::pair read_virtual_memory_slow(STATE_ACCESS /// \param pval Pointer to word receiving value. /// \returns True if succeeded, false otherwise. template -static FORCE_INLINE bool read_virtual_memory(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint64_t vaddr, T *pval) { +static FORCE_INLINE bool read_virtual_memory(STATE_ACCESS a, uint64_t &pc, uint64_t vaddr, T *pval) { // Try hitting the TLB if (unlikely(!(a.template read_memory_word_via_tlb(vaddr, pval)))) { // Outline the slow path into a function call to minimize host CPU code cache pressure INC_COUNTER(a.get_statistics(), tlb_rmiss); T val = 0; // Don't pass pval reference directly so the compiler can store it in a register - auto [status, new_pc] = - read_virtual_memory_slow(a, pc, mcycle, vaddr, &val); + auto [status, new_pc] = read_virtual_memory_slow(a, pc, vaddr, &val); *pval = val; pc = new_pc; return status; @@ -923,7 +928,7 @@ static FORCE_INLINE bool read_virtual_memory(STATE_ACCESS a, uint64_t &pc, uint6 /// instead of always storing it in register (this is an optimization). 
template static NO_INLINE std::pair write_virtual_memory_slow(STATE_ACCESS a, uint64_t pc, - uint64_t mcycle, uint64_t vaddr, uint64_t val64) { + uint64_t vaddr, uint64_t val64) { using U = std::make_unsigned_t; // No support for misaligned accesses: They are handled by a trap in BBL if (unlikely(vaddr & (sizeof(T) - 1))) { @@ -946,7 +951,7 @@ static NO_INLINE std::pair write_virtual_memory_slow(S } if (likely(pma.get_istart_IO())) { const uint64_t offset = paddr - pma.get_start(); - device_state_access da(a, mcycle); + device_state_access da(a); auto status = pma.get_device_noexcept().get_driver()->write(pma.get_device_noexcept().get_context(), &da, offset, static_cast(static_cast(val64)), log2_size::value); // If we do not know how to write, we treat this as a PMA violation @@ -968,13 +973,12 @@ static NO_INLINE std::pair write_virtual_memory_slow(S /// \param val64 Value to write. /// \returns True if succeeded, false if exception raised. template -static FORCE_INLINE execute_status write_virtual_memory(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint64_t vaddr, - uint64_t val64) { +static FORCE_INLINE execute_status write_virtual_memory(STATE_ACCESS a, uint64_t &pc, uint64_t vaddr, uint64_t val64) { // Try hitting the TLB if (unlikely((!a.template write_memory_word_via_tlb(vaddr, static_cast(val64))))) { INC_COUNTER(a.get_statistics(), tlb_wmiss); // Outline the slow path into a function call to minimize host CPU code cache pressure - auto [status, new_pc] = write_virtual_memory_slow(a, pc, mcycle, vaddr, val64); + auto [status, new_pc] = write_virtual_memory_slow(a, pc, vaddr, val64); pc = new_pc; return status; } @@ -1078,10 +1082,10 @@ static FORCE_INLINE execute_status execute_jump(STATE_ACCESS /*a*/, uint64_t &pc /// \param pc Interpreter loop program counter (will be overwritten). /// \param insn Instruction. 
template -static FORCE_INLINE execute_status execute_LR(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LR(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); T val = 0; - if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr, &val))) { + if (unlikely(!read_virtual_memory(a, pc, vaddr, &val))) { return advance_to_raised_exception(a, pc); } a.write_ilrsc(vaddr); @@ -1099,12 +1103,12 @@ static FORCE_INLINE execute_status execute_LR(STATE_ACCESS a, uint64_t &pc, uint /// \param pc Interpreter loop program counter (will be overwritten). /// \param insn Instruction. template -static FORCE_INLINE execute_status execute_SC(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SC(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { uint64_t val = 0; const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); execute_status status = execute_status::success; if (a.read_ilrsc() == vaddr) { - status = write_virtual_memory(a, pc, mcycle, vaddr, static_cast(a.read_x(insn_get_rs2(insn)))); + status = write_virtual_memory(a, pc, vaddr, static_cast(a.read_x(insn_get_rs2(insn)))); if (unlikely(status == execute_status::failure)) { return advance_to_raised_exception(a, pc); } @@ -1122,34 +1126,33 @@ static FORCE_INLINE execute_status execute_SC(STATE_ACCESS a, uint64_t &pc, uint /// \brief Implementation of the LR.W instruction. template -static FORCE_INLINE execute_status execute_LR_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LR_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { if (unlikely((insn & 0b00000001111100000000000000000000) != 0)) { return raise_illegal_insn_exception(a, pc, insn); } dump_insn(a, pc, insn, "lr.w"); - return execute_LR(a, pc, mcycle, insn); + return execute_LR(a, pc, insn); } /// \brief Implementation of the SC.W instruction. 
template -static FORCE_INLINE execute_status execute_SC_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SC_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sc.w"); - return execute_SC(a, pc, mcycle, insn); + return execute_SC(a, pc, insn); } template -static FORCE_INLINE execute_status execute_AMO(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn, - const F &f) { +static FORCE_INLINE execute_status execute_AMO(STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); T valm = 0; // AMOs never raise load exceptions. Since any unreadable page is also unwritable, // attempting to perform an AMO on an unreadable page always raises a store page-fault exception. - if (unlikely((!read_virtual_memory(a, pc, mcycle, vaddr, &valm)))) { + if (unlikely((!read_virtual_memory(a, pc, vaddr, &valm)))) { return advance_to_raised_exception(a, pc); } T valr = static_cast(a.read_x(insn_get_rs2(insn))); valr = f(valm, valr); - const execute_status status = write_virtual_memory(a, pc, mcycle, vaddr, valr); + const execute_status status = write_virtual_memory(a, pc, vaddr, valr); if (unlikely(status == execute_status::failure)) { return advance_to_raised_exception(a, pc); } @@ -1163,16 +1166,16 @@ static FORCE_INLINE execute_status execute_AMO(STATE_ACCESS a, uint64_t &pc, uin /// \brief Implementation of the AMOSWAP.W instruction. 
template -static FORCE_INLINE execute_status execute_AMOSWAP_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOSWAP_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoswap.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t /*valm*/, int32_t valr) -> int32_t { return valr; }); + return execute_AMO(a, pc, insn, [](int32_t /*valm*/, int32_t valr) -> int32_t { return valr; }); } /// \brief Implementation of the AMOADD.W instruction. template -static FORCE_INLINE execute_status execute_AMOADD_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOADD_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoadd.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { int32_t val = 0; __builtin_add_overflow(valm, valr, &val); return val; @@ -1180,30 +1183,30 @@ static FORCE_INLINE execute_status execute_AMOADD_W(STATE_ACCESS a, uint64_t &pc } template -static FORCE_INLINE execute_status execute_AMOXOR_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOXOR_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoxor.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm ^ valr; }); + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm ^ valr; }); } /// \brief Implementation of the AMOAND.W instruction. 
template -static FORCE_INLINE execute_status execute_AMOAND_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOAND_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoand.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm & valr; }); + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm & valr; }); } /// \brief Implementation of the AMOOR.W instruction. template -static FORCE_INLINE execute_status execute_AMOOR_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOOR_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoor.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm | valr; }); + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { return valm | valr; }); } /// \brief Implementation of the AMOMIN.W instruction. template -static FORCE_INLINE execute_status execute_AMOMIN_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMIN_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomin.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { if (valm < valr) { return valm; } @@ -1213,9 +1216,9 @@ static FORCE_INLINE execute_status execute_AMOMIN_W(STATE_ACCESS a, uint64_t &pc /// \brief Implementation of the AMOMAX.W instruction. 
template -static FORCE_INLINE execute_status execute_AMOMAX_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMAX_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomax.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { if (valm > valr) { return valm; } @@ -1225,9 +1228,9 @@ static FORCE_INLINE execute_status execute_AMOMAX_W(STATE_ACCESS a, uint64_t &pc /// \brief Implementation of the AMOMINU.W instruction. template -static FORCE_INLINE execute_status execute_AMOMINU_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMINU_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amominu.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { if (static_cast(valm) < static_cast(valr)) { return valm; } @@ -1237,9 +1240,9 @@ static FORCE_INLINE execute_status execute_AMOMINU_W(STATE_ACCESS a, uint64_t &p /// \brief Implementation of the AMOMAXU.W instruction. template -static FORCE_INLINE execute_status execute_AMOMAXU_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMAXU_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomaxu.w"); - return execute_AMO(a, pc, mcycle, insn, [](int32_t valm, int32_t valr) -> int32_t { + return execute_AMO(a, pc, insn, [](int32_t valm, int32_t valr) -> int32_t { if (static_cast(valm) > static_cast(valr)) { return valm; } @@ -1249,33 +1252,33 @@ static FORCE_INLINE execute_status execute_AMOMAXU_W(STATE_ACCESS a, uint64_t &p /// \brief Implementation of the LR.D instruction. 
template -static FORCE_INLINE execute_status execute_LR_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LR_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { if (unlikely((insn & 0b00000001111100000000000000000000) != 0)) { return raise_illegal_insn_exception(a, pc, insn); } dump_insn(a, pc, insn, "lr.d"); - return execute_LR(a, pc, mcycle, insn); + return execute_LR(a, pc, insn); } /// \brief Implementation of the SC.D instruction. template -static FORCE_INLINE execute_status execute_SC_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SC_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sc.d"); - return execute_SC(a, pc, mcycle, insn); + return execute_SC(a, pc, insn); } /// \brief Implementation of the AMOSWAP.D instruction. template -static FORCE_INLINE execute_status execute_AMOSWAP_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOSWAP_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoswap.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t /*valm*/, int64_t valr) -> int64_t { return valr; }); + return execute_AMO(a, pc, insn, [](int64_t /*valm*/, int64_t valr) -> int64_t { return valr; }); } /// \brief Implementation of the AMOADD.D instruction. 
template -static FORCE_INLINE execute_status execute_AMOADD_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOADD_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoadd.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { int64_t val = 0; __builtin_add_overflow(valm, valr, &val); return val; @@ -1283,30 +1286,30 @@ static FORCE_INLINE execute_status execute_AMOADD_D(STATE_ACCESS a, uint64_t &pc } template -static FORCE_INLINE execute_status execute_AMOXOR_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOXOR_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoxor.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm ^ valr; }); + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm ^ valr; }); } /// \brief Implementation of the AMOAND.D instruction. template -static FORCE_INLINE execute_status execute_AMOAND_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOAND_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoand.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm & valr; }); + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm & valr; }); } /// \brief Implementation of the AMOOR.D instruction. 
template -static FORCE_INLINE execute_status execute_AMOOR_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOOR_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amoor.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm | valr; }); + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { return valm | valr; }); } /// \brief Implementation of the AMOMIN.D instruction. template -static FORCE_INLINE execute_status execute_AMOMIN_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMIN_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomin.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { if (valm < valr) { return valm; } @@ -1316,9 +1319,9 @@ static FORCE_INLINE execute_status execute_AMOMIN_D(STATE_ACCESS a, uint64_t &pc /// \brief Implementation of the AMOMAX.D instruction. template -static FORCE_INLINE execute_status execute_AMOMAX_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMAX_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomax.d"); - return execute_AMO(a, pc, mcycle, insn, [](int64_t valm, int64_t valr) -> int64_t { + return execute_AMO(a, pc, insn, [](int64_t valm, int64_t valr) -> int64_t { if (valm > valr) { return valm; } @@ -1328,9 +1331,9 @@ static FORCE_INLINE execute_status execute_AMOMAX_D(STATE_ACCESS a, uint64_t &pc /// \brief Implementation of the AMOMINU.D instruction. 
template -static FORCE_INLINE execute_status execute_AMOMINU_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMINU_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amominu.d"); - return execute_AMO(a, pc, mcycle, insn, [](uint64_t valm, uint64_t valr) -> uint64_t { + return execute_AMO(a, pc, insn, [](uint64_t valm, uint64_t valr) -> uint64_t { if (valm < valr) { return valm; } @@ -1340,9 +1343,9 @@ static FORCE_INLINE execute_status execute_AMOMINU_D(STATE_ACCESS a, uint64_t &p /// \brief Implementation of the AMOMAXU.D instruction. template -static FORCE_INLINE execute_status execute_AMOMAXU_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMOMAXU_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "amomaxu.d"); - return execute_AMO(a, pc, mcycle, insn, [](uint64_t valm, uint64_t valr) -> uint64_t { + return execute_AMO(a, pc, insn, [](uint64_t valm, uint64_t valr) -> uint64_t { if (valm > valr) { return valm; } @@ -1549,7 +1552,8 @@ static inline bool rdcounteren(STATE_ACCESS a, uint64_t mask) { } template -static inline uint64_t read_csr_cycle(STATE_ACCESS a, uint64_t mcycle, bool *status) { +static inline uint64_t read_csr_cycle(STATE_ACCESS a, bool *status) { + const uint64_t mcycle = a.read_mcycle(); if (rdcounteren(a, MCOUNTEREN_CY_MASK)) { return read_csr_success(mcycle, status); } @@ -1557,21 +1561,22 @@ static inline uint64_t read_csr_cycle(STATE_ACCESS a, uint64_t mcycle, bool *sta } template -static inline uint64_t read_csr_instret(STATE_ACCESS a, uint64_t mcycle, bool *status) { +static inline uint64_t read_csr_instret(STATE_ACCESS a, bool *status) { if (unlikely(!rdcounteren(a, MCOUNTEREN_IR_MASK))) { return read_csr_fail(status); } + const uint64_t mcycle = a.read_mcycle(); const uint64_t icycleinstret = a.read_icycleinstret(); const uint64_t minstret = mcycle - icycleinstret; return 
read_csr_success(minstret, status); } template -static inline uint64_t read_csr_time(STATE_ACCESS a, uint64_t mcycle, bool *status) { +static inline uint64_t read_csr_time(STATE_ACCESS a, bool *status) { if (unlikely(!rdcounteren(a, MCOUNTEREN_TM_MASK))) { return read_csr_fail(status); } - const uint64_t mtime = rtc_cycle_to_time(mcycle); + const uint64_t mtime = a.read_mtime(); return read_csr_success(mtime, status); } @@ -1707,12 +1712,14 @@ static inline uint64_t read_csr_mip(STATE_ACCESS a, bool *status) { return read_csr_success(a.read_mip(), status); } -static inline uint64_t read_csr_mcycle(uint64_t mcycle, bool *status) { - return read_csr_success(mcycle, status); +template +static inline uint64_t read_csr_mcycle(STATE_ACCESS a, bool *status) { + return read_csr_success(a.read_mcycle(), status); } template -static inline uint64_t read_csr_minstret(STATE_ACCESS a, uint64_t mcycle, bool *status) { +static inline uint64_t read_csr_minstret(STATE_ACCESS a, bool *status) { + const uint64_t mcycle = a.read_mcycle(); const uint64_t icycleinstret = a.read_icycleinstret(); const uint64_t minstret = mcycle - icycleinstret; return read_csr_success(minstret, status); @@ -1769,7 +1776,7 @@ static inline uint64_t read_csr_fcsr(STATE_ACCESS a, bool *status) { /// \returns Register value. /// \details This function is outlined to minimize host CPU code cache pressure. 
template -static NO_INLINE uint64_t read_csr(STATE_ACCESS a, uint64_t mcycle, CSR_address csraddr, bool *status) { +static NO_INLINE uint64_t read_csr(STATE_ACCESS a, CSR_address csraddr, bool *status) { if (unlikely(csr_prv(csraddr) > a.read_iprv())) { return read_csr_fail(status); } @@ -1783,11 +1790,11 @@ static NO_INLINE uint64_t read_csr(STATE_ACCESS a, uint64_t mcycle, CSR_address return read_csr_fcsr(a, status); case CSR_address::ucycle: - return read_csr_cycle(a, mcycle, status); + return read_csr_cycle(a, status); case CSR_address::uinstret: - return read_csr_instret(a, mcycle, status); + return read_csr_instret(a, status); case CSR_address::utime: - return read_csr_time(a, mcycle, status); + return read_csr_time(a, status); case CSR_address::sstatus: return read_csr_sstatus(a, status); @@ -1841,9 +1848,9 @@ static NO_INLINE uint64_t read_csr(STATE_ACCESS a, uint64_t mcycle, CSR_address return read_csr_mip(a, status); case CSR_address::mcycle: - return read_csr_mcycle(mcycle, status); + return read_csr_mcycle(a, status); case CSR_address::minstret: - return read_csr_minstret(a, mcycle, status); + return read_csr_minstret(a, status); case CSR_address::mvendorid: return read_csr_mvendorid(a, status); @@ -2172,9 +2179,10 @@ static execute_status write_csr_mcounteren(STATE_ACCESS a, uint64_t val) { } template -static execute_status write_csr_minstret(STATE_ACCESS a, uint64_t mcycle, uint64_t val) { +static execute_status write_csr_minstret(STATE_ACCESS a, uint64_t val) { // Note that mcycle will only be incremented after the instruction is executed, // but we have to compute this in advance + const uint64_t mcycle = a.read_mcycle(); const uint64_t icycleinstret = (mcycle + 1) - val; a.write_icycleinstret(icycleinstret); return execute_status::success; @@ -2267,7 +2275,7 @@ static inline execute_status write_csr_fcsr(STATE_ACCESS a, uint64_t val) { /// \returns The status of the operation (true for success, false otherwise). 
/// \details This function is outlined to minimize host CPU code cache pressure. template -static NO_INLINE execute_status write_csr(STATE_ACCESS a, uint64_t mcycle, CSR_address csraddr, uint64_t val) { +static NO_INLINE execute_status write_csr(STATE_ACCESS a, CSR_address csraddr, uint64_t val) { #if defined(DUMP_CSR) fprintf(stderr, "csr_write: csr=0x%03x val=0x", static_cast(csraddr)); print_uint64_t(val); @@ -2342,7 +2350,7 @@ static NO_INLINE execute_status write_csr(STATE_ACCESS a, uint64_t mcycle, CSR_a case CSR_address::mcycle: return write_csr_mcycle(a, val); case CSR_address::minstret: - return write_csr_minstret(a, mcycle, val); + return write_csr_minstret(a, val); // Ignore writes case CSR_address::misa: @@ -2421,8 +2429,7 @@ static NO_INLINE execute_status write_csr(STATE_ACCESS a, uint64_t mcycle, CSR_a } template -static FORCE_INLINE execute_status execute_csr_RW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn, - const RS1VAL &rs1val) { +static FORCE_INLINE execute_status execute_csr_RW(STATE_ACCESS a, uint64_t &pc, uint32_t insn, const RS1VAL &rs1val) { auto csraddr = static_cast(insn_I_get_uimm(insn)); // Try to read old CSR value bool status = true; @@ -2430,7 +2437,7 @@ static FORCE_INLINE execute_status execute_csr_RW(STATE_ACCESS a, uint64_t &pc, // If rd=r0, we do not read from the CSR to avoid side-effects const uint32_t rd = insn_get_rd(insn); if (rd != 0) { - csrval = read_csr(a, mcycle, csraddr, &status); + csrval = read_csr(a, csraddr, &status); } if (unlikely(!status)) { return raise_illegal_insn_exception(a, pc, insn); @@ -2440,7 +2447,7 @@ static FORCE_INLINE execute_status execute_csr_RW(STATE_ACCESS a, uint64_t &pc, // will have to check if there was a change to the // memory manager and report back from here so we // break out of the inner loop - const execute_status wstatus = write_csr(a, mcycle, csraddr, rs1val(a, insn)); + const execute_status wstatus = write_csr(a, csraddr, rs1val(a, insn)); if (unlikely(wstatus == 
execute_status::failure)) { return raise_illegal_insn_exception(a, pc, insn); } @@ -2454,27 +2461,26 @@ static FORCE_INLINE execute_status execute_csr_RW(STATE_ACCESS a, uint64_t &pc, /// \brief Implementation of the CSRRW instruction. template -static FORCE_INLINE execute_status execute_CSRRW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrw"); - return execute_csr_RW(a, pc, mcycle, insn, + return execute_csr_RW(a, pc, insn, [](STATE_ACCESS a, uint32_t insn) -> uint64_t { return a.read_x(insn_get_rs1(insn)); }); } /// \brief Implementation of the CSRRWI instruction. template -static FORCE_INLINE execute_status execute_CSRRWI(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRWI(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrwi"); - return execute_csr_RW(a, pc, mcycle, insn, + return execute_csr_RW(a, pc, insn, [](STATE_ACCESS, uint32_t insn) -> uint64_t { return static_cast(insn_get_rs1(insn)); }); } template -static FORCE_INLINE execute_status execute_csr_SC(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn, - const F &f) { +static FORCE_INLINE execute_status execute_csr_SC(STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { auto csraddr = static_cast(insn_I_get_uimm(insn)); // Try to read old CSR value bool status = false; - const uint64_t csrval = read_csr(a, mcycle, csraddr, &status); + const uint64_t csrval = read_csr(a, csraddr, &status); if (unlikely(!status)) { return raise_illegal_insn_exception(a, pc, insn); } @@ -2488,7 +2494,7 @@ static FORCE_INLINE execute_status execute_csr_SC(STATE_ACCESS a, uint64_t &pc, // will have to check if there was a change to the // memory manager and report back from here so we // break out of the inner loop - wstatus = write_csr(a, mcycle, csraddr, f(csrval, rs1val)); + wstatus 
= write_csr(a, csraddr, f(csrval, rs1val)); if (unlikely(wstatus == execute_status::failure)) { return raise_illegal_insn_exception(a, pc, insn); } @@ -2504,25 +2510,24 @@ static FORCE_INLINE execute_status execute_csr_SC(STATE_ACCESS a, uint64_t &pc, /// \brief Implementation of the CSRRS instruction. template -static FORCE_INLINE execute_status execute_CSRRS(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRS(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrs"); - return execute_csr_SC(a, pc, mcycle, insn, [](uint64_t csr, uint64_t rs1) -> uint64_t { return csr | rs1; }); + return execute_csr_SC(a, pc, insn, [](uint64_t csr, uint64_t rs1) -> uint64_t { return csr | rs1; }); } /// \brief Implementation of the CSRRC instruction. template -static FORCE_INLINE execute_status execute_CSRRC(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRC(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrc"); - return execute_csr_SC(a, pc, mcycle, insn, [](uint64_t csr, uint64_t rs1) -> uint64_t { return csr & ~rs1; }); + return execute_csr_SC(a, pc, insn, [](uint64_t csr, uint64_t rs1) -> uint64_t { return csr & ~rs1; }); } template -static FORCE_INLINE execute_status execute_csr_SCI(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn, - const F &f) { +static FORCE_INLINE execute_status execute_csr_SCI(STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { auto csraddr = static_cast(insn_I_get_uimm(insn)); // Try to read old CSR value bool status = false; - const uint64_t csrval = read_csr(a, mcycle, csraddr, &status); + const uint64_t csrval = read_csr(a, csraddr, &status); if (unlikely(!status)) { return raise_illegal_insn_exception(a, pc, insn); } @@ -2533,7 +2538,7 @@ static FORCE_INLINE execute_status execute_csr_SCI(STATE_ACCESS a, uint64_t &pc, // will have to check if there was a change to 
the // memory manager and report back from here so we // break out of the inner loop - wstatus = write_csr(a, mcycle, csraddr, f(csrval, rs1)); + wstatus = write_csr(a, csraddr, f(csrval, rs1)); if (unlikely(wstatus == execute_status::failure)) { return raise_illegal_insn_exception(a, pc, insn); } @@ -2549,16 +2554,16 @@ static FORCE_INLINE execute_status execute_csr_SCI(STATE_ACCESS a, uint64_t &pc, /// \brief Implementation of the CSRRSI instruction. template -static FORCE_INLINE execute_status execute_CSRRSI(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRSI(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrsi"); - return execute_csr_SCI(a, pc, mcycle, insn, [](uint64_t csr, uint32_t rs1) -> uint64_t { return csr | rs1; }); + return execute_csr_SCI(a, pc, insn, [](uint64_t csr, uint32_t rs1) -> uint64_t { return csr | rs1; }); } /// \brief Implementation of the CSRRCI instruction. template -static FORCE_INLINE execute_status execute_CSRRCI(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_CSRRCI(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "csrrci"); - return execute_csr_SCI(a, pc, mcycle, insn, [](uint64_t csr, uint32_t rs1) -> uint64_t { return csr & ~rs1; }); + return execute_csr_SCI(a, pc, insn, [](uint64_t csr, uint32_t rs1) -> uint64_t { return csr & ~rs1; }); } /// \brief Implementation of the ECALL instruction. @@ -2642,7 +2647,7 @@ static FORCE_INLINE execute_status execute_MRET(STATE_ACCESS a, uint64_t &pc, ui /// \brief Implementation of the WFI instruction. /// \details This function is outlined to minimize host CPU code cache pressure. 
template -static FORCE_INLINE execute_status execute_WFI(STATE_ACCESS a, uint64_t &pc, uint64_t &mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_WFI(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "wfi"); // Check privileges and do nothing else auto prv = a.read_iprv(); @@ -2652,14 +2657,17 @@ static FORCE_INLINE execute_status execute_WFI(STATE_ACCESS a, uint64_t &pc, uin return raise_illegal_insn_exception(a, pc, insn); } // We wait for interrupts until the next timer interrupt. - const uint64_t mcycle_max = rtc_time_to_cycle(a.read_clint_mtimecmp()); + const uint64_t mtime = a.read_mtime(); + const uint64_t mtimecmp = a.read_clint_mtimecmp(); execute_status status = execute_status::success; - if (mcycle_max > mcycle) { + if (likely(mtimecmp > mtime)) { + const uint64_t timeout_us = (mtimecmp - mtime) * RTC_US_PER_TICK; // Poll for external interrupts (e.g console or network), - // this may advance mcycle only when interactive mode is enabled - const auto [next_mcycle, interrupted] = a.poll_external_interrupts(mcycle, mcycle_max); - mcycle = next_mcycle; - if (interrupted) { + // this may advance time only when interactive mode is enabled + const auto [elapsed_us, interrupted] = a.poll_external_interrupts(timeout_us); + const uint64_t next_mtime = mtime + (elapsed_us / RTC_US_PER_TICK); + a.write_mtime(next_mtime); + if (interrupted || mtimecmp <= next_mtime) { status = execute_status::success_and_serve_interrupts; } } @@ -3117,11 +3125,11 @@ static FORCE_INLINE execute_status execute_SRAIW(STATE_ACCESS a, uint64_t &pc, u } template -static FORCE_INLINE execute_status execute_S(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_S(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); const int32_t imm = insn_S_get_imm(insn); const uint64_t val = a.read_x(insn_get_rs2(insn)); - const execute_status status = 
write_virtual_memory(a, pc, mcycle, vaddr + imm, val); + const execute_status status = write_virtual_memory(a, pc, vaddr + imm, val); if (unlikely(status != execute_status::success)) { if (status == execute_status::failure) { return advance_to_raised_exception(a, pc); @@ -3133,38 +3141,38 @@ static FORCE_INLINE execute_status execute_S(STATE_ACCESS a, uint64_t &pc, uint6 /// \brief Implementation of the SB instruction. template -static FORCE_INLINE execute_status execute_SB(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SB(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sb"); - return execute_S(a, pc, mcycle, insn); + return execute_S(a, pc, insn); } /// \brief Implementation of the SH instruction. template -static FORCE_INLINE execute_status execute_SH(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SH(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sh"); - return execute_S(a, pc, mcycle, insn); + return execute_S(a, pc, insn); } /// \brief Implementation of the SW instruction. template -static FORCE_INLINE execute_status execute_SW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sw"); - return execute_S(a, pc, mcycle, insn); + return execute_S(a, pc, insn); } /// \brief Implementation of the SD instruction. 
template -static FORCE_INLINE execute_status execute_SD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_SD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "sd"); - return execute_S(a, pc, mcycle, insn); + return execute_S(a, pc, insn); } template -static FORCE_INLINE execute_status execute_L(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_L(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); const int32_t imm = insn_I_get_imm(insn); T val = 0; - if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr + imm, &val))) { + if (unlikely(!read_virtual_memory(a, pc, vaddr + imm, &val))) { return advance_to_raised_exception(a, pc); } const uint32_t rd = insn_get_rd(insn); @@ -3183,51 +3191,51 @@ static FORCE_INLINE execute_status execute_L(STATE_ACCESS a, uint64_t &pc, uint6 /// \brief Implementation of the LB instruction. template -static FORCE_INLINE execute_status execute_LB(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LB(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lb"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LH instruction. template -static FORCE_INLINE execute_status execute_LH(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LH(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lh"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LW instruction. 
template -static FORCE_INLINE execute_status execute_LW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lw"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LD instruction. template -static FORCE_INLINE execute_status execute_LD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "ld"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LBU instruction. template -static FORCE_INLINE execute_status execute_LBU(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LBU(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lbu"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LHU instruction. template -static FORCE_INLINE execute_status execute_LHU(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LHU(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lhu"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } /// \brief Implementation of the LWU instruction. 
template -static FORCE_INLINE execute_status execute_LWU(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_LWU(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "lwu"); - return execute_L(a, pc, mcycle, insn); + return execute_L(a, pc, insn); } template @@ -3419,60 +3427,60 @@ static FORCE_INLINE execute_status execute_SRLIW_SRAIW(STATE_ACCESS a, uint64_t } template -static FORCE_INLINE execute_status execute_AMO_W(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMO_W(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { switch (static_cast(insn_get_funct7_sr2(insn))) { case insn_AMO_funct7_sr2::AMOADD: - return execute_AMOADD_W(a, pc, mcycle, insn); + return execute_AMOADD_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOSWAP: - return execute_AMOSWAP_W(a, pc, mcycle, insn); + return execute_AMOSWAP_W(a, pc, insn); case insn_AMO_funct7_sr2::LR: - return execute_LR_W(a, pc, mcycle, insn); + return execute_LR_W(a, pc, insn); case insn_AMO_funct7_sr2::SC: - return execute_SC_W(a, pc, mcycle, insn); + return execute_SC_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOXOR: - return execute_AMOXOR_W(a, pc, mcycle, insn); + return execute_AMOXOR_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOOR: - return execute_AMOOR_W(a, pc, mcycle, insn); + return execute_AMOOR_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOAND: - return execute_AMOAND_W(a, pc, mcycle, insn); + return execute_AMOAND_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOMIN: - return execute_AMOMIN_W(a, pc, mcycle, insn); + return execute_AMOMIN_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOMAX: - return execute_AMOMAX_W(a, pc, mcycle, insn); + return execute_AMOMAX_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOMINU: - return execute_AMOMINU_W(a, pc, mcycle, insn); + return execute_AMOMINU_W(a, pc, insn); case insn_AMO_funct7_sr2::AMOMAXU: - return execute_AMOMAXU_W(a, pc, mcycle, insn); + return 
execute_AMOMAXU_W(a, pc, insn); default: return raise_illegal_insn_exception(a, pc, insn); } } template -static FORCE_INLINE execute_status execute_AMO_D(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_AMO_D(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { switch (static_cast(insn_get_funct7_sr2(insn))) { case insn_AMO_funct7_sr2::AMOADD: - return execute_AMOADD_D(a, pc, mcycle, insn); + return execute_AMOADD_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOSWAP: - return execute_AMOSWAP_D(a, pc, mcycle, insn); + return execute_AMOSWAP_D(a, pc, insn); case insn_AMO_funct7_sr2::LR: - return execute_LR_D(a, pc, mcycle, insn); + return execute_LR_D(a, pc, insn); case insn_AMO_funct7_sr2::SC: - return execute_SC_D(a, pc, mcycle, insn); + return execute_SC_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOXOR: - return execute_AMOXOR_D(a, pc, mcycle, insn); + return execute_AMOXOR_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOOR: - return execute_AMOOR_D(a, pc, mcycle, insn); + return execute_AMOOR_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOAND: - return execute_AMOAND_D(a, pc, mcycle, insn); + return execute_AMOAND_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOMIN: - return execute_AMOMIN_D(a, pc, mcycle, insn); + return execute_AMOMIN_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOMAX: - return execute_AMOMAX_D(a, pc, mcycle, insn); + return execute_AMOMAX_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOMINU: - return execute_AMOMINU_D(a, pc, mcycle, insn); + return execute_AMOMINU_D(a, pc, insn); case insn_AMO_funct7_sr2::AMOMAXU: - return execute_AMOMAXU_D(a, pc, mcycle, insn); + return execute_AMOMAXU_D(a, pc, insn); default: return raise_illegal_insn_exception(a, pc, insn); } @@ -3621,7 +3629,7 @@ static FORCE_INLINE execute_status execute_SRLW_DIVUW_SRAW(STATE_ACCESS a, uint6 } template -static FORCE_INLINE execute_status execute_privileged(STATE_ACCESS a, uint64_t &pc, uint64_t &mcycle, uint32_t insn) { +static 
FORCE_INLINE execute_status execute_privileged(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { switch (static_cast(insn)) { case insn_privileged::ECALL: return execute_ECALL(a, pc, insn); @@ -3632,7 +3640,7 @@ static FORCE_INLINE execute_status execute_privileged(STATE_ACCESS a, uint64_t & case insn_privileged::MRET: return execute_MRET(a, pc, insn); case insn_privileged::WFI: - return execute_WFI(a, pc, mcycle, insn); + return execute_WFI(a, pc, insn); default: return execute_SFENCE_VMA(a, pc, insn); } @@ -3746,13 +3754,13 @@ static FORCE_INLINE execute_status execute_float_unary_op_rm(STATE_ACCESS a, uin } template -static FORCE_INLINE execute_status execute_FS(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FS(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); const int32_t imm = insn_S_get_imm(insn); // A narrower n-bit transfer out of the floating-point // registers will transfer the lower n bits of the register ignoring the upper FLEN−n bits. T val = static_cast(a.read_f(insn_get_rs2(insn))); - const execute_status status = write_virtual_memory(a, pc, mcycle, vaddr + imm, val); + const execute_status status = write_virtual_memory(a, pc, vaddr + imm, val); if (unlikely(status != execute_status::success)) { if (status == execute_status::failure) { return advance_to_raised_exception(a, pc); @@ -3763,32 +3771,32 @@ static FORCE_INLINE execute_status execute_FS(STATE_ACCESS a, uint64_t &pc, uint } template -static FORCE_INLINE execute_status execute_FSW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FSW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "fsw"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { return raise_illegal_insn_exception(a, pc, insn); } - return execute_FS(a, pc, mcycle, insn); + return execute_FS(a, pc, insn); } template -static FORCE_INLINE execute_status execute_FSD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FSD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "fsd"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { return raise_illegal_insn_exception(a, pc, insn); } - return execute_FS(a, pc, mcycle, insn); + return execute_FS(a, pc, insn); } template -static FORCE_INLINE execute_status execute_FL(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FL(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Loads the float value from virtual memory const uint64_t vaddr = a.read_x(insn_get_rs1(insn)); const int32_t imm = insn_I_get_imm(insn); T val = 0; - if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr + imm, &val))) { + if (unlikely(!read_virtual_memory(a, pc, vaddr + imm, &val))) { return advance_to_raised_exception(a, pc); } // A narrower n-bit transfer, n < FLEN, @@ -3799,23 +3807,23 @@ static FORCE_INLINE execute_status execute_FL(STATE_ACCESS a, uint64_t &pc, uint } template -static FORCE_INLINE execute_status execute_FLW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FLW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "flw"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { return raise_illegal_insn_exception(a, pc, insn); } - return execute_FL(a, pc, mcycle, insn); + return execute_FL(a, pc, insn); } template -static FORCE_INLINE execute_status execute_FLD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_FLD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, insn, "fld"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { return raise_illegal_insn_exception(a, pc, insn); } - return execute_FL(a, pc, mcycle, insn); + return execute_FL(a, pc, insn); } template @@ -4701,11 +4709,10 @@ static FORCE_INLINE execute_status execute_FD(STATE_ACCESS a, uint64_t &pc, uint } template -static FORCE_INLINE execute_status execute_C_L(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rd, uint32_t rs1, - int32_t imm) { +static FORCE_INLINE execute_status execute_C_L(STATE_ACCESS a, uint64_t &pc, uint32_t rd, uint32_t rs1, int32_t imm) { const uint64_t vaddr = a.read_x(rs1); T val = 0; - if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr + imm, &val))) { + if (unlikely(!read_virtual_memory(a, pc, vaddr + imm, &val))) { return advance_to_raised_exception(a, pc); } // This static branch is eliminated by the compiler @@ -4718,11 +4725,10 @@ static FORCE_INLINE execute_status execute_C_L(STATE_ACCESS a, uint64_t &pc, uin } template -static FORCE_INLINE execute_status execute_C_S(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rs2, - uint32_t rs1, int32_t imm) { +static FORCE_INLINE execute_status execute_C_S(STATE_ACCESS a, uint64_t &pc, uint32_t rs2, uint32_t rs1, int32_t imm) { const uint64_t vaddr = a.read_x(rs1); const uint64_t val = a.read_x(rs2); - const execute_status status = write_virtual_memory(a, pc, mcycle, vaddr + imm, val); + const execute_status status = 
write_virtual_memory(a, pc, vaddr + imm, val); if (unlikely(status != execute_status::success)) { if (status == execute_status::failure) { return advance_to_raised_exception(a, pc); @@ -4733,12 +4739,11 @@ static FORCE_INLINE execute_status execute_C_S(STATE_ACCESS a, uint64_t &pc, uin } template -static FORCE_INLINE execute_status execute_C_FL(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rd, - uint32_t rs1, int32_t imm) { +static FORCE_INLINE execute_status execute_C_FL(STATE_ACCESS a, uint64_t &pc, uint32_t rd, uint32_t rs1, int32_t imm) { // Loads the float value from virtual memory const uint64_t vaddr = a.read_x(rs1); T val = 0; - if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr + imm, &val))) { + if (unlikely(!read_virtual_memory(a, pc, vaddr + imm, &val))) { return advance_to_raised_exception(a, pc); } // A narrower n-bit transfer, n < FLEN, @@ -4748,13 +4753,12 @@ static FORCE_INLINE execute_status execute_C_FL(STATE_ACCESS a, uint64_t &pc, ui } template -static FORCE_INLINE execute_status execute_C_FS(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rs2, - uint32_t rs1, int32_t imm) { +static FORCE_INLINE execute_status execute_C_FS(STATE_ACCESS a, uint64_t &pc, uint32_t rs2, uint32_t rs1, int32_t imm) { const uint64_t vaddr = a.read_x(rs1); // A narrower n-bit transfer out of the floating-point // registers will transfer the lower n bits of the register ignoring the upper FLEN−n bits. T val = static_cast(a.read_f(rs2)); - const execute_status status = write_virtual_memory(a, pc, mcycle, vaddr + imm, val); + const execute_status status = write_virtual_memory(a, pc, vaddr + imm, val); if (unlikely(status != execute_status::success)) { if (status == execute_status::failure) { return advance_to_raised_exception(a, pc); @@ -4781,7 +4785,7 @@ static FORCE_INLINE execute_status execute_C_ADDI4SPN(STATE_ACCESS a, uint64_t & /// \brief Implementation of the C.FLD instruction. 
template -static FORCE_INLINE execute_status execute_C_FLD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_FLD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.fld"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction // exception. @@ -4791,32 +4795,32 @@ static FORCE_INLINE execute_status execute_C_FLD(STATE_ACCESS a, uint64_t &pc, u const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const int32_t imm = insn_get_CL_CS_imm(insn); - return execute_C_FL(a, pc, mcycle, rd, rs1, imm); + return execute_C_FL(a, pc, rd, rs1, imm); } /// \brief Implementation of the C.LW instruction. template -static FORCE_INLINE execute_status execute_C_LW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_LW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.lw"); const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const int32_t imm = insn_get_C_LW_C_SW_imm(insn); - return execute_C_L(a, pc, mcycle, rd, rs1, imm); + return execute_C_L(a, pc, rd, rs1, imm); } /// \brief Implementation of the C.LD instruction. template -static FORCE_INLINE execute_status execute_C_LD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_LD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.ld"); const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const int32_t imm = insn_get_CL_CS_imm(insn); - return execute_C_L(a, pc, mcycle, rd, rs1, imm); + return execute_C_L(a, pc, rd, rs1, imm); } /// \brief Implementation of the C.FSD instruction. 
template -static FORCE_INLINE execute_status execute_C_FSD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_FSD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.fsd"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction // exception. @@ -4826,27 +4830,27 @@ static FORCE_INLINE execute_status execute_C_FSD(STATE_ACCESS a, uint64_t &pc, u const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const int32_t imm = insn_get_CL_CS_imm(insn); - return execute_C_FS(a, pc, mcycle, rs2, rs1, imm); + return execute_C_FS(a, pc, rs2, rs1, imm); } /// \brief Implementation of the C.SW instruction. template -static FORCE_INLINE execute_status execute_C_SW(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_SW(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.sw"); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const int32_t imm = insn_get_C_LW_C_SW_imm(insn); - return execute_C_S(a, pc, mcycle, rs2, rs1, imm); + return execute_C_S(a, pc, rs2, rs1, imm); } /// \brief Implementation of the C.SD instruction. template -static FORCE_INLINE execute_status execute_C_SD(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_SD(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.sd"); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const int32_t imm = insn_get_CL_CS_imm(insn); - return execute_C_S(a, pc, mcycle, rs2, rs1, imm); + return execute_C_S(a, pc, rs2, rs1, imm); } /// \brief Implementation of the C.NOP instruction. 
@@ -5081,7 +5085,7 @@ static FORCE_INLINE execute_status execute_C_SLLI(STATE_ACCESS a, uint64_t &pc, /// \brief Implementation of the C.FLDSP instruction. template -static FORCE_INLINE execute_status execute_C_FLDSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_FLDSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.fldsp"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction // exception. @@ -5090,27 +5094,27 @@ static FORCE_INLINE execute_status execute_C_FLDSP(STATE_ACCESS a, uint64_t &pc, } const uint32_t rd = insn_get_rd(insn); const int32_t imm = insn_get_C_FLDSP_LDSP_imm(insn); - return execute_C_FL(a, pc, mcycle, rd, 0x2, imm); + return execute_C_FL(a, pc, rd, 0x2, imm); } /// \brief Implementation of the C.LWSP instruction. template -static FORCE_INLINE execute_status execute_C_LWSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_LWSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.lwsp"); // rd cannot be zero (guaranteed by jump table) const uint32_t rd = insn_get_rd(insn); const int32_t imm = insn_get_C_LWSP_imm(insn); - return execute_C_L(a, pc, mcycle, rd, 0x2, imm); + return execute_C_L(a, pc, rd, 0x2, imm); } /// \brief Implementation of the C.LDSP instruction. template -static FORCE_INLINE execute_status execute_C_LDSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_LDSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.ldsp"); // rd cannot be zero (guaranteed by jump table) const uint32_t rd = insn_get_rd(insn); const int32_t imm = insn_get_C_FLDSP_LDSP_imm(insn); - return execute_C_L(a, pc, mcycle, rd, 0x2, imm); + return execute_C_L(a, pc, rd, 0x2, imm); } /// \brief Implementation of the C.JR instruction. 
@@ -5171,7 +5175,7 @@ static FORCE_INLINE execute_status execute_C_ADD(STATE_ACCESS a, uint64_t &pc, u /// \brief Implementation of the C.FSDSP instruction. template -static FORCE_INLINE execute_status execute_C_FSDSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_FSDSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.fsdsp"); // If FS is OFF, attempts to read or write the float state will cause an illegal instruction // exception. @@ -5180,25 +5184,25 @@ static FORCE_INLINE execute_status execute_C_FSDSP(STATE_ACCESS a, uint64_t &pc, } const uint32_t rs2 = insn_get_CR_CSS_rs2(insn); const int32_t imm = insn_get_C_FSDSP_SDSP_imm(insn); - return execute_C_FS(a, pc, mcycle, rs2, 0x2, imm); + return execute_C_FS(a, pc, rs2, 0x2, imm); } /// \brief Implementation of the C.SWSP instruction. template -static FORCE_INLINE execute_status execute_C_SWSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_SWSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.swsp"); const uint32_t rs2 = insn_get_CR_CSS_rs2(insn); const int32_t imm = insn_get_C_SWSP_imm(insn); - return execute_C_S(a, pc, mcycle, rs2, 0x2, imm); + return execute_C_S(a, pc, rs2, 0x2, imm); } /// \brief Implementation of the C.SDSP instruction. 
template -static FORCE_INLINE execute_status execute_C_SDSP(STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { +static FORCE_INLINE execute_status execute_C_SDSP(STATE_ACCESS a, uint64_t &pc, uint32_t insn) { dump_insn(a, pc, static_cast(insn), "c.sdsp"); const uint32_t rs2 = insn_get_CR_CSS_rs2(insn); const int32_t imm = insn_get_C_FSDSP_SDSP_imm(insn); - return execute_C_S(a, pc, mcycle, rs2, 0x2, imm); + return execute_C_S(a, pc, rs2, 0x2, imm); } /// \brief Instruction fetch status code @@ -5360,15 +5364,15 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e while (mcycle < mcycle_end) { INC_COUNTER(a.get_statistics(), outer_loop); - if (rtc_is_tick(mcycle)) { + if (rtc_is_tick(mcycle) && mcycle != 0) { // Set interrupt flag for RTC - set_rtc_interrupt(a, mcycle); + rtc_advance_tick(a); // Polling external interrupts only in WFI instructions is not enough // because Linux won't execute WFI instructions while under heavy load, // yet external interrupts still need to be triggered. // Therefore we poll for external interrupt once a while in the interpreter loop. 
- a.poll_external_interrupts(mcycle, mcycle); + a.poll_external_interrupts(0); } // Raise the highest priority pending interrupt, if any @@ -5380,11 +5384,15 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e #endif // Limit mcycle_tick_end up to the next RTC tick, while avoiding unsigned overflows - const uint64_t mcycle_tick_end = mcycle + std::min(mcycle_end - mcycle, RTC_FREQ_DIV - (mcycle % RTC_FREQ_DIV)); + const volatile uint64_t mcycle_tick_end = + mcycle + std::min(mcycle_end - mcycle, RTC_FREQ_DIV - (mcycle % RTC_FREQ_DIV)); + // NOTE: Volatile is used above to force the optimizer to store mcycle_tick_end in the stack, + // otherwise the optimizer would allocate it in a register, degrading performance in the hot inner loop // The inner loop continues until there is an interrupt condition // or mcycle reaches mcycle_tick_end - while (mcycle < mcycle_tick_end) { + uint64_t mcycle_remaining = mcycle_tick_end - mcycle + 1; + while (--mcycle_remaining > 0) { INC_COUNTER(a.get_statistics(), inner_loop); uint32_t insn = 0; @@ -5518,37 +5526,37 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e status = execute_REMUW(a, pc, insn); INSN_BREAK(); INSN_CASE(LD_rdN): - status = execute_LD(a, pc, mcycle, insn); + status = execute_LD(a, pc, insn); INSN_BREAK(); INSN_CASE(LW_rdN): - status = execute_LW(a, pc, mcycle, insn); + status = execute_LW(a, pc, insn); INSN_BREAK(); INSN_CASE(LWU_rdN): - status = execute_LWU(a, pc, mcycle, insn); + status = execute_LWU(a, pc, insn); INSN_BREAK(); INSN_CASE(LH_rdN): - status = execute_LH(a, pc, mcycle, insn); + status = execute_LH(a, pc, insn); INSN_BREAK(); INSN_CASE(LHU_rdN): - status = execute_LHU(a, pc, mcycle, insn); + status = execute_LHU(a, pc, insn); INSN_BREAK(); INSN_CASE(LB_rdN): - status = execute_LB(a, pc, mcycle, insn); + status = execute_LB(a, pc, insn); INSN_BREAK(); INSN_CASE(LBU_rdN): - status = execute_LBU(a, pc, mcycle, insn); + status =
execute_LBU(a, pc, insn); INSN_BREAK(); INSN_CASE(SD): - status = execute_SD(a, pc, mcycle, insn); + status = execute_SD(a, pc, insn); INSN_BREAK(); INSN_CASE(SW): - status = execute_SW(a, pc, mcycle, insn); + status = execute_SW(a, pc, insn); INSN_BREAK(); INSN_CASE(SH): - status = execute_SH(a, pc, mcycle, insn); + status = execute_SH(a, pc, insn); INSN_BREAK(); INSN_CASE(SB): - status = execute_SB(a, pc, mcycle, insn); + status = execute_SB(a, pc, insn); INSN_BREAK(); // C extension INSN_CASE(C_HINT): @@ -5625,40 +5633,40 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e status = execute_C_SUBW(a, pc, insn); INSN_BREAK(); INSN_CASE(C_LD): - status = execute_C_LD(a, pc, mcycle, insn); + status = execute_C_LD(a, pc, insn); INSN_BREAK(); INSN_CASE(C_LW): - status = execute_C_LW(a, pc, mcycle, insn); + status = execute_C_LW(a, pc, insn); INSN_BREAK(); INSN_CASE(C_LDSP): - status = execute_C_LDSP(a, pc, mcycle, insn); + status = execute_C_LDSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_LWSP): - status = execute_C_LWSP(a, pc, mcycle, insn); + status = execute_C_LWSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_SD): - status = execute_C_SD(a, pc, mcycle, insn); + status = execute_C_SD(a, pc, insn); INSN_BREAK(); INSN_CASE(C_SW): - status = execute_C_SW(a, pc, mcycle, insn); + status = execute_C_SW(a, pc, insn); INSN_BREAK(); INSN_CASE(C_SDSP): - status = execute_C_SDSP(a, pc, mcycle, insn); + status = execute_C_SDSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_SWSP): - status = execute_C_SWSP(a, pc, mcycle, insn); + status = execute_C_SWSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_FLD): - status = execute_C_FLD(a, pc, mcycle, insn); + status = execute_C_FLD(a, pc, insn); INSN_BREAK(); INSN_CASE(C_FLDSP): - status = execute_C_FLDSP(a, pc, mcycle, insn); + status = execute_C_FLDSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_FSD): - status = execute_C_FSD(a, pc, mcycle, insn); + status = execute_C_FSD(a, pc, insn); INSN_BREAK(); INSN_CASE(C_FSDSP): - status 
= execute_C_FSDSP(a, pc, mcycle, insn); + status = execute_C_FSDSP(a, pc, insn); INSN_BREAK(); INSN_CASE(C_EBREAK): status = execute_C_EBREAK(a, pc, insn); @@ -5668,16 +5676,16 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e status = execute_FD(a, pc, insn); INSN_BREAK(); INSN_CASE(FLD): - status = execute_FLD(a, pc, mcycle, insn); + status = execute_FLD(a, pc, insn); INSN_BREAK(); INSN_CASE(FLW): - status = execute_FLW(a, pc, mcycle, insn); + status = execute_FLW(a, pc, insn); INSN_BREAK(); INSN_CASE(FSD): - status = execute_FSD(a, pc, mcycle, insn); + status = execute_FSD(a, pc, insn); INSN_BREAK(); INSN_CASE(FSW): - status = execute_FSW(a, pc, mcycle, insn); + status = execute_FSW(a, pc, insn); INSN_BREAK(); INSN_CASE(FMADD): status = execute_FMADD(a, pc, insn); @@ -5693,29 +5701,35 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e INSN_BREAK(); // A extension INSN_CASE(AMO_D): - status = execute_AMO_D(a, pc, mcycle, insn); + status = execute_AMO_D(a, pc, insn); INSN_BREAK(); INSN_CASE(AMO_W): - status = execute_AMO_W(a, pc, mcycle, insn); + status = execute_AMO_W(a, pc, insn); INSN_BREAK(); // Zicsr extension INSN_CASE(CSRRW): - status = execute_CSRRW(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRW(a, pc, insn); INSN_BREAK(); INSN_CASE(CSRRS): - status = execute_CSRRS(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRS(a, pc, insn); INSN_BREAK(); INSN_CASE(CSRRC): - status = execute_CSRRC(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRC(a, pc, insn); INSN_BREAK(); INSN_CASE(CSRRWI): - status = execute_CSRRWI(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRWI(a, pc, insn); INSN_BREAK(); INSN_CASE(CSRRSI): - status = 
execute_CSRRSI(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRSI(a, pc, insn); INSN_BREAK(); INSN_CASE(CSRRCI): - status = execute_CSRRCI(a, pc, mcycle, insn); + a.write_mcycle(mcycle_tick_end - mcycle_remaining); // commit mcycle + status = execute_CSRRCI(a, pc, insn); INSN_BREAK(); // Special instructions that are less frequent INSN_CASE(FENCE): @@ -5725,7 +5739,7 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e status = execute_FENCE_I(a, pc, insn); INSN_BREAK(); INSN_CASE(PRIVILEGED): - status = execute_privileged(a, pc, mcycle, insn); + status = execute_privileged(a, pc, insn); INSN_BREAK(); // Instructions with hints where rd=0 INSN_CASE(LUI_rd0): @@ -5810,25 +5824,25 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e status = execute_REMUW(a, pc, insn); INSN_BREAK(); INSN_CASE(LD_rd0): - status = execute_LD(a, pc, mcycle, insn); + status = execute_LD(a, pc, insn); INSN_BREAK(); INSN_CASE(LW_rd0): - status = execute_LW(a, pc, mcycle, insn); + status = execute_LW(a, pc, insn); INSN_BREAK(); INSN_CASE(LWU_rd0): - status = execute_LWU(a, pc, mcycle, insn); + status = execute_LWU(a, pc, insn); INSN_BREAK(); INSN_CASE(LH_rd0): - status = execute_LH(a, pc, mcycle, insn); + status = execute_LH(a, pc, insn); INSN_BREAK(); INSN_CASE(LHU_rd0): - status = execute_LHU(a, pc, mcycle, insn); + status = execute_LHU(a, pc, insn); INSN_BREAK(); INSN_CASE(LB_rd0): - status = execute_LB(a, pc, mcycle, insn); + status = execute_LB(a, pc, insn); INSN_BREAK(); INSN_CASE(LBU_rd0): - status = execute_LBU(a, pc, mcycle, insn); + status = execute_LBU(a, pc, insn); INSN_BREAK(); // Illegal instructions INSN_CASE(ILLEGAL): @@ -5859,35 +5873,35 @@ static NO_INLINE execute_status interpret_loop(STATE_ACCESS a, uint64_t mcycle_e fetch_vaddr_page = ~pc; // All status above execute_status::success_and_serve_interrupts will require breaking the loop if 
(unlikely(status >= execute_status::success_and_serve_interrupts)) { - // Increment the cycle counter mcycle - ++mcycle; + // Decrement remaining cycles + --mcycle_remaining; if (likely(status == execute_status::success_and_serve_interrupts)) { // We have to break the inner loop to check and serve any pending interrupt immediately break; } // execute_status::success_and_yield or execute_status::success_and_halt // Commit machine state + a.write_mcycle(mcycle_tick_end - mcycle_remaining); a.write_pc(pc); - a.write_mcycle(mcycle); // Got an interruption that must be handled externally return status; } } } - // Increment the cycle counter mcycle - ++mcycle; - #ifndef NDEBUG // After a inner loop iteration, there can be no pending interrupts assert_no_brk(a); #endif } + + // Update loop mcycle + mcycle = mcycle_tick_end - mcycle_remaining; } // Commit machine state - a.write_pc(pc); a.write_mcycle(mcycle); + a.write_pc(pc); return execute_status::success; } diff --git a/src/record-step-state-access.h b/src/record-step-state-access.h index 77983f569..0ba64819e 100644 --- a/src/record-step-state-access.h +++ b/src/record-step-state-access.h @@ -601,9 +601,8 @@ class record_step_state_access : public i_state_access do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { - (void) mcycle_max; - return {mcycle, false}; + NO_INLINE std::pair do_poll_external_interrupts(uint64_t /*timeout_us*/) { + return {0, false}; } template diff --git a/src/replay-step-state-access.h b/src/replay-step-state-access.h index 7c6cb1865..d3d3c80e4 100644 --- a/src/replay-step-state-access.h +++ b/src/replay-step-state-access.h @@ -857,9 +857,8 @@ class replay_step_state_access : public i_state_access do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { - (void) mcycle_max; - return {mcycle, false}; + std::pair do_poll_external_interrupts(uint64_t /*timeout_us*/) { + return {0, false}; } uint64_t read_pma_istart(uint64_t i) { diff --git a/src/rtc-defines.h 
b/src/rtc-defines.h index c1bbcc82d..d955a9fc7 100644 --- a/src/rtc-defines.h +++ b/src/rtc-defines.h @@ -22,10 +22,10 @@ /// Changing this value affects the machine state hash /// Higher values decrease the performance of the interactive machine emulator /// Using base 2 values optimizes division and multiplications in the interpreter loop -#define RTC_FREQ_DIV_DEF 8192 +#define RTC_FREQ_DIV_DEF 16384 /// \brief Arbitrary CPU clock frequency. /// We have to make sure the clock frequency is divisible by RTC_FREQ_DIV_DEF and 10^6 -#define RTC_CLOCK_FREQ_DEF 128000000 ///< 128 MHz frequency +#define RTC_CLOCK_FREQ_DEF 256000000 ///< 256 MHz frequency // NOLINTEND(cppcoreguidelines-macro-usage,cppcoreguidelines-macro-to-enum,modernize-macro-to-enum) #endif /* end of include guard: RTC_DEFINES_H */ diff --git a/src/rtc.h b/src/rtc.h index 7e1a21eb1..2a128ccf8 100644 --- a/src/rtc.h +++ b/src/rtc.h @@ -28,25 +28,11 @@ namespace cartesi { /// \brief RTC constants enum RTC_constants : uint64_t { - RTC_FREQ_DIV = RTC_FREQ_DIV_DEF, ///< Clock divisor is set stone in whitepaper - RTC_CLOCK_FREQ = RTC_CLOCK_FREQ_DEF, ///< Clock frequency - RTC_CYCLES_PER_US = RTC_CLOCK_FREQ / 1000000, ///< Clock cycles per microsecond + RTC_FREQ_DIV = RTC_FREQ_DIV_DEF, ///< Clock divisor is set in stone in the whitepaper + RTC_CLOCK_FREQ = RTC_CLOCK_FREQ_DEF, ///< Clock frequency + RTC_US_PER_TICK = (1000000 * RTC_FREQ_DIV) / RTC_CLOCK_FREQ, ///< Microseconds per clock tick }; -/// \brief Converts from cycle count to time count -/// \param cycle Cycle count -/// \returns Time count -static inline uint64_t rtc_cycle_to_time(uint64_t cycle) { - return cycle / RTC_FREQ_DIV; -} - -/// \brief Converts from time count to cycle count -/// \param time Time count -/// \returns Cycle count -static inline uint64_t rtc_time_to_cycle(uint64_t time) { - return time * RTC_FREQ_DIV; -} - /// \brief Returns whether the cycle is a RTC tick /// \param cycle Cycle count static inline bool rtc_is_tick(uint64_t cycle) {
diff --git a/src/state-access.h b/src/state-access.h index b3c2e71ff..d3564ada7 100644 --- a/src/state-access.h +++ b/src/state-access.h @@ -417,17 +417,16 @@ class state_access : public i_state_access { return m_m.get_state().htif.iyield; } - NO_INLINE std::pair do_poll_external_interrupts(uint64_t mcycle, uint64_t mcycle_max) { + NO_INLINE std::pair do_poll_external_interrupts(uint64_t timeout_us) { bool interrupt_raised = false; + uint64_t elapsed_us = 0; // Only poll external interrupts if we are in unreproducible mode if (unlikely(do_read_iunrep())) { - // Convert the relative interval of cycles we can wait to the interval of host time we can wait - uint64_t timeout_us = (mcycle_max - mcycle) / RTC_CYCLES_PER_US; int64_t start_us = 0; if (timeout_us > 0) { start_us = os_now_us(); } - device_state_access da(*this, mcycle); + device_state_access da(*this); // Poll virtio for events (e.g console stdin, network sockets) // Timeout may be decremented in case a device has deadline timers (e.g network device) if (m_m.has_virtio_devices() && m_m.has_virtio_console()) { // VirtIO + VirtIO console @@ -444,15 +443,13 @@ class state_access : public i_state_access { } else if (timeout_us > 0) { // No interrupts to check, just keep the CPU idle os_sleep_us(timeout_us); } - // If timeout is greater than zero, we should also increment mcycle relative to the elapsed time + // If timeout is greater than zero, we should also increment relative to the real elapsed time if (timeout_us > 0) { - const int64_t end_us = os_now_us(); - const uint64_t elapsed_us = static_cast(std::max(end_us - start_us, INT64_C(0))); - const uint64_t next_mcycle = mcycle + (elapsed_us * RTC_CYCLES_PER_US); - mcycle = std::min(std::max(next_mcycle, mcycle), mcycle_max); + const int64_t end_us = std::max(os_now_us(), start_us); + elapsed_us = static_cast(std::max(end_us - start_us, INT64_C(0))); } } - return {mcycle, interrupt_raised}; + return {elapsed_us, interrupt_raised}; } template diff --git 
a/tests/lua/cartesi-machine-tests.lua b/tests/lua/cartesi-machine-tests.lua index dbb7c6fe9..99228bdd6 100755 --- a/tests/lua/cartesi-machine-tests.lua +++ b/tests/lua/cartesi-machine-tests.lua @@ -284,8 +284,8 @@ local riscv_tests = { { "csr_semantics.bin", 378 }, { "amo.bin", 162 }, { "access.bin", 97 }, - { "interrupts.bin", 8209 }, - { "mtime_interrupt.bin", 16404 }, + { "interrupts.bin", 16401 }, + { "mtime_interrupt.bin", 32788 }, { "illegal_insn.bin", 972 }, { "version_check.bin", 26 }, { "translate_vaddr.bin", 343 }, diff --git a/tests/lua/mtime-interrupt.lua b/tests/lua/mtime-interrupt.lua index 841eaebc1..6a5771080 100755 --- a/tests/lua/mtime-interrupt.lua +++ b/tests/lua/mtime-interrupt.lua @@ -36,7 +36,7 @@ local function do_test(description, f) print(" passed") end -local RTC_FREQ_DIV = 8192 +local RTC_FREQ_DIV = 16384 local EXPECTED_MCYCLE = RTC_FREQ_DIV * 2 + 20 local function check_state(machine) diff --git a/uarch/uarch-machine-state-access.h b/uarch/uarch-machine-state-access.h index d90634044..85aa41643 100644 --- a/uarch/uarch-machine-state-access.h +++ b/uarch/uarch-machine-state-access.h @@ -543,8 +543,8 @@ class uarch_machine_state_access : public i_state_access(machine_reg_address(machine_reg::htif_iyield)); } - std::pair do_poll_external_interrupts(uint64_t mcycle, uint64_t /*mcycle_max*/) { - return {mcycle, false}; + std::pair do_poll_external_interrupts(uint64_t /*timeout_us*/) { + return {0, false}; } uint64_t read_pma_istart(uint64_t i) { From a000af7eb5283dfd3b08fb0bbbd4ddf87a5b6b5a Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Sun, 2 Feb 2025 15:54:58 -0300 Subject: [PATCH 15/15] feat: optimize SQRT instruction using digit-by-digit calculation --- src/soft-float.h | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/soft-float.h b/src/soft-float.h index d87040be8..0f23ced34 100644 --- a/src/soft-float.h +++ b/src/soft-float.h @@ -123,33 +123,24 @@ template static bool 
sqrtrem_u(UINT *pr, UINT ah, UINT al) { using ULONG = typename make_long_uint::type; constexpr int UINT_SIZE = sizeof(UINT) * 8; - int l = 0; - // 2^l >= a - if (ah != 0) { - l = 2 * UINT_SIZE - clz(ah - 1); - } else { - // This branch will actually never be taken, - // because at this moment sqrtrem_u() is only called by sqrt() which makes sure that ah > 0 - // LCOV_EXCL_START - if (al == 0) { - *pr = 0; - return false; + const ULONG a = (static_cast(ah) << UINT_SIZE) | al; + + // Perform "digit‐by‐digit" square root extraction in base 2. + // See https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_(base_2) + ULONG res = 0; + ULONG rem = a; // Will eventually hold a - res^2 + for (ULONG bit = static_cast(1) << ((2 * UINT_SIZE) - 2); bit != 0; bit >>= 2) { + const ULONG tmp = res + bit; + if (rem >= tmp) { + rem -= tmp; + res = (res >> 1) + bit; + } else { + res >>= 1; } - l = UINT_SIZE - clz(al - 1); - // LCOV_EXCL_STOP } - const ULONG a = (static_cast(ah) << UINT_SIZE) | al; - ULONG u = static_cast(1) << ((l + 1) / 2); - ULONG s = 0; - // NOLINTBEGIN(cppcoreguidelines-avoid-do-while) - do { - s = u; - // here we divide by two by shifting 1 bit to the right as an optimization - u = ((a / s) + s) >> 1; - } while (u < s); - // NOLINTEND(cppcoreguidelines-avoid-do-while) - *pr = static_cast(s); - return (a - s * s) != 0; + + *pr = static_cast(res); + return (rem != 0); } /// \class i_sfloat