Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 10 additions & 70 deletions libsolidity/codegen/CompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1032,77 +1032,17 @@ void CompilerUtils::convertType(
// Copy the array to a free position in memory, unless it is already in memory.
if (typeOnStack.location() != DataLocation::Memory)
{
if (
typeOnStack.dataStoredIn(DataLocation::CallData) &&
typeOnStack.baseType()->isDynamicallyEncoded()
)
{
solAssert(m_context.useABICoderV2());
// stack: offset length(optional in case of dynamically sized array)
solAssert(typeOnStack.sizeOnStack() == (typeOnStack.isDynamicallySized() ? 2 : 1));
if (typeOnStack.isDynamicallySized())
m_context << Instruction::SWAP1;

m_context.callYulFunction(
m_context.utilFunctions().conversionFunction(typeOnStack, targetType),
typeOnStack.isDynamicallySized() ? 2 : 1,
1
);
}
else
{
// stack: <source ref> (variably sized)
unsigned stackSize = typeOnStack.sizeOnStack();
ArrayUtils(m_context).retrieveLength(typeOnStack);
solAssert(m_context.useABICoderV2());
// stack: offset length(optional in case of dynamically sized array)
solAssert(typeOnStack.sizeOnStack() == (typeOnStack.isDynamicallySized() ? 2 : 1));
if (typeOnStack.isDynamicallySized())
m_context << Instruction::SWAP1;

// allocate memory
// stack: <source ref> (variably sized) <length>
m_context << Instruction::DUP1;
ArrayUtils(m_context).convertLengthToSize(targetType, true);
// stack: <source ref> (variably sized) <length> <size>
if (targetType.isDynamicallySized())
m_context << u256(0x20) << Instruction::ADD;
allocateMemory();
// stack: <source ref> (variably sized) <length> <mem start>
m_context << Instruction::DUP1;
moveIntoStack(2 + stackSize);
if (targetType.isDynamicallySized())
{
m_context << Instruction::DUP2;
storeInMemoryDynamic(*TypeProvider::uint256());
}
// stack: <mem start> <source ref> (variably sized) <length> <mem data pos>
if (targetType.baseType()->isValueType())
{
copyToStackTop(2 + stackSize, stackSize);
ArrayUtils(m_context).copyArrayToMemory(typeOnStack);
}
else
{
m_context << u256(0) << Instruction::SWAP1;
// stack: <mem start> <source ref> (variably sized) <length> <counter> <mem data pos>
Comment on lines -1074 to -1083
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The else here is actually yet another case that I only discovered while doing the fix - I don't have test coverage for it yet. It's the case with a dynamic array with statically-encoded elements that are not value types. E.g. S[] calldata, where S is a static struct.

auto repeat = m_context.newTag();
m_context << repeat;
m_context << Instruction::DUP3 << Instruction::DUP3;
m_context << Instruction::LT << Instruction::ISZERO;
auto loopEnd = m_context.appendConditionalJump();
copyToStackTop(3 + stackSize, stackSize);
copyToStackTop(2 + stackSize, 1);
ArrayUtils(m_context).accessIndex(typeOnStack, false);
if (typeOnStack.location() == DataLocation::Storage)
StorageItem(m_context, *typeOnStack.baseType()).retrieveValue(SourceLocation(), true);
convertType(*typeOnStack.baseType(), *targetType.baseType(), _cleanupNeeded);
storeInMemoryDynamic(*targetType.baseType(), true);
m_context << Instruction::SWAP1 << u256(1) << Instruction::ADD;
m_context << Instruction::SWAP1;
m_context.appendJumpTo(repeat);
m_context << loopEnd;
m_context << Instruction::POP;
}
// stack: <mem start> <source ref> (variably sized) <length> <mem data pos updated>
popStackSlots(2 + stackSize);
// Stack: <mem start>
}
m_context.callYulFunction(
m_context.utilFunctions().conversionFunction(typeOnStack, targetType),
typeOnStack.isDynamicallySized() ? 2 : 1,
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the fix for the first bug. The problem here is that the evmasm codegen is sometimes still using code written for ABI coder v1 even when v2 is selected. This is fine in terms of correctness of decoding, but does not account for the validations, since most of them were only introduced in v2.

This will happen for example when you try to access a bytes element of bytes[] calldata. Note that this is not even a v1 type, but the routine kicks in, because we're only accessing bytes inside it and that one is.

I'm not yet sure this change (i.e. replacing everything with a call to v2) is the right solution here. It will likely significantly increase the cost and still needs adjustment for types with a different number of slots (assertions are failing in tests). It at least fix the validations though.

1
);
}
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ contract C {
// f(uint256[][1]): 32, 32, 1, 42 -> true
// f(uint256[][1]): 32, 32, 8, 421, 422, 423, 424, 425, 426, 427, 428 -> true
// gas irOptimized: 120978
// gas legacy: 98324
// gas legacy: 101289
// gas legacyOptimized: 94348
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Adapted example from a bug report we received.

contract SolidityCalldataAccess {
event LogBytes(bytes);
event LogUint256(uint256);

struct InnerStruct {
uint256 innerField1;
bytes innerField2;
uint256 innerField3;
}

struct OuterStruct {
InnerStruct outerField1;
}

function accessAndReturn(OuterStruct calldata s) external returns (uint256, bytes memory, uint256) {
return ( // With InnerStruct at offset -100 these map to:
s.outerField1.innerField1, // -96..-64
s.outerField1.innerField2, // -66..0
s.outerField1.innerField3 // 0..32 (which includes function selector)
);
}

function accessAndEmit(OuterStruct calldata s) external {
emit LogUint256(s.outerField1.innerField1);
emit LogBytes(s.outerField1.innerField2);
emit LogUint256(s.outerField1.innerField3); // Note that this value is not the same as in accessAndReturn() due to selector
}

function f(uint256, bytes memory, uint256) external {}

function accessAndPassIntoCall(OuterStruct calldata s) external {
this.f(
s.outerField1.innerField1,
s.outerField1.innerField2,
s.outerField1.innerField3
);
}
}
// ====
// revertStrings: debug
// ----
// accessAndReturn(((uint256,bytes,uint256))): 32, -100 -> FAILURE, hex"08c379a0", 0x20, 39, "ABI decoding: invalid byte array", " length"
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a fourth bug. This one may result in accepting input that will result in reading beyond calldatasize(). This one is not covered by tests yet, but the problem is the faulty condition that produces a revert with this message.

The condition is:

if gt(add(src, length), end) { <revertStringLength>() }

Firstly, the error message is inaccurate. Note that it's not only invalid length that may cause this. It can also be the offset. If the offset is negative enough, the absolute start position ends up past the end due to underflow and the (unsigned) comparison fails.

However, when src is negative, but src + length still positive, the condition may be satisfied. This is wrong, because the initial part of the string is likely past calldatasize() in that case.

// accessAndEmit(((uint256,bytes,uint256))): 32, -100 ->
// ~ emit LogUint256(uint256): 0x00
// ~ emit LogBytes(bytes): 0x20, 0x00
// ~ emit LogUint256(uint256): 0x185a652e00000000000000000000000000000000000000000000000000000000
// accessAndPassIntoCall(((uint256,bytes,uint256))): 32, -100 ->
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
contract C {
// Access at every nesting level must be validated against reading outside of the [0, calldatasize() - 1] range.
// NOTE: Returning a calldata type as memory always results in the data being decoded.
function accessTop(bytes[][][] calldata a) external returns (bytes[][][] memory) { return a; }
function accessMiddle(bytes[][][] calldata a) external returns (bytes[][] memory) { return a[0]; }
function accessBottom(bytes[][][] calldata a) external returns (bytes[] memory) { return a[0][0]; }
function accessContent(bytes[][][] calldata a) external returns (bytes memory) { return a[0][0][0]; }
}
// Every case below which uses -999 as offset has tail at a negative position and should revert.
// ====
// revertStrings: debug
// ----
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessTop(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessTop(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessMiddle(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray stride"
// accessMiddle(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray offset"
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 43, "ABI decoding: invalid calldata a", "rray stride"
// accessBottom(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessBottom(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 0
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"08c379a0", 0x20, 39, "ABI decoding: invalid byte array", " length"
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
contract C {
// Access at every nesting level must be validated against reading outside of the [0, calldatasize() - 1] range.
// NOTE: Returning the input as calldata does not trigger decoding.
// The encoded data is just copied. All validations expected during normal decoding must still run though.
function accessTop(bytes[][][] calldata a) external returns (bytes[][][] calldata) { return a; }
function accessMiddle(bytes[][][] calldata a) external returns (bytes[][] calldata) { return a[0]; }
function accessBottom(bytes[][][] calldata a) external returns (bytes[] calldata) { return a[0][0]; }
function accessContent(bytes[][][] calldata a) external returns (bytes calldata) { return a[0][0][0]; }
}
// Every case below which uses -999 as offset has tail at a negative position and should revert.
// ====
// revertStrings: debug
// ----
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, -999 -> 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 1, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, -999 -> 0x20, 0
// accessMiddle(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 1, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 1, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessBottom(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 0
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 0
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"
Comment on lines +14 to +36
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is essentially the bug from the report. The output here should match negative_offset_negative_tail_position_dynamic_array.sol, i.e. every case except the first one in each group (which is there only to show what the output looks like in the equivalent non-negative-offset case) should revert.

Copy link
Copy Markdown
Collaborator Author

@cameel cameel Mar 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _abi_encode.sol test variants do not add much. I will probably remove them unless someone thinks they are are worth keeping.

Each one has a corresponding, slightly simpler test that just returns the value as calldata. I wrote the _abi_encode.sol ones first and only later realized that returning calldata will not trigger decoding either.

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
contract C {
// Access at every nesting level must be validated against reading outside of the [0, calldatasize() - 1] range.
// NOTE: Passing a calldata type into abi.encode() does not trigger decoding.
// The encoded data is just copied. All validations expected during normal decoding must still run though.
function accessTop(bytes[][][] calldata a) external returns (bytes memory) { return abi.encode(a); }
function accessMiddle(bytes[][][] calldata a) external returns (bytes memory) { return abi.encode(a[0]); }
function accessBottom(bytes[][][] calldata a) external returns (bytes memory) { return abi.encode(a[0][0]); }
function accessContent(bytes[][][] calldata a) external returns (bytes memory) { return abi.encode(a[0][0][0]); }
}
// Every case below which uses -999 as offset has tail at a negative position and should revert.
// ====
// revertStrings: debug
// ----
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 256, 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 256, 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 192, 0x20, 1, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): 0x20, 1, -999 -> 0x20, 128, 0x20, 1, 0x20, 0
// accessTop(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 192, 0x20, 1, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 192, 0x20, 1, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 128, 0x20, 1, 0x20, 0
// accessMiddle(bytes[][][]): 0x20, 1, -999 -> 0x20, 64, 0x20, 0
// accessMiddle(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 128, 0x20, 1, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 128, 0x20, 1, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> 0x20, 64, 0x20, 0
// accessBottom(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessBottom(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, 0x20, 0 -> 0x20, 64, 0x20, 0
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, 0x20, 1, -999 -> 0x20, 64, 0x20, 0
// accessContent(bytes[][][]): 0x20, 1, 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): 0x20, 1, -999 -> FAILURE, hex"4e487b71", 0x32
// accessContent(bytes[][][]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
contract C {
// Access at every nesting level must be validated against reading outside of the [0, calldatasize() - 1] range.
// NOTE: Returning the input as calldata does not trigger decoding.
// The encoded data is just copied. All validations expected during normal decoding must still run though.
function accessTop(bytes[1][1][1] calldata a) external returns (bytes[1][1][1] calldata) { return a; }
function accessMiddle(bytes[1][1][1] calldata a) external returns (bytes[1][1] calldata) { return a[0]; }
function accessBottom(bytes[1][1][1] calldata a) external returns (bytes[1] calldata) { return a[0][0]; }
function accessContent(bytes[1][1][1] calldata a) external returns (bytes calldata) { return a[0][0][0]; }
}
// Every case below which uses -999 as offset has tail at a negative position and should revert.
// ====
// revertStrings: debug
// ----
// accessTop(bytes[1][1][1]): 0x20, 0x20, 0x20, 0x20, 0 -> 0x20, 0x20, 0x20, 0x20, 0
// accessTop(bytes[1][1][1]): 0x20, 0x20, 0x20, -999 -> 0x20, 0x20, 0x20, 0x20, 0
// accessTop(bytes[1][1][1]): 0x20, 0x20, -999 -> 0x20, 0x20, 0x20, 0x20, 0
// accessTop(bytes[1][1][1]): 0x20, -999 -> 0x20, 0x20, 0x20, 0x20, 0
// accessTop(bytes[1][1][1]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessMiddle(bytes[1][1][1]): 0x20, 0x20, 0x20, 0x20, 0 -> 0x20, 0x20, 0x20, 0
// accessMiddle(bytes[1][1][1]): 0x20, 0x20, 0x20, -999 -> 0x20, 0x20, 0x20, 0
// accessMiddle(bytes[1][1][1]): 0x20, 0x20, -999 -> 0x20, 0x20, 0x20, 0
// accessMiddle(bytes[1][1][1]): 0x20, -999 -> 0x20, 0x20, 0x20, 0
// accessMiddle(bytes[1][1][1]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessBottom(bytes[1][1][1]): 0x20, 0x20, 0x20, 0x20, 0 -> 0x20, 0x20, 0
// accessBottom(bytes[1][1][1]): 0x20, 0x20, 0x20, -999 -> 0x20, 0x20, 0
// accessBottom(bytes[1][1][1]): 0x20, 0x20, -999 -> 0x20, 0x20, 0
// accessBottom(bytes[1][1][1]): 0x20, -999 -> 0x20, 0x20, 0
// accessBottom(bytes[1][1][1]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"

// accessContent(bytes[1][1][1]): 0x20, 0x20, 0x20, 0x20, 0 -> 0x20, 0
// accessContent(bytes[1][1][1]): 0x20, 0x20, 0x20, -999 -> 0x20, 0
// accessContent(bytes[1][1][1]): 0x20, 0x20, -999 -> 0x20, 0
// accessContent(bytes[1][1][1]): 0x20, -999 -> 0x20, 0
// accessContent(bytes[1][1][1]): -999 -> FAILURE, hex"08c379a0", 0x20, 34, "ABI decoding: invalid tuple offs", "et"
Loading