Skip to content

Commit c427259

Browse files
committed
Binary encode from back-to-front
Our current front-to-back binary encoder has a subtle performance problem: In order to allocate the correct number of bytes for the size of a sub-message, it needs to know the final size of the sub-message. This results in recursive calls to size each message, leading to overall performance that is quadratic in the depth of nesting. This is not usually a serious problem since few people have messages with more than 2 or 3 layers of nesting. This PR changes the encoder to encode from the end of the buffer back towards the front. This allows us to write the size after we finish each sub-message, avoiding the recursive sizing calls. With this version, we have only one sizing traversal in the initial top-level encoding request in order to properly allocate the output buffer. Working from back to front does mean that individual fields get written in the opposite order. This is _mostly_ not a problem: Protobuf explicitly states that decoders must accept fields in any order. The one exception is for repeated fields, which we handle here by iterating the arrays backwards inside the encoder. A few cases where order might matter: * Unrecognized enum cases in repeated fields are treated as "unknown" fields which means that re-serializing puts them into a different place. Since this code puts the unknown fields at the beginning of the buffer rather than the end, this means that we've changed the resulting order after deserializing/reserializing. * The conformance test has one test case that verifies merging behavior and seems very sensitive to the order of fields. I suspect this is a bug in the test, but need to check further.
1 parent cef408d commit c427259

File tree

4 files changed

+656
-1
lines changed

4 files changed

+656
-1
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
// Sources/SwiftProtobuf/BinaryReverseEncoder.swift - Binary encoding support
2+
//
3+
// Copyright (c) 2014 - 2016 Apple Inc. and the project authors
4+
// Licensed under Apache License v2.0 with Runtime Library Exception
5+
//
6+
// See LICENSE.txt for license information:
7+
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8+
//
9+
// -----------------------------------------------------------------------------
10+
///
11+
/// Core support for protobuf binary encoding. Note that this is built
12+
/// on the general traversal machinery.
13+
///
14+
// -----------------------------------------------------------------------------
15+
16+
import Foundation
17+
18+
/// Encoder for Binary Protocol Buffer format that fills its buffer from the
/// end towards the beginning.
///
/// Every write first moves the cursor backwards and then stores bytes at the
/// new cursor position. Callers therefore emit a length-delimited payload
/// *before* its length prefix, and both before the field tag.
///
/// No operation in this type checks that the cursor stays inside the buffer;
/// the caller is responsible for supplying a buffer that is large enough.
internal struct BinaryReverseEncoder {
    /// Write cursor: address of the most recently written byte, or one past
    /// the end of `buffer` when nothing has been written yet.
    private var pointer: UnsafeMutableRawPointer
    /// The complete destination buffer handed to `init`.
    private var buffer: UnsafeMutableRawBufferPointer

    /// Creates an encoder whose cursor starts one past the last byte of
    /// `buffer`.
    ///
    /// - Parameter buffer: Destination storage. Traps if its `baseAddress`
    ///   is `nil`.
    init(forWritingInto buffer: UnsafeMutableRawBufferPointer) {
        self.buffer = buffer
        self.pointer = buffer.baseAddress! + buffer.count
    }

    /// Moves the cursor back one byte and stores `byte` there.
    private mutating func prepend(_ byte: UInt8) {
        consume(1)
        pointer.storeBytes(of: byte, as: UInt8.self)
    }

    /// Moves the cursor back by the length of `bytes` and copies them in,
    /// preserving their order. Does nothing when `bytes` is empty.
    private mutating func prepend<Bytes: SwiftProtobufContiguousBytes>(contentsOf bytes: Bytes) {
        bytes.withUnsafeBytes { dataPointer in
            if let baseAddress = dataPointer.baseAddress, dataPointer.count > 0 {
                consume(dataPointer.count)
                pointer.copyMemory(from: baseAddress, byteCount: dataPointer.count)
            }
        }
    }

    /// Number of bytes written so far, i.e. the distance from the cursor to
    /// the end of the buffer.
    internal var used: Int {
        return pointer.distance(to: buffer.baseAddress!) + buffer.count
    }

    /// The still-unwritten leading part of the buffer (from its base address
    /// up to, but not including, the cursor).
    internal var remainder: UnsafeMutableRawBufferPointer {
        return UnsafeMutableRawBufferPointer(start: buffer.baseAddress!,
                                             count: buffer.count - used)
    }

    /// Moves the cursor `bytes` bytes towards the front of the buffer
    /// without writing anything.
    internal mutating func consume(_ bytes: Int) {
        pointer = pointer.advanced(by: -bytes)
    }

    /// Moves the cursor back by `bufferPointer.count` and copies the bytes
    /// in, preserving their order.
    ///
    /// - Returns: The number of bytes reserved (and copied, when non-empty).
    @discardableResult
    private mutating func prepend(contentsOf bufferPointer: UnsafeRawBufferPointer) -> Int {
        let count = bufferPointer.count
        consume(count)
        if let baseAddress = bufferPointer.baseAddress, count > 0 {
            pointer.copyMemory(from: baseAddress, byteCount: count)
        }
        return count
    }

    /// Writes pre-encoded unknown-field bytes verbatim in front of the
    /// current cursor (despite the name, this encoder prepends).
    mutating func appendUnknown(data: Data) {
        prepend(contentsOf: data)
    }

    /// Writes the tag for `fieldNumber`/`wireFormat` as a varint.
    mutating func startField(fieldNumber: Int, wireFormat: WireFormat) {
        startField(tag: FieldTag(fieldNumber: fieldNumber, wireFormat: wireFormat))
    }

    /// Writes `tag.rawValue` as a varint.
    mutating func startField(tag: FieldTag) {
        putVarInt(value: UInt64(tag.rawValue))
    }

    /// Writes `value` as a base-128 varint.
    ///
    /// Because bytes are laid down back to front, the recursion writes the
    /// high-order groups first (at higher addresses) and the low-order
    /// group, with its continuation bit, last (at the lowest address).
    mutating func putVarInt(value: UInt64) {
        if value > 127 {
            putVarInt(value: value >> 7)
            prepend(UInt8(value & 0x7f | 0x80))
        } else {
            prepend(UInt8(value))
        }
    }

    /// Writes the two's-complement bit pattern of `value` as a varint.
    mutating func putVarInt(value: Int64) {
        putVarInt(value: UInt64(bitPattern: value))
    }

    /// Writes `value`, widened to `Int64`, as a varint.
    mutating func putVarInt(value: Int) {
        putVarInt(value: Int64(value))
    }

    /// Writes `value` as a ZigZag-encoded varint.
    mutating func putZigZagVarInt(value: Int64) {
        let coded = ZigZag.encoded(value)
        putVarInt(value: coded)
    }

    /// Writes a single byte: 1 for `true`, 0 for `false`.
    mutating func putBoolValue(value: Bool) {
        prepend(value ? 1 : 0)
    }

    /// Writes `value` as 8 little-endian bytes.
    mutating func putFixedUInt64(value: UInt64) {
        var v = value.littleEndian
        let n = MemoryLayout<UInt64>.size
        consume(n)
        pointer.copyMemory(from: &v, byteCount: n)
    }

    /// Writes `value` as 4 little-endian bytes.
    mutating func putFixedUInt32(value: UInt32) {
        var v = value.littleEndian
        let n = MemoryLayout<UInt32>.size
        consume(n)
        pointer.copyMemory(from: &v, byteCount: n)
    }

    /// Writes the bit pattern of `value` as 4 little-endian bytes.
    mutating func putFloatValue(value: Float) {
        let n = MemoryLayout<Float>.size
        var v = value.bitPattern.littleEndian
        consume(n)
        pointer.copyMemory(from: &v, byteCount: n)
    }

    /// Writes the bit pattern of `value` as 8 little-endian bytes.
    mutating func putDoubleValue(value: Double) {
        let n = MemoryLayout<Double>.size
        var v = value.bitPattern.littleEndian
        consume(n)
        pointer.copyMemory(from: &v, byteCount: n)
    }

    /// Writes the UTF-8 bytes of `value` followed (towards the front of the
    /// buffer) by their varint length prefix. The field tag is *not*
    /// written here; use `startField` afterwards.
    mutating func putStringValue(value: String) {
        let utf8 = value.utf8
        // If the String does not support an internal representation in a form
        // of contiguous storage, body is not called and nil is returned.
        let fastPathResult = utf8.withContiguousStorageIfAvailable { (body: UnsafeBufferPointer<UInt8>) -> Int in
            let r = prepend(contentsOf: UnsafeRawBufferPointer(body))
            putVarInt(value: body.count)
            return r
        }
        if fastPathResult == nil {
            // Slow path for strings without contiguous UTF-8 storage:
            // reserve the payload region first, fill it front to back so the
            // bytes keep their natural order, then prepend the length so it
            // ends up immediately before the payload.
            let count = utf8.count
            consume(count)
            var offset = 0
            for b in utf8 {
                pointer.storeBytes(of: b, toByteOffset: offset, as: UInt8.self)
                offset += 1
            }
            putVarInt(value: count)
        }
    }

    /// Writes the bytes of `value` followed (towards the front of the
    /// buffer) by their varint length prefix. The field tag is not written.
    mutating func putBytesValue<Bytes: SwiftProtobufContiguousBytes>(value: Bytes) {
        prepend(contentsOf: value)
        putVarInt(value: value.count)
    }
}

0 commit comments

Comments
 (0)