Skip to content

Commit b755243

Browse files
javache, meta-codesync[bot]
authored and committed
Optimize MapBuffer representation (#57361)
Summary:
Pull Request resolved: #57361

Broadens the former header-shrink change into a single representation-optimization commit for MapBuffer. It bundles three layout optimizations that were previously split:

1. The header is reduced to a single 2-byte `count` field.
2. Multi-byte values are read via `memcpy`, so unaligned access is well-defined on all platforms.
3. Every dynamic-data entry (`String`, `Map`, `MapBufferList`, `IntBuffer`, `DoubleBuffer`) packs its `[offset][byteLength]` into the bucket's 8-byte value instead of writing an in-band length prefix into the dynamic data section.

Net effect: 4 fewer bytes per dynamic entry, one fewer indirection on read (the length is already in the bucket), and every dynamic entry becomes self-delimiting from its bucket alone.

No public API change — only the internal serialized representation.

Changelog: [Internal]

Reviewed By: lenaic, zeyap

Differential Revision: D109848478
1 parent 4d996b5 commit b755243

14 files changed

Lines changed: 181 additions & 178 deletions

File tree

packages/react-native/ReactAndroid/src/main/java/com/facebook/react/common/mapbuffer/ReadableMapBuffer.kt

Lines changed: 23 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,11 @@ private constructor(
5151
ReadableMapBuffer(buffer.duplicate().apply { position(offset) }, offset)
5252

5353
private fun readHeader() {
54-
// byte order
55-
val storedAlignment = buffer.short
56-
if (storedAlignment.toInt() != ALIGNMENT) {
57-
buffer.order(ByteOrder.LITTLE_ENDIAN)
58-
}
59-
// count
60-
count = readUnsignedShort(buffer.position()).toInt()
54+
// The C++ writer always serializes in little-endian byte order. ByteBuffer
55+
// defaults to big-endian and duplicate() resets the order, so set it
56+
// explicitly on every instance, including nested clones.
57+
buffer.order(ByteOrder.LITTLE_ENDIAN)
58+
count = readUnsignedShort(offsetToMapBuffer).toInt()
6159
}
6260

6361
/**
@@ -122,26 +120,27 @@ private constructor(
122120
return readIntValue(bufferPosition) == 1
123121
}
124122

123+
// Dynamic-data entries store [offset][byteLength] in the bucket's 8-byte
124+
// value: getInt(bufferPosition) is the offset, getInt(bufferPosition + 4) is
125+
// the byte length. The dynamic data section itself carries no length prefix.
125126
private fun readStringValue(bufferPosition: Int): String {
126127
val offset = offsetForDynamicData + buffer.getInt(bufferPosition)
127-
val sizeOfString = buffer.getInt(offset)
128+
val sizeOfString = buffer.getInt(bufferPosition + Int.SIZE_BYTES)
128129
val result = ByteArray(sizeOfString)
129-
val stringOffset = offset + Int.SIZE_BYTES
130-
buffer.position(stringOffset)
131-
buffer[result, 0, sizeOfString]
130+
buffer.position(offset)
131+
buffer.get(result, 0, sizeOfString)
132132
return String(result)
133133
}
134134

135135
private fun readMapBufferValue(position: Int): ReadableMapBuffer {
136136
val offset = offsetForDynamicData + buffer.getInt(position)
137-
return cloneWithOffset(offset + Int.SIZE_BYTES)
137+
return cloneWithOffset(offset)
138138
}
139139

140140
private fun readMapBufferListValue(position: Int): List<ReadableMapBuffer> {
141141
val readMapBufferList = arrayListOf<ReadableMapBuffer>()
142-
var offset = offsetForDynamicData + buffer.getInt(position)
143-
val sizeMapBufferList = buffer.getInt(offset)
144-
offset += Int.SIZE_BYTES
142+
val offset = offsetForDynamicData + buffer.getInt(position)
143+
val sizeMapBufferList = buffer.getInt(position + Int.SIZE_BYTES)
145144
var curLen = 0
146145
while (curLen < sizeMapBufferList) {
147146
val sizeMapBuffer = buffer.getInt(offset + curLen)
@@ -153,16 +152,16 @@ private constructor(
153152
}
154153

155154
private fun readIntBufferValue(bufferPosition: Int): IntArray {
156-
var offset = offsetForDynamicData + buffer.getInt(bufferPosition)
157-
val count = buffer.getInt(offset)
158-
offset += Int.SIZE_BYTES
155+
val offset = offsetForDynamicData + buffer.getInt(bufferPosition)
156+
val byteLength = buffer.getInt(bufferPosition + Int.SIZE_BYTES)
157+
val count = byteLength / Int.SIZE_BYTES
159158
return IntArray(count) { i -> buffer.getInt(offset + i * Int.SIZE_BYTES) }
160159
}
161160

162161
private fun readDoubleBufferValue(bufferPosition: Int): DoubleArray {
163-
var offset = offsetForDynamicData + buffer.getInt(bufferPosition)
164-
val count = buffer.getInt(offset)
165-
offset += Int.SIZE_BYTES
162+
val offset = offsetForDynamicData + buffer.getInt(bufferPosition)
163+
val byteLength = buffer.getInt(bufferPosition + Int.SIZE_BYTES)
164+
val count = byteLength / Double.SIZE_BYTES
166165
return DoubleArray(count) { i -> buffer.getDouble(offset + i * Double.SIZE_BYTES) }
167166
}
168167

@@ -355,13 +354,10 @@ private constructor(
355354
}
356355

357356
public companion object {
358-
// Value used to verify if the data is serialized with LittleEndian order.
359-
private const val ALIGNMENT = 0xFE
360-
361-
// 8 bytes = 2 (alignment) + 2 (count) + 4 (size)
362-
private const val HEADER_SIZE = 8
357+
// 2 bytes = 2 (count)
358+
private const val HEADER_SIZE = 2
363359

364-
// 10 bytes = 2 (key) + 2 (type) + 8 (value)
360+
// 12 bytes = 2 (key) + 2 (type) + 8 (value)
365361
private const val BUCKET_SIZE = 12
366362

367363
// 2 bytes = 2 (key)

packages/react-native/ReactCommon/react/renderer/mapbuffer/MapBuffer.cpp

Lines changed: 87 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,43 @@
88
#include "MapBuffer.h"
99
#include <react/renderer/mapbuffer/MapBufferBuilder.h>
1010

11+
#include <algorithm>
12+
#include <cstring>
13+
1114
namespace facebook::react {
1215

16+
namespace {
17+
// Reads a value of type T from a (possibly unaligned) offset in the buffer.
18+
// MapBuffer's packed layout places multi-byte values at offsets that are not
19+
// naturally aligned for their type (e.g. an 8-byte value at a 2-byte boundary),
20+
// so dereferencing a reinterpret_cast pointer there is undefined behavior and
21+
// can fault on 32-bit ARM. memcpy compiles to a single unaligned load on
22+
// arm64/x86 and to alignment-safe loads on armv7.
23+
template <typename T>
24+
inline T readUnaligned(const uint8_t* data, int32_t offset) {
25+
T value;
26+
std::memcpy(&value, data + offset, sizeof(T));
27+
return value;
28+
}
29+
30+
// Debug-asserts on OOB (catches corrupt buffers early in dev) AND clamps in
31+
// release so a corrupt bucket length can never drive an OOB memcpy read.
32+
// react_native_assert is compiled out in release, so the runtime cost outside
33+
// dev is the single min() call.
34+
inline int32_t
35+
clampToBufferBounds(int32_t offset, int32_t byteLength, size_t bufferSize) {
36+
react_native_assert(offset >= 0 && byteLength >= 0);
37+
react_native_assert(
38+
static_cast<size_t>(offset) + static_cast<size_t>(byteLength) <=
39+
bufferSize);
40+
size_t maxLength = bufferSize > static_cast<size_t>(offset)
41+
? bufferSize - static_cast<size_t>(offset)
42+
: 0;
43+
return static_cast<int32_t>(
44+
std::min(static_cast<size_t>(std::max(byteLength, 0)), maxLength));
45+
}
46+
} // namespace
47+
1348
static inline int32_t bucketOffset(int32_t index) {
1449
return sizeof(MapBuffer::Header) + sizeof(MapBuffer::Bucket) * index;
1550
}
@@ -18,16 +53,20 @@ static inline int32_t valueOffset(int32_t bucketIndex) {
1853
return bucketOffset(bucketIndex) + offsetof(MapBuffer::Bucket, data);
1954
}
2055

56+
// Dynamic-data entries pack [offset (low 32 bits)][byteLength (high 32 bits)]
57+
// into the bucket's 8-byte value, so the payload in the dynamic data section
58+
// carries no in-band length prefix. This returns the position of the high
59+
// 32 bits (the length).
60+
static inline int32_t lengthOffset(int32_t bucketIndex) {
61+
return valueOffset(bucketIndex) + static_cast<int32_t>(sizeof(int32_t));
62+
}
63+
2164
// TODO T83483191: Extend MapBuffer C++ implementation to support basic random
2265
// access
2366
MapBuffer::MapBuffer(std::vector<uint8_t> data) : bytes_(std::move(data)) {
24-
auto header = reinterpret_cast<const Header*>(bytes_.data());
25-
count_ = header->count;
26-
27-
if (header->bufferSize != bytes_.size()) {
28-
LOG(ERROR) << "Error: Data size does not match, expected "
29-
<< header->bufferSize << " found: " << bytes_.size();
30-
abort();
67+
if (bytes_.size() >= sizeof(Header)) {
68+
auto header = reinterpret_cast<const Header*>(bytes_.data());
69+
count_ = header->count;
3170
}
3271
}
3372

@@ -37,8 +76,7 @@ int32_t MapBuffer::getKeyBucket(Key key) const {
3776
while (lo <= hi) {
3877
int32_t mid = (lo + hi) >> 1;
3978

40-
Key midVal =
41-
*reinterpret_cast<const Key*>(bytes_.data() + bucketOffset(mid));
79+
Key midVal = readUnaligned<Key>(bytes_.data(), bucketOffset(mid));
4280

4381
if (midVal < key) {
4482
lo = mid + 1;
@@ -53,8 +91,7 @@ int32_t MapBuffer::getKeyBucket(Key key) const {
5391
}
5492

5593
inline int32_t MapBuffer::getIntAtBucket(int32_t bucketIndex) const {
56-
return *reinterpret_cast<const int32_t*>(
57-
bytes_.data() + valueOffset(bucketIndex));
94+
return readUnaligned<int32_t>(bytes_.data(), valueOffset(bucketIndex));
5895
}
5996

6097
int32_t MapBuffer::getInt(Key key) const {
@@ -74,8 +111,7 @@ int64_t MapBuffer::getLong(Key key) const {
74111
return 0;
75112
}
76113

77-
return *reinterpret_cast<const int64_t*>(
78-
bytes_.data() + valueOffset(bucketIndex));
114+
return readUnaligned<int64_t>(bytes_.data(), valueOffset(bucketIndex));
79115
}
80116

81117
bool MapBuffer::getBool(Key key) const {
@@ -89,8 +125,7 @@ double MapBuffer::getDouble(Key key) const {
89125
return 0;
90126
}
91127

92-
return *reinterpret_cast<const double*>(
93-
bytes_.data() + valueOffset(bucketIndex));
128+
return readUnaligned<double>(bytes_.data(), valueOffset(bucketIndex));
94129
}
95130

96131
int32_t MapBuffer::getDynamicDataOffset() const {
@@ -107,9 +142,10 @@ std::string MapBuffer::getString(Key key) const {
107142
}
108143

109144
int32_t offset = getDynamicDataOffset() + getIntAtBucket(bucketIndex);
110-
int32_t stringLength =
111-
*reinterpret_cast<const int32_t*>(bytes_.data() + offset);
112-
const uint8_t* stringPtr = bytes_.data() + offset + sizeof(int);
145+
auto stringLength =
146+
readUnaligned<int32_t>(bytes_.data(), lengthOffset(bucketIndex));
147+
stringLength = clampToBufferBounds(offset, stringLength, bytes_.size());
148+
const uint8_t* stringPtr = bytes_.data() + offset;
113149

114150
return {stringPtr, stringPtr + stringLength};
115151
}
@@ -122,17 +158,13 @@ MapBuffer MapBuffer::getMapBuffer(Key key) const {
122158
}
123159

124160
int32_t offset = getDynamicDataOffset() + getIntAtBucket(bucketIndex);
125-
int32_t mapBufferLength =
126-
*reinterpret_cast<const int32_t*>(bytes_.data() + offset);
127-
size_t maxLength = bytes_.size() - offset - sizeof(int32_t);
128-
if (mapBufferLength > maxLength) {
129-
mapBufferLength = maxLength;
130-
}
161+
auto mapBufferLength =
162+
readUnaligned<int32_t>(bytes_.data(), lengthOffset(bucketIndex));
163+
mapBufferLength = clampToBufferBounds(offset, mapBufferLength, bytes_.size());
131164

132165
std::vector<uint8_t> value(mapBufferLength);
133166

134-
memcpy(
135-
value.data(), bytes_.data() + offset + sizeof(int32_t), mapBufferLength);
167+
memcpy(value.data(), bytes_.data() + offset, mapBufferLength);
136168

137169
return MapBuffer(std::move(value));
138170
}
@@ -146,19 +178,27 @@ std::vector<MapBuffer> MapBuffer::getMapBufferList(MapBuffer::Key key) const {
146178

147179
std::vector<MapBuffer> mapBufferList;
148180
int32_t offset = getDynamicDataOffset() + getIntAtBucket(bucketIndex);
149-
int32_t mapBufferListLength =
150-
*reinterpret_cast<const int32_t*>(bytes_.data() + offset);
151-
offset = offset + sizeof(uint32_t);
181+
auto mapBufferListLength =
182+
readUnaligned<int32_t>(bytes_.data(), lengthOffset(bucketIndex));
183+
mapBufferListLength =
184+
clampToBufferBounds(offset, mapBufferListLength, bytes_.size());
152185

153186
int32_t curLen = 0;
154187
while (curLen < mapBufferListLength) {
155-
int32_t mapBufferLength =
156-
*reinterpret_cast<const int32_t*>(bytes_.data() + offset + curLen);
157-
curLen = curLen + sizeof(uint32_t);
188+
if (curLen + sizeof(int32_t) > mapBufferListLength) {
189+
break;
190+
}
191+
192+
auto mapBufferLength =
193+
readUnaligned<int32_t>(bytes_.data(), offset + curLen);
194+
curLen += sizeof(int32_t);
195+
196+
mapBufferLength =
197+
clampToBufferBounds(offset + curLen, mapBufferLength, bytes_.size());
158198
std::vector<uint8_t> value(mapBufferLength);
159199
memcpy(value.data(), bytes_.data() + offset + curLen, mapBufferLength);
160200
mapBufferList.emplace_back(std::move(value));
161-
curLen = curLen + mapBufferLength;
201+
curLen += mapBufferLength;
162202
}
163203
return mapBufferList;
164204
}
@@ -171,13 +211,18 @@ std::vector<int32_t> MapBuffer::getIntBuffer(MapBuffer::Key key) const {
171211
}
172212

173213
int32_t offset = getDynamicDataOffset() + getIntAtBucket(bucketIndex);
174-
int32_t count = *reinterpret_cast<const int32_t*>(bytes_.data() + offset);
214+
auto byteLength =
215+
readUnaligned<int32_t>(bytes_.data(), lengthOffset(bucketIndex));
216+
byteLength = clampToBufferBounds(offset, byteLength, bytes_.size());
217+
int32_t count = byteLength / static_cast<int32_t>(sizeof(int32_t));
175218

176219
std::vector<int32_t> result(count);
177220
if (count > 0) {
221+
// Copy only whole elements: a clamped byteLength may not be a multiple of
222+
// sizeof(int32_t), and result holds exactly count elements.
178223
memcpy(
179224
result.data(),
180-
bytes_.data() + offset + sizeof(int32_t),
225+
bytes_.data() + offset,
181226
static_cast<size_t>(count) * sizeof(int32_t));
182227
}
183228
return result;
@@ -191,13 +236,18 @@ std::vector<double> MapBuffer::getDoubleBuffer(MapBuffer::Key key) const {
191236
}
192237

193238
int32_t offset = getDynamicDataOffset() + getIntAtBucket(bucketIndex);
194-
int32_t count = *reinterpret_cast<const int32_t*>(bytes_.data() + offset);
239+
auto byteLength =
240+
readUnaligned<int32_t>(bytes_.data(), lengthOffset(bucketIndex));
241+
byteLength = clampToBufferBounds(offset, byteLength, bytes_.size());
242+
int32_t count = byteLength / static_cast<int32_t>(sizeof(double));
195243

196244
std::vector<double> result(count);
197245
if (count > 0) {
246+
// Copy only whole elements: a clamped byteLength may not be a multiple of
247+
// sizeof(double), and result holds exactly count elements.
198248
memcpy(
199249
result.data(),
200-
bytes_.data() + offset + sizeof(int32_t),
250+
bytes_.data() + offset,
201251
static_cast<size_t>(count) * sizeof(double));
202252
}
203253
return result;

packages/react-native/ReactCommon/react/renderer/mapbuffer/MapBuffer.h

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,11 @@ class JReadableMapBuffer;
4040
*
4141
* MapBuffer data is stored in a continuous chunk of memory (bytes_ field below) with the following layout:
4242
*
43-
* ┌─────────────────────Header──────────────────────┐
44-
* │ 10 bytes
45-
* ├─Alignment─┬─Item count─┬──────Buffer size───────┤
46-
* │ 2 bytes │ 2 bytes │ 4 bytes
47-
* └───────────┴────────────┴────────────────────────┘
43+
* ┌──────Header──────┐
44+
* │ 2 bytes │
45+
* ├────Item count────┤
46+
* │ 2 bytes │
47+
* └──────────────────┘
4848
* ┌────────────────────────────────────────────────────────────────────────────────────────┐
4949
* │ Buckets (one per item in the map) │
5050
* │ │
@@ -69,14 +69,8 @@ class MapBuffer {
6969
public:
7070
using Key = uint16_t;
7171

72-
// The first value in the buffer, used to check correct encoding/endianness on
73-
// JVM side.
74-
constexpr static uint16_t HEADER_ALIGNMENT = 0xFE;
75-
7672
struct Header {
77-
uint16_t alignment = HEADER_ALIGNMENT; // alignment of serialization
7873
uint16_t count; // amount of items in the map
79-
uint32_t bufferSize; // Amount of bytes used to store the map in memory
8074
};
8175

8276
#pragma pack(push, 1)
@@ -89,7 +83,7 @@ class MapBuffer {
8983
};
9084
#pragma pack(pop)
9185

92-
static_assert(sizeof(Header) == 8, "MapBuffer header size is incorrect.");
86+
static_assert(sizeof(Header) == 2, "MapBuffer header size is incorrect.");
9387
static_assert(sizeof(Bucket) == 12, "MapBuffer bucket size is incorrect.");
9488

9589
/**
@@ -105,15 +99,17 @@ class MapBuffer {
10599
String = 3,
106100
Map = 4,
107101
Long = 5,
108-
// Homogeneous, length-prefixed arrays stored contiguously in the dynamic
102+
// Homogeneous arrays of raw elements stored contiguously in the dynamic
109103
// data section. Unlike Map, they carry no per-element key/type overhead, so
110-
// a batch of N values costs ~N*elementSize bytes plus a single 4-byte count
111-
// prefix instead of N*12-byte buckets. The bucket value is the offset of the
112-
// array within the dynamic data section.
104+
// a batch of N values costs ~N*elementSize bytes instead of N*12-byte
105+
// buckets. The bucket value packs [offset][byteLength]; the element count is
106+
// recovered as byteLength / elementSize.
113107
IntBuffer = 6,
114108
DoubleBuffer = 7,
115-
// A homogeneous, ordered array of nested MapBuffers. Distinct from `Map` so
116-
// that a list of MapBuffers is self-describing (a single Map and a list are
109+
// A homogeneous, ordered array of nested MapBuffers. The bucket value packs
110+
// [offset][byteLength] for the whole list region; within it each child stays
111+
// framed as [int32 childSize][child bytes]. Distinct from `Map` so that a
112+
// list of MapBuffers is self-describing (a single Map and a list are
117113
// byte-distinct in payload but previously shared the `Map` type tag).
118114
MapBufferList = 8,
119115
};

0 commit comments

Comments
 (0)