Skip to content

Commit 3318e5b

Browse files
committed
fix(swingset): don't deduplicate inbound mailbox messages
The mailbox device tracking the highest inbound message number and ack for each peer, to de-duplicate repeated messages, so it could reduce the amount of kernel activity. Each call to `deliverInbound` would return a boolean to indicate whether the messages/ack were new, and thus the kernel needed to be cycled. However, the device was holding this tracking data in non-durable state, so if/when the kernel was restarted, the state would be lost. A duplicate message/ack arriving in the restarted process would trigger kernel activity that would not have run in the original process. These extra cranks caused diverge between validators when one of them was restarted, and the client sent a duplicate message (such as the pre-emptive `ack` all clients send at startup). The extra crank does not get very far, because vattp does its own deduplication, so the divergence was only visible in the slog. But when #3442 is implemented, even a single extra crank will flag the validator as out of consensus. The fix is to remove the mailbox device's dedup code, and rely upon vattp for this function. The test was also updated to match, and a new test (comparing two parallel kernels, one restarted, one not) was added. closes #3471
1 parent 72fb780 commit 3318e5b

File tree

2 files changed

+136
-125
lines changed

2 files changed

+136
-125
lines changed

packages/SwingSet/src/devices/mailbox-src.js

+10-33
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,27 @@ import { assert, details as X } from '@agoric/assert';
55

66
export function buildRootDeviceNode(tools) {
77
const { SO, getDeviceState, setDeviceState, endowments } = tools;
8-
const highestInboundDelivered = harden(new Map());
9-
const highestInboundAck = harden(new Map());
108

119
let deliverInboundMessages;
1210
let deliverInboundAck;
1311

14-
function inboundCallback(hPeer, hMessages, hAck) {
15-
const peer = `${hPeer}`;
12+
function inboundCallback(peer, messages, ack) {
1613
if (!deliverInboundMessages) {
1714
throw new Error(
1815
`mailbox.inboundCallback(${peer}) called before handler was registered`,
1916
);
2017
}
21-
const ack = Nat(hAck);
22-
let didSomething = false;
23-
24-
let latestMsg = 0;
25-
if (highestInboundDelivered.has(peer)) {
26-
latestMsg = highestInboundDelivered.get(peer);
27-
}
28-
const newMessages = [];
29-
hMessages.forEach(m => {
30-
const [hNum, hMsg] = m;
31-
const num = Nat(hNum);
32-
if (num > latestMsg) {
33-
newMessages.push([num, `${hMsg}`]);
34-
latestMsg = num;
35-
highestInboundDelivered.set(peer, latestMsg);
36-
}
18+
assert.typeof(peer, 'string');
19+
messages.forEach(m => {
20+
Nat(m[0]);
21+
assert.typeof(m[1], 'string');
3722
});
38-
if (newMessages.length) {
39-
deliverInboundMessages(peer, harden(newMessages));
40-
didSomething = true;
41-
}
42-
let latestAck = 0;
43-
if (highestInboundAck.has(peer)) {
44-
latestAck = highestInboundAck.get(peer);
45-
}
46-
if (ack > latestAck) {
47-
highestInboundAck.set(peer, ack);
48-
deliverInboundAck(peer, ack);
49-
didSomething = true;
23+
Nat(ack);
24+
if (messages.length) {
25+
deliverInboundMessages(peer, harden(messages));
5026
}
51-
return didSomething;
27+
deliverInboundAck(peer, ack);
28+
return true; // always didSomething
5229
}
5330
endowments.registerInboundCallback(inboundCallback);
5431

packages/SwingSet/test/device-mailbox/test-device-mailbox.js

+126-92
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import path from 'path';
66
import bundleSource from '@agoric/bundle-source';
77
import { getAllState } from '@agoric/swing-store-simple';
88
import { provideHostStorage } from '../../src/hostStorage.js';
9-
109
import {
1110
initializeSwingset,
1211
makeSwingsetController,
@@ -16,6 +15,7 @@ import {
1615
buildMailboxStateMap,
1716
buildMailbox,
1817
} from '../../src/devices/mailbox.js';
18+
import { capargs } from '../util.js';
1919

2020
test.before(async t => {
2121
const kernelBundles = await buildKernelBundles();
@@ -92,106 +92,140 @@ test('mailbox inbound', async t => {
9292
mailbox: { ...mb.endowments },
9393
};
9494

95-
let rc;
96-
9795
const hostStorage = provideHostStorage();
9896
await initializeSwingset(config, ['mailbox2'], hostStorage, t.context.data);
9997
const c = await makeSwingsetController(hostStorage, deviceEndowments);
10098
await c.run();
101-
rc = mb.deliverInbound(
102-
'peer1',
103-
[
104-
[1, 'msg1'],
105-
[2, 'msg2'],
106-
],
107-
0,
108-
);
109-
t.truthy(rc);
99+
const m1 = [1, 'msg1'];
100+
const m2 = [2, 'msg2'];
101+
const m3 = [3, 'msg3'];
102+
t.true(mb.deliverInbound('peer1', [m1, m2], 0));
110103
await c.run();
111-
t.deepEqual(c.dump().log, ['dm-peer1', 'm-1-msg1', 'm-2-msg2']);
112-
113-
// delivering the same messages should not trigger sends, but the ack is new
114-
rc = mb.deliverInbound(
115-
'peer1',
116-
[
117-
[1, 'msg1'],
118-
[2, 'msg2'],
119-
],
120-
3,
121-
);
122-
t.truthy(rc);
104+
const expected = ['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'da-peer1-0'];
105+
t.deepEqual(c.dump().log, expected);
106+
107+
// all messages/acks should be delivered, even duplicates
108+
t.true(mb.deliverInbound('peer1', [m1, m2], 0));
123109
await c.run();
124-
t.deepEqual(c.dump().log, ['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'da-peer1-3']);
125-
126-
// no new messages/acks makes deliverInbound return 'false'
127-
rc = mb.deliverInbound(
128-
'peer1',
129-
[
130-
[1, 'msg1'],
131-
[2, 'msg2'],
132-
],
133-
3,
134-
);
135-
t.falsy(rc);
110+
expected.push(...['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'da-peer1-0']);
111+
t.deepEqual(c.dump().log, expected);
112+
113+
// new messages too
114+
t.true(mb.deliverInbound('peer1', [m1, m2, m3], 0));
136115
await c.run();
137-
t.deepEqual(c.dump().log, ['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'da-peer1-3']);
138-
139-
// but new messages should be sent
140-
rc = mb.deliverInbound(
141-
'peer1',
142-
[
143-
[1, 'msg1'],
144-
[2, 'msg2'],
145-
[3, 'msg3'],
146-
],
147-
3,
116+
expected.push(
117+
...['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'm-3-msg3', 'da-peer1-0'],
148118
);
149-
t.truthy(rc);
119+
t.deepEqual(c.dump().log, expected);
120+
121+
// and new ack
122+
t.true(mb.deliverInbound('peer1', [m1, m2, m3], 6));
150123
await c.run();
151-
t.deepEqual(c.dump().log, [
152-
'dm-peer1',
153-
'm-1-msg1',
154-
'm-2-msg2',
155-
'da-peer1-3',
156-
'dm-peer1',
157-
'm-3-msg3',
158-
]);
159-
160-
// and a higher ack should be sent
161-
rc = mb.deliverInbound(
162-
'peer1',
163-
[
164-
[1, 'msg1'],
165-
[2, 'msg2'],
166-
[3, 'msg3'],
167-
],
168-
4,
124+
expected.push(
125+
...['dm-peer1', 'm-1-msg1', 'm-2-msg2', 'm-3-msg3', 'da-peer1-6'],
169126
);
170-
t.truthy(rc);
171-
await c.run();
172-
t.deepEqual(c.dump().log, [
173-
'dm-peer1',
174-
'm-1-msg1',
175-
'm-2-msg2',
176-
'da-peer1-3',
177-
'dm-peer1',
178-
'm-3-msg3',
179-
'da-peer1-4',
180-
]);
181-
182-
rc = mb.deliverInbound('peer2', [[4, 'msg4']], 5);
183-
t.truthy(rc);
127+
t.deepEqual(c.dump().log, expected);
128+
});
129+
130+
async function initializeMailboxKernel(t) {
131+
const s = buildMailboxStateMap();
132+
const mb = buildMailbox(s);
133+
const config = {
134+
bootstrap: 'bootstrap',
135+
vats: {
136+
bootstrap: {
137+
bundle: t.context.data.bootstrap,
138+
},
139+
},
140+
devices: {
141+
mailbox: {
142+
sourceSpec: require.resolve(mb.srcPath),
143+
},
144+
},
145+
};
146+
const hostStorage = provideHostStorage();
147+
await initializeSwingset(
148+
config,
149+
['mailbox-determinism'],
150+
hostStorage,
151+
t.context.data,
152+
);
153+
return hostStorage;
154+
}
155+
156+
async function makeMailboxKernel(hostStorage) {
157+
const s = buildMailboxStateMap();
158+
const mb = buildMailbox(s);
159+
const deviceEndowments = {
160+
mailbox: { ...mb.endowments },
161+
};
162+
const c = await makeSwingsetController(hostStorage, deviceEndowments);
163+
c.pinVatRoot('bootstrap');
184164
await c.run();
185-
t.deepEqual(c.dump().log, [
186-
'dm-peer1',
187-
'm-1-msg1',
188-
'm-2-msg2',
189-
'da-peer1-3',
190-
'dm-peer1',
191-
'm-3-msg3',
192-
'da-peer1-4',
193-
'dm-peer2',
194-
'm-4-msg4',
195-
'da-peer2-5',
196-
]);
165+
return [c, mb];
166+
}
167+
168+
test('mailbox determinism', async t => {
169+
// we run two kernels in parallel
170+
const hostStorage1 = await initializeMailboxKernel(t);
171+
const hostStorage2 = await initializeMailboxKernel(t);
172+
const [c1a, mb1a] = await makeMailboxKernel(hostStorage1);
173+
const [c2, mb2] = await makeMailboxKernel(hostStorage2);
174+
175+
// they get the same inbound message
176+
const msg1 = [[1, 'msg1']];
177+
t.true(mb1a.deliverInbound('peer1', msg1, 0));
178+
await c1a.run();
179+
t.deepEqual(c1a.dump().log, ['comms receive msg1']);
180+
const kp1 = c1a.queueToVatRoot('bootstrap', 'getNumReceived', capargs([]));
181+
await c1a.run();
182+
t.deepEqual(JSON.parse(c1a.kpResolution(kp1).body), 1);
183+
184+
t.true(mb2.deliverInbound('peer1', msg1, 0));
185+
await c2.run();
186+
t.deepEqual(c2.dump().log, ['comms receive msg1']);
187+
const kp2 = c2.queueToVatRoot('bootstrap', 'getNumReceived', capargs([]));
188+
await c2.run();
189+
t.deepEqual(JSON.parse(c2.kpResolution(kp1).body), 1);
190+
191+
// both should have the same number of cranks
192+
t.is(
193+
hostStorage1.kvStore.get('crankNumber'),
194+
hostStorage2.kvStore.get('crankNumber'),
195+
);
196+
197+
// then one is restarted, but the other keeps running
198+
const [c1b, mb1b] = await makeMailboxKernel(hostStorage1);
199+
200+
// Now we repeat delivery of that message to both. The mailbox should send
201+
// it to vattp, even though it's duplicate, because the mailbox doesn't
202+
// have durable state, and cannot correctly (deterministically) tell that
203+
// it's a duplicate.
204+
t.true(mb1b.deliverInbound('peer1', msg1, 0));
205+
await c1b.run();
206+
// the testlog is part of the ephemeral kernel state, so it will only have
207+
// a record of messages in the second run, however the vat is replayed
208+
// during the second-run startup, so we expect to see one copy of the
209+
// original message, delivered during the second run
210+
t.deepEqual(c1b.dump().log, ['comms receive msg1']);
211+
// but vattp dedups, so only one message should be delivered to comms
212+
const kp3 = c1b.queueToVatRoot('bootstrap', 'getNumReceived', capargs([]));
213+
await c1b.run();
214+
t.deepEqual(JSON.parse(c1b.kpResolution(kp3).body), 1);
215+
216+
t.true(mb2.deliverInbound('peer1', msg1, 0));
217+
await c2.run();
218+
// the second kernel still has that ephemeral testlog, however the vat is
219+
// still running, so we only see the original message from the first run
220+
t.deepEqual(c2.dump().log, ['comms receive msg1']);
221+
const kp4 = c2.queueToVatRoot('bootstrap', 'getNumReceived', capargs([]));
222+
await c2.run();
223+
t.deepEqual(JSON.parse(c2.kpResolution(kp4).body), 1);
224+
225+
// Both should *still* have the same number of cranks. This is what bug
226+
// #3471 exposed.
227+
t.is(
228+
hostStorage1.kvStore.get('crankNumber'),
229+
hostStorage2.kvStore.get('crankNumber'),
230+
);
197231
});

0 commit comments

Comments
 (0)