Skip to content

Commit 636e4ef

Browse files
alexghr authored and spalladino committed
feat: track world state metrics (#8109)
Extract relevant stats from world state: - sync duration (by sync type) - fork duration - db size (estimate) - individual tree sizes (i.e. how many leaves are filled)
1 parent 9e11baf commit 636e4ef

File tree

18 files changed

+134
-20
lines changed

18 files changed

+134
-20
lines changed

yarn-project/aztec-node/src/aztec-node/server.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ export class AztecNodeService implements AztecNode {
153153
);
154154

155155
// now create the merkle trees and the world state synchronizer
156-
const worldStateSynchronizer = await createWorldStateSynchronizer(config, store, archiver);
156+
const worldStateSynchronizer = await createWorldStateSynchronizer(config, store, archiver, telemetry);
157157

158158
// start both and wait for them to sync from the block source
159159
await Promise.all([p2pClient.start(), worldStateSynchronizer.start()]);
@@ -723,7 +723,7 @@ export class AztecNodeService implements AztecNode {
723723
// Instantiate merkle trees so uncommitted updates by this simulation are local to it.
724724
// TODO we should be able to remove this after https://github.com/AztecProtocol/aztec-packages/issues/1869
725725
// So simulation of public functions doesn't affect the merkle trees.
726-
const merkleTrees = await MerkleTrees.new(this.merkleTreesDb, this.log);
726+
const merkleTrees = await MerkleTrees.new(this.merkleTreesDb, new NoopTelemetryClient(), this.log);
727727

728728
const publicProcessorFactory = new PublicProcessorFactory(
729729
merkleTrees.asLatest(),

yarn-project/end-to-end/src/composed/integration_l1_publisher.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ describe('L1Publisher integration', () => {
148148
});
149149

150150
const tmpStore = openTmpStore();
151-
builderDb = await MerkleTrees.new(tmpStore);
151+
builderDb = await MerkleTrees.new(tmpStore, new NoopTelemetryClient());
152152
blockSource = mock<ArchiveSource>();
153153
blockSource.getBlocks.mockResolvedValue([]);
154154
const worldStateConfig: WorldStateConfig = {

yarn-project/kv-store/src/interfaces/store.ts

+5
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,9 @@ export interface AztecKVStore {
6868
* Deletes the store
6969
*/
7070
delete(): Promise<void>;
71+
72+
/**
73+
* Estimates the size of the store in bytes.
74+
*/
75+
estimateSize(): { bytes: number };
7176
}

yarn-project/kv-store/src/lmdb/store.ts

+12
Original file line numberDiff line numberDiff line change
@@ -154,4 +154,16 @@ export class AztecLmdbStore implements AztecKVStore {
154154
async delete() {
155155
await this.#rootDb.drop();
156156
}
157+
158+
estimateSize(): { bytes: number } {
159+
const stats = this.#rootDb.getStats();
160+
// `mapSize` represents the total amount of memory currently being used by the database.
161+
// since the database is mmap'd, this is a good estimate of the size of the database for now.
162+
// http://www.lmdb.tech/doc/group__mdb.html#a4bde3c8b676457342cba2fe27aed5fbd
163+
if ('mapSize' in stats && typeof stats.mapSize === 'number') {
164+
return { bytes: stats.mapSize };
165+
} else {
166+
return { bytes: 0 };
167+
}
168+
}
157169
}

yarn-project/prover-client/src/mocks/test_context.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ export class TestContext {
6868
const publicContractsDB = mock<ContractsDataSourcePublicDB>();
6969
const publicWorldStateDB = mock<WorldStatePublicDB>();
7070
const publicKernel = new RealPublicKernelCircuitSimulator(new WASMSimulator());
71-
const actualDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest());
7271
const telemetry = new NoopTelemetryClient();
72+
const actualDb = await MerkleTrees.new(openTmpStore(), telemetry).then(t => t.asLatest());
7373
const processor = new PublicProcessor(
7474
actualDb,
7575
publicExecutor,

yarn-project/prover-client/src/orchestrator/orchestrator_mixed_blocks_2.test.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { range } from '@aztec/foundation/array';
55
import { times } from '@aztec/foundation/collection';
66
import { createDebugLogger } from '@aztec/foundation/log';
77
import { openTmpStore } from '@aztec/kv-store/utils';
8+
import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
89
import { type MerkleTreeOperations, MerkleTrees } from '@aztec/world-state';
910

1011
import { makeBloatedProcessedTx, updateExpectedTreesFromTxs } from '../mocks/fixtures.js';
@@ -18,7 +19,7 @@ describe('prover/orchestrator/mixed-blocks', () => {
1819

1920
beforeEach(async () => {
2021
context = await TestContext.new(logger);
21-
expectsDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest());
22+
expectsDb = await MerkleTrees.new(openTmpStore(), new NoopTelemetryClient()).then(t => t.asLatest());
2223
});
2324

2425
afterEach(async () => {

yarn-project/prover-client/src/orchestrator/orchestrator_single_blocks.test.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { range } from '@aztec/foundation/array';
55
import { createDebugLogger } from '@aztec/foundation/log';
66
import { sleep } from '@aztec/foundation/sleep';
77
import { openTmpStore } from '@aztec/kv-store/utils';
8+
import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
89
import { type MerkleTreeOperations, MerkleTrees } from '@aztec/world-state';
910

1011
import { makeBloatedProcessedTx, updateExpectedTreesFromTxs } from '../mocks/fixtures.js';
@@ -18,7 +19,7 @@ describe('prover/orchestrator/blocks', () => {
1819

1920
beforeEach(async () => {
2021
context = await TestContext.new(logger);
21-
expectsDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest());
22+
expectsDb = await MerkleTrees.new(openTmpStore(), new NoopTelemetryClient()).then(t => t.asLatest());
2223
});
2324

2425
afterEach(async () => {

yarn-project/prover-client/src/orchestrator/orchestrator_workflow.test.ts

+3-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ describe('prover/orchestrator', () => {
2323
let mockProver: MockProxy<ServerCircuitProver>;
2424
let actualDb: MerkleTreeOperations;
2525
beforeEach(async () => {
26-
actualDb = await MerkleTrees.new(openTmpStore()).then(t => t.asLatest());
26+
const telemetryClient = new NoopTelemetryClient();
27+
actualDb = await MerkleTrees.new(openTmpStore(), telemetryClient).then(t => t.asLatest());
2728
mockProver = mock<ServerCircuitProver>();
28-
orchestrator = new ProvingOrchestrator(actualDb, mockProver, new NoopTelemetryClient());
29+
orchestrator = new ProvingOrchestrator(actualDb, mockProver, telemetryClient);
2930
});
3031

3132
it('calls root parity circuit only when ready', async () => {

yarn-project/prover-node/src/factory.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ export async function createProverNode(
3535
log.verbose(`Created archiver and synced to block ${await archiver.getBlockNumber()}`);
3636

3737
const worldStateConfig = { ...config, worldStateProvenBlocksOnly: true };
38-
const worldStateSynchronizer = await createWorldStateSynchronizer(worldStateConfig, store, archiver);
38+
const worldStateSynchronizer = await createWorldStateSynchronizer(worldStateConfig, store, archiver, telemetry);
3939
await worldStateSynchronizer.start();
4040

4141
const simulationProvider = await createSimulationProvider(config, log);

yarn-project/telemetry-client/src/attributes.ts

+2
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,5 @@ export const L1_TX_TYPE = 'aztec.l1.tx_type';
6060
export const TX_PHASE_NAME = 'aztec.tx.phase_name';
6161
/** The proving job type */
6262
export const PROVING_JOB_TYPE = 'aztec.proving.job_type';
63+
64+
export const MERKLE_TREE_NAME = 'aztec.merkle_tree.name';

yarn-project/telemetry-client/src/metrics.ts

+5
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,8 @@ export const PROVING_ORCHESTRATOR_BASE_ROLLUP_INPUTS_DURATION =
6262

6363
export const PROVING_QUEUE_JOB_SIZE = 'aztec.proving_queue.job_size';
6464
export const PROVING_QUEUE_SIZE = 'aztec.proving_queue.size';
65+
66+
export const WORLD_STATE_FORK_DURATION = 'aztec.world_state.fork.duration';
67+
export const WORLD_STATE_SYNC_DURATION = 'aztec.world_state.sync.duration';
68+
export const WORLD_STATE_MERKLE_TREE_SIZE = 'aztec.world_state.merkle_tree_size';
69+
export const WORLD_STATE_DB_SIZE = 'aztec.world_state.db_size';

yarn-project/txe/src/txe_service/txe_service.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { type Logger } from '@aztec/foundation/log';
1818
import { KeyStore } from '@aztec/key-store';
1919
import { openTmpStore } from '@aztec/kv-store/utils';
2020
import { ExecutionNoteCache, PackedValuesCache, type TypedOracle } from '@aztec/simulator';
21+
import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
2122
import { MerkleTrees } from '@aztec/world-state';
2223

2324
import { TXE } from '../oracle/txe_oracle.js';
@@ -38,7 +39,7 @@ export class TXEService {
3839

3940
static async init(logger: Logger) {
4041
const store = openTmpStore(true);
41-
const trees = await MerkleTrees.new(store, logger);
42+
const trees = await MerkleTrees.new(store, new NoopTelemetryClient(), logger);
4243
const packedValuesCache = new PackedValuesCache();
4344
const txHash = new Fr(1); // The txHash is used for computing the revertible nullifiers for non-revertible note hashes. It can be any value for testing.
4445
const noteCache = new ExecutionNoteCache(txHash);

yarn-project/world-state/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"@aztec/foundation": "workspace:^",
5959
"@aztec/kv-store": "workspace:^",
6060
"@aztec/merkle-tree": "workspace:^",
61+
"@aztec/telemetry-client": "workspace:^",
6162
"@aztec/types": "workspace:^",
6263
"tslib": "^2.4.0"
6364
},

yarn-project/world-state/src/synchronizer/factory.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { type L1ToL2MessageSource, type L2BlockSource } from '@aztec/circuit-types';
22
import { type AztecKVStore } from '@aztec/kv-store';
3+
import { type TelemetryClient } from '@aztec/telemetry-client';
34

45
import { MerkleTrees } from '../world-state-db/merkle_trees.js';
56
import { type WorldStateConfig } from './config.js';
@@ -9,7 +10,8 @@ export async function createWorldStateSynchronizer(
910
config: WorldStateConfig,
1011
store: AztecKVStore,
1112
l2BlockSource: L2BlockSource & L1ToL2MessageSource,
13+
client: TelemetryClient,
1214
) {
13-
const merkleTrees = await MerkleTrees.new(store);
15+
const merkleTrees = await MerkleTrees.new(store, client);
1416
return new ServerWorldStateSynchronizer(store, merkleTrees, l2BlockSource, config);
1517
}

yarn-project/world-state/src/world-state-db/merkle_trees.ts

+24-9
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import {
3232
import { padArrayEnd } from '@aztec/foundation/collection';
3333
import { type DebugLogger, createDebugLogger } from '@aztec/foundation/log';
3434
import { SerialQueue } from '@aztec/foundation/queue';
35+
import { Timer, elapsed } from '@aztec/foundation/timer';
3536
import { type IndexedTreeLeafPreimage } from '@aztec/foundation/trees';
3637
import { type AztecKVStore, type AztecSingleton } from '@aztec/kv-store';
3738
import {
@@ -45,6 +46,7 @@ import {
4546
loadTree,
4647
newTree,
4748
} from '@aztec/merkle-tree';
49+
import { type TelemetryClient } from '@aztec/telemetry-client';
4850
import { type Hasher } from '@aztec/types/interfaces';
4951

5052
import {
@@ -55,6 +57,7 @@ import {
5557
} from './merkle_tree_db.js';
5658
import { type MerkleTreeMap } from './merkle_tree_map.js';
5759
import { MerkleTreeOperationsFacade } from './merkle_tree_operations_facade.js';
60+
import { WorldStateMetrics } from './metrics.js';
5861

5962
/**
6063
* The nullifier tree is an indexed tree.
@@ -98,18 +101,20 @@ export class MerkleTrees implements MerkleTreeDb {
98101
private trees: MerkleTreeMap = null as any;
99102
private jobQueue = new SerialQueue();
100103
private initialStateReference: AztecSingleton<Buffer>;
104+
private metrics: WorldStateMetrics;
101105

102-
private constructor(private store: AztecKVStore, private log: DebugLogger) {
106+
private constructor(private store: AztecKVStore, private telemetryClient: TelemetryClient, private log: DebugLogger) {
103107
this.initialStateReference = store.openSingleton('merkle_trees_initial_state_reference');
108+
this.metrics = new WorldStateMetrics(telemetryClient);
104109
}
105110

106111
/**
107112
* Method to asynchronously create and initialize a MerkleTrees instance.
108113
* @param store - The db instance to use for data persistence.
109114
* @returns - A fully initialized MerkleTrees instance.
110115
*/
111-
public static async new(store: AztecKVStore, log = createDebugLogger('aztec:merkle_trees')) {
112-
const merkleTrees = new MerkleTrees(store, log);
116+
public static async new(store: AztecKVStore, client: TelemetryClient, log = createDebugLogger('aztec:merkle_trees')) {
117+
const merkleTrees = new MerkleTrees(store, client, log);
113118
await merkleTrees.#init();
114119
return merkleTrees;
115120
}
@@ -181,12 +186,17 @@ export class MerkleTrees implements MerkleTreeDb {
181186
}
182187

183188
public async fork(): Promise<MerkleTrees> {
184-
// TODO(palla/prover-node): If the underlying store is being shared with other components, we're unnecessarily
185-
// copying a lot of data unrelated to merkle trees. This may be fine for now, and we may be able to ditch backup-based
186-
// forking in favor of a more elegant proposal. But if we see this operation starts taking a lot of time, we may want
187-
// to open separate stores for merkle trees and other components.
188-
const forked = await this.store.fork();
189-
return MerkleTrees.new(forked, this.log);
189+
const [ms, db] = await elapsed(async () => {
190+
// TODO(palla/prover-node): If the underlying store is being shared with other components, we're unnecessarily
191+
// copying a lot of data unrelated to merkle trees. This may be fine for now, and we may be able to ditch backup-based
192+
// forking in favor of a more elegant proposal. But if we see this operation starts taking a lot of time, we may want
193+
// to open separate stores for merkle trees and other components.
194+
const forked = await this.store.fork();
195+
return MerkleTrees.new(forked, this.telemetryClient, this.log);
196+
});
197+
198+
this.metrics.recordForkDuration(ms);
199+
return db;
190200
}
191201

192202
public async delete() {
@@ -581,6 +591,8 @@ export class MerkleTrees implements MerkleTreeDb {
581591
* @param l1ToL2Messages - The L1 to L2 messages for the block.
582592
*/
583593
async #handleL2BlockAndMessages(l2Block: L2Block, l1ToL2Messages: Fr[]): Promise<HandleL2BlockAndMessagesResult> {
594+
const timer = new Timer();
595+
584596
const treeRootWithIdPairs = [
585597
[l2Block.header.state.partial.nullifierTree.root, MerkleTreeId.NULLIFIER_TREE],
586598
[l2Block.header.state.partial.noteHashTree.root, MerkleTreeId.NOTE_HASH_TREE],
@@ -664,10 +676,13 @@ export class MerkleTrees implements MerkleTreeDb {
664676
);
665677
} else {
666678
this.log.debug(`Tree ${treeName} synched with size ${info.size} root ${rootStr}`);
679+
this.metrics.recordTreeSize(treeName, info.size);
667680
}
668681
}
669682
await this.#snapshot(l2Block.number);
670683

684+
this.metrics.recordDbSize(this.store.estimateSize().bytes);
685+
this.metrics.recordSyncDuration(ourBlock ? 'commit' : 'rollback_and_update', timer);
671686
return { isBlockOurs: ourBlock };
672687
}
673688

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { type Timer } from '@aztec/foundation/timer';
2+
import {
3+
Attributes,
4+
type Gauge,
5+
type Histogram,
6+
Metrics,
7+
type TelemetryClient,
8+
ValueType,
9+
} from '@aztec/telemetry-client';
10+
11+
export class WorldStateMetrics {
12+
private treeSize: Gauge;
13+
private dbSize: Gauge;
14+
private forkDuration: Histogram;
15+
private syncDuration: Histogram;
16+
17+
constructor(client: TelemetryClient, name = 'MerkleTreesDb') {
18+
const meter = client.getMeter(name);
19+
this.treeSize = meter.createGauge(Metrics.WORLD_STATE_MERKLE_TREE_SIZE, {
20+
description: 'The size of Merkle trees',
21+
valueType: ValueType.INT,
22+
});
23+
24+
this.dbSize = meter.createGauge(Metrics.WORLD_STATE_DB_SIZE, {
25+
description: 'The size of the World State DB',
26+
valueType: ValueType.INT,
27+
unit: 'By',
28+
});
29+
30+
this.forkDuration = meter.createHistogram(Metrics.WORLD_STATE_FORK_DURATION, {
31+
description: 'The duration of a fork operation',
32+
unit: 'ms',
33+
valueType: ValueType.INT,
34+
});
35+
36+
this.syncDuration = meter.createHistogram(Metrics.WORLD_STATE_SYNC_DURATION, {
37+
description: 'The duration of a sync operation',
38+
unit: 'ms',
39+
valueType: ValueType.INT,
40+
});
41+
}
42+
43+
recordTreeSize(treeName: string, treeSize: bigint) {
44+
this.treeSize.record(Number(treeSize), {
45+
[Attributes.MERKLE_TREE_NAME]: treeName,
46+
});
47+
}
48+
49+
recordDbSize(dbSizeInBytes: number) {
50+
this.dbSize.record(dbSizeInBytes);
51+
}
52+
53+
recordForkDuration(timerOrMs: Timer | number) {
54+
const ms = Math.ceil(typeof timerOrMs === 'number' ? timerOrMs : timerOrMs.ms());
55+
this.forkDuration.record(ms);
56+
}
57+
58+
recordSyncDuration(syncType: 'commit' | 'rollback_and_update', timerOrMs: Timer | number) {
59+
const ms = Math.ceil(typeof timerOrMs === 'number' ? timerOrMs : timerOrMs.ms());
60+
this.syncDuration.record(ms, {
61+
[Attributes.STATUS]: syncType,
62+
});
63+
}
64+
}

yarn-project/world-state/tsconfig.json

+3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
{
2222
"path": "../merkle-tree"
2323
},
24+
{
25+
"path": "../telemetry-client"
26+
},
2427
{
2528
"path": "../types"
2629
}

yarn-project/yarn.lock

+1
Original file line numberDiff line numberDiff line change
@@ -1208,6 +1208,7 @@ __metadata:
12081208
"@aztec/foundation": "workspace:^"
12091209
"@aztec/kv-store": "workspace:^"
12101210
"@aztec/merkle-tree": "workspace:^"
1211+
"@aztec/telemetry-client": "workspace:^"
12111212
"@aztec/types": "workspace:^"
12121213
"@jest/globals": ^29.5.0
12131214
"@types/jest": ^29.5.0

0 commit comments

Comments
 (0)