Skip to content

Commit c2c8cc6

Browse files
authored
feat: agent and broker expose OTEL metrics (#10264)
This PR adds instrumentation to both the proving broker and agent.
1 parent 9d833c5 commit c2c8cc6

20 files changed

+332
-49
lines changed

yarn-project/archiver/src/archiver/archiver.ts

+1-4
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ export class Archiver implements ArchiveSource {
176176
config.l1Contracts.registryAddress,
177177
archiverStore,
178178
config.archiverPollingIntervalMS ?? 10_000,
179-
new ArchiverInstrumentation(telemetry),
179+
new ArchiverInstrumentation(telemetry, () => archiverStore.estimateSize()),
180180
{ l1StartBlock, l1GenesisTime, epochDuration, slotDuration, ethereumSlotDuration },
181181
);
182182
await archiver.start(blockUntilSynced);
@@ -271,9 +271,6 @@ export class Archiver implements ArchiveSource {
271271
// the chain locally before we start unwinding stuff. This can be optimized by figuring out
272272
// up to which point we're pruning, and then requesting L2 blocks up to that point only.
273273
await this.handleEpochPrune(provenBlockNumber, currentL1BlockNumber);
274-
275-
const storeSizes = this.store.estimateSize();
276-
this.instrumentation.recordDBMetrics(storeSizes);
277274
}
278275
}
279276

yarn-project/archiver/src/archiver/instrumentation.ts

+3-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
type Gauge,
66
type Histogram,
77
LmdbMetrics,
8+
type LmdbStatsCallback,
89
Metrics,
910
type TelemetryClient,
1011
type UpDownCounter,
@@ -23,7 +24,7 @@ export class ArchiverInstrumentation {
2324

2425
private log = createDebugLogger('aztec:archiver:instrumentation');
2526

26-
constructor(private telemetry: TelemetryClient) {
27+
constructor(private telemetry: TelemetryClient, lmdbStats?: LmdbStatsCallback) {
2728
const meter = telemetry.getMeter('Archiver');
2829
this.blockHeight = meter.createGauge(Metrics.ARCHIVER_BLOCK_HEIGHT, {
2930
description: 'The height of the latest block processed by the archiver',
@@ -72,13 +73,10 @@ export class ArchiverInstrumentation {
7273
name: Metrics.ARCHIVER_DB_NUM_ITEMS,
7374
description: 'Num items in the archiver database',
7475
},
76+
lmdbStats,
7577
);
7678
}
7779

78-
public recordDBMetrics(metrics: { mappingSize: number; numItems: number; actualSize: number }) {
79-
this.dbMetrics.recordDBMetrics(metrics);
80-
}
81-
8280
public isEnabled(): boolean {
8381
return this.telemetry.isEnabled();
8482
}

yarn-project/aztec/src/cli/cmds/start_prover_agent.ts

+12-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,18 @@ export async function startProverAgent(
4040
);
4141
const prover = await buildServerCircuitProver(config, telemetry);
4242
const proofStore = new InlineProofStore();
43-
const agents = times(config.proverAgentCount, () => new ProvingAgent(broker, proofStore, prover));
43+
const agents = times(
44+
config.proverAgentCount,
45+
() =>
46+
new ProvingAgent(
47+
broker,
48+
proofStore,
49+
prover,
50+
telemetry,
51+
config.proverAgentProofTypes,
52+
config.proverAgentPollIntervalMs,
53+
),
54+
);
4455

4556
await Promise.all(agents.map(agent => agent.start()));
4657

yarn-project/aztec/src/cli/cmds/start_prover_broker.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ import { type NamespacedApiHandlers } from '@aztec/foundation/json-rpc/server';
33
import { type LogFn } from '@aztec/foundation/log';
44
import { ProvingJobBrokerSchema, createAndStartProvingBroker } from '@aztec/prover-client/broker';
55
import { getProverNodeBrokerConfigFromEnv } from '@aztec/prover-node';
6+
import {
7+
createAndStartTelemetryClient,
8+
getConfigEnvVars as getTelemetryClientConfig,
9+
} from '@aztec/telemetry-client/start';
610

711
import { extractRelevantOptions } from '../util.js';
812

@@ -22,7 +26,8 @@ export async function startProverBroker(
2226
...extractRelevantOptions<ProverBrokerConfig>(options, proverBrokerConfigMappings, 'proverBroker'), // override with command line options
2327
};
2428

25-
const broker = await createAndStartProvingBroker(config);
29+
const client = await createAndStartTelemetryClient(getTelemetryClientConfig());
30+
const broker = await createAndStartProvingBroker(config, client);
2631
services.proverBroker = [broker, ProvingJobBrokerSchema];
2732
signalHandlers.push(() => broker.stop());
2833

yarn-project/p2p/src/mem_pools/instrumentation.ts

+3-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
Attributes,
44
type Histogram,
55
LmdbMetrics,
6+
type LmdbStatsCallback,
67
Metrics,
78
type TelemetryClient,
89
type UpDownCounter,
@@ -58,7 +59,7 @@ export class PoolInstrumentation<PoolObject extends Gossipable> {
5859

5960
private defaultAttributes;
6061

61-
constructor(telemetry: TelemetryClient, name: PoolName) {
62+
constructor(telemetry: TelemetryClient, name: PoolName, dbStats?: LmdbStatsCallback) {
6263
const meter = telemetry.getMeter(name);
6364
this.defaultAttributes = { [Attributes.POOL_NAME]: name };
6465

@@ -98,13 +99,10 @@ export class PoolInstrumentation<PoolObject extends Gossipable> {
9899
name: Metrics.MEMPOOL_DB_NUM_ITEMS,
99100
description: 'Num items in database for the Tx mempool',
100101
},
102+
dbStats,
101103
);
102104
}
103105

104-
public recordDBMetrics(metrics: { mappingSize: number; numItems: number; actualSize: number }) {
105-
this.dbMetrics.recordDBMetrics(metrics);
106-
}
107-
108106
public recordSize(poolObject: PoolObject) {
109107
this.objectSize.record(poolObject.getSize());
110108
}

yarn-project/p2p/src/mem_pools/tx_pool/aztec_kv_tx_pool.ts

+1-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export class AztecKVTxPool implements TxPool {
3737

3838
this.#store = store;
3939
this.#log = log;
40-
this.#metrics = new PoolInstrumentation(telemetry, PoolName.TX_POOL);
40+
this.#metrics = new PoolInstrumentation(telemetry, PoolName.TX_POOL, () => store.estimateSize());
4141
}
4242

4343
public markAsMined(txHashes: TxHash[], blockNumber: number): Promise<void> {
@@ -53,8 +53,6 @@ export class AztecKVTxPool implements TxPool {
5353
}
5454
this.#metrics.recordRemovedObjects(deleted, 'pending');
5555
this.#metrics.recordAddedObjects(txHashes.length, 'mined');
56-
const storeSizes = this.#store.estimateSize();
57-
this.#metrics.recordDBMetrics(storeSizes);
5856
});
5957
}
6058

yarn-project/prover-client/src/prover-client/prover-client.ts

+9-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,15 @@ export class ProverClient implements EpochProverManager {
137137
const prover = await buildServerCircuitProver(this.config, this.telemetry);
138138
this.agents = times(
139139
this.config.proverAgentCount,
140-
() => new ProvingAgent(this.agentClient!, proofStore, prover, [], this.config.proverAgentPollIntervalMs),
140+
() =>
141+
new ProvingAgent(
142+
this.agentClient!,
143+
proofStore,
144+
prover,
145+
this.telemetry,
146+
[],
147+
this.config.proverAgentPollIntervalMs,
148+
),
141149
);
142150

143151
await Promise.all(this.agents.map(agent => agent.start()));

yarn-project/prover-client/src/proving_broker/factory.ts

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import { type ProverBrokerConfig } from '@aztec/circuit-types';
22
import { AztecLmdbStore } from '@aztec/kv-store/lmdb';
3+
import { type TelemetryClient } from '@aztec/telemetry-client';
34

45
import { ProvingBroker } from './proving_broker.js';
56
import { InMemoryBrokerDatabase } from './proving_broker_database/memory.js';
67
import { KVBrokerDatabase } from './proving_broker_database/persisted.js';
78

8-
export async function createAndStartProvingBroker(config: ProverBrokerConfig): Promise<ProvingBroker> {
9+
export async function createAndStartProvingBroker(
10+
config: ProverBrokerConfig,
11+
client: TelemetryClient,
12+
): Promise<ProvingBroker> {
913
const database = config.proverBrokerDataDirectory
10-
? new KVBrokerDatabase(AztecLmdbStore.open(config.proverBrokerDataDirectory))
14+
? new KVBrokerDatabase(AztecLmdbStore.open(config.proverBrokerDataDirectory), client)
1115
: new InMemoryBrokerDatabase();
1216

13-
const broker = new ProvingBroker(database, {
17+
const broker = new ProvingBroker(database, client, {
1418
jobTimeoutMs: config.proverBrokerJobTimeoutMs,
1519
maxRetries: config.proverBrokerJobMaxRetries,
1620
timeoutIntervalMs: config.proverBrokerPollIntervalMs,

yarn-project/prover-client/src/proving_broker/proving_agent.test.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import { makeBaseParityInputs, makeParityPublicInputs } from '@aztec/circuits.js
1919
import { randomBytes } from '@aztec/foundation/crypto';
2020
import { AbortError } from '@aztec/foundation/error';
2121
import { promiseWithResolvers } from '@aztec/foundation/promise';
22+
import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
2223

2324
import { jest } from '@jest/globals';
2425

@@ -50,7 +51,7 @@ describe('ProvingAgent', () => {
5051
saveProofOutput: jest.fn(),
5152
};
5253

53-
agent = new ProvingAgent(jobSource, proofDB, prover, [ProvingRequestType.BASE_PARITY]);
54+
agent = new ProvingAgent(jobSource, proofDB, prover, new NoopTelemetryClient(), [ProvingRequestType.BASE_PARITY]);
5455
});
5556

5657
afterEach(async () => {

yarn-project/prover-client/src/proving_broker/proving_agent.ts

+15
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@ import {
1010
} from '@aztec/circuit-types';
1111
import { createDebugLogger } from '@aztec/foundation/log';
1212
import { RunningPromise } from '@aztec/foundation/running-promise';
13+
import { Timer } from '@aztec/foundation/timer';
14+
import { type TelemetryClient } from '@aztec/telemetry-client';
1315

1416
import { type ProofStore } from './proof_store.js';
17+
import { ProvingAgentInstrumentation } from './proving_agent_instrumentation.js';
1518
import { ProvingJobController, ProvingJobControllerStatus } from './proving_job_controller.js';
1619

1720
/**
@@ -20,6 +23,8 @@ import { ProvingJobController, ProvingJobControllerStatus } from './proving_job_
2023
export class ProvingAgent {
2124
private currentJobController?: ProvingJobController;
2225
private runningPromise: RunningPromise;
26+
private instrumentation: ProvingAgentInstrumentation;
27+
private idleTimer: Timer | undefined;
2328

2429
constructor(
2530
/** The source of proving jobs */
@@ -28,12 +33,15 @@ export class ProvingAgent {
2833
private proofStore: ProofStore,
2934
/** The prover implementation to defer jobs to */
3035
private circuitProver: ServerCircuitProver,
36+
/** A telemetry client through which to emit metrics */
37+
client: TelemetryClient,
3138
/** Optional list of allowed proof types to build */
3239
private proofAllowList: Array<ProvingRequestType> = [],
3340
/** How long to wait between jobs */
3441
private pollIntervalMs = 1000,
3542
private log = createDebugLogger('aztec:prover-client:proving-agent'),
3643
) {
44+
this.instrumentation = new ProvingAgentInstrumentation(client);
3745
this.runningPromise = new RunningPromise(this.safeWork, this.pollIntervalMs);
3846
}
3947

@@ -46,6 +54,7 @@ export class ProvingAgent {
4654
}
4755

4856
public start(): void {
57+
this.idleTimer = new Timer();
4958
this.runningPromise.start();
5059
}
5160

@@ -114,6 +123,11 @@ export class ProvingAgent {
114123
);
115124
}
116125

126+
if (this.idleTimer) {
127+
this.instrumentation.recordIdleTime(this.idleTimer);
128+
}
129+
this.idleTimer = undefined;
130+
117131
this.currentJobController.start();
118132
} catch (err) {
119133
this.log.error(`Error in ProvingAgent: ${String(err)}`);
@@ -126,6 +140,7 @@ export class ProvingAgent {
126140
err: Error | undefined,
127141
result: ProvingJobResultsMap[T] | undefined,
128142
) => {
143+
this.idleTimer = new Timer();
129144
if (err) {
130145
const retry = err.name === ProvingError.NAME ? (err as ProvingError).retry : false;
131146
this.log.error(`Job id=${jobId} type=${ProvingRequestType[type]} failed err=${err.message} retry=${retry}`, err);

yarn-project/prover-client/src/proving_broker/proving_agent_instrumentation.ts

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import { type Timer } from '@aztec/foundation/timer';
2+
import { type Histogram, Metrics, type TelemetryClient, ValueType } from '@aztec/telemetry-client';
3+
4+
/**
 * Telemetry instrumentation for a proving agent.
 *
 * Owns a single histogram that records how long an agent sat idle.
 */
export class ProvingAgentInstrumentation {
5+
/** Histogram registered under `Metrics.PROVING_AGENT_IDLE`; idle durations are recorded here. */
private idleTime: Histogram;
6+
7+
/**
 * Creates the idle-time histogram on a meter obtained from the telemetry client.
 *
 * @param client - Telemetry client used to obtain the meter.
 * @param name - Name passed to `client.getMeter`; defaults to 'ProvingAgent'.
 */
constructor(client: TelemetryClient, name = 'ProvingAgent') {
8+
const meter = client.getMeter(name);
9+
10+
// The histogram is declared with millisecond units and integer values.
this.idleTime = meter.createHistogram(Metrics.PROVING_AGENT_IDLE, {
11+
description: 'Records how long an agent was idle',
12+
unit: 'ms',
13+
valueType: ValueType.INT,
14+
});
15+
}
16+
17+
/**
 * Records one idle period in the histogram.
 *
 * @param msOrTimer - Either a number (recorded as given) or a `Timer`, whose `ms()` reading is
 * rounded down before being recorded.
 */
recordIdleTime(msOrTimer: Timer | number) {
18+
// NOTE(review): only the Timer branch is floored; a fractional number is recorded unchanged even
// though the histogram is declared ValueType.INT — confirm callers only pass whole milliseconds.
const duration = typeof msOrTimer === 'number' ? msOrTimer : Math.floor(msOrTimer.ms());
19+
this.idleTime.record(duration);
20+
}
21+
}

yarn-project/prover-client/src/proving_broker/proving_broker.test.ts

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { type ProofUri, type ProvingJob, type ProvingJobId, ProvingRequestType } from '@aztec/circuit-types';
22
import { randomBytes } from '@aztec/foundation/crypto';
33
import { openTmpStore } from '@aztec/kv-store/utils';
4+
import { NoopTelemetryClient } from '@aztec/telemetry-client/noop';
45

56
import { jest } from '@jest/globals';
67

@@ -17,7 +18,7 @@ describe.each([
1718
() => ({ database: new InMemoryBrokerDatabase(), cleanup: undefined }),
1819
() => {
1920
const store = openTmpStore(true);
20-
const database = new KVBrokerDatabase(store);
21+
const database = new KVBrokerDatabase(store, new NoopTelemetryClient());
2122
const cleanup = () => store.close();
2223
return { database, cleanup };
2324
},
@@ -35,7 +36,7 @@ describe.each([
3536
maxRetries = 2;
3637
({ database, cleanup } = createDb());
3738

38-
broker = new ProvingBroker(database, {
39+
broker = new ProvingBroker(database, new NoopTelemetryClient(), {
3940
jobTimeoutMs,
4041
timeoutIntervalMs: jobTimeoutMs / 4,
4142
maxRetries,
@@ -409,7 +410,7 @@ describe.each([
409410
// fake some time passing while the broker restarts
410411
await jest.advanceTimersByTimeAsync(10_000);
411412

412-
broker = new ProvingBroker(database);
413+
broker = new ProvingBroker(database, new NoopTelemetryClient());
413414
await broker.start();
414415

415416
await assertJobStatus(job1.id, 'in-queue');
@@ -470,7 +471,7 @@ describe.each([
470471
// fake some time passing while the broker restarts
471472
await jest.advanceTimersByTimeAsync(10_000);
472473

473-
broker = new ProvingBroker(database);
474+
broker = new ProvingBroker(database, new NoopTelemetryClient());
474475
await broker.start();
475476

476477
await assertJobStatus(job1.id, 'in-queue');
@@ -521,7 +522,7 @@ describe.each([
521522
// fake some time passing while the broker restarts
522523
await jest.advanceTimersByTimeAsync(100 * jobTimeoutMs);
523524

524-
broker = new ProvingBroker(database);
525+
broker = new ProvingBroker(database, new NoopTelemetryClient());
525526
await broker.start();
526527
await assertJobStatus(job1.id, 'in-queue');
527528

0 commit comments

Comments
 (0)