Skip to content

Commit 773e9cc

Browse files
authored
RRD manifests bootstrap (#9053)
1 parent 920de91 commit 773e9cc

File tree

5 files changed

+663
-9
lines changed

5 files changed

+663
-9
lines changed

crates/store/re_format_arrow/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,9 @@ pub fn format_record_batch_opts(
247247
}
248248

249249
/// Nicely format this record batch, either with the given fixed width, or with the terminal width (`None`).
250+
///
251+
/// If `transposed` is `true`, the dataframe will be printed transposed on its diagonal axis.
252+
/// This is very useful for wide (i.e. lots of columns), short (i.e. not many rows) datasets.
250253
pub fn format_record_batch_with_width(
251254
batch: &arrow::array::RecordBatch,
252255
width: Option<usize>,

crates/store/re_log_types/src/time_point/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,15 @@ pub enum TimeType {
124124
Sequence,
125125
}
126126

127+
impl std::fmt::Display for TimeType {
128+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129+
match self {
130+
Self::Time => f.write_str("time"),
131+
Self::Sequence => f.write_str("sequence"),
132+
}
133+
}
134+
}
135+
127136
impl TimeType {
128137
#[inline]
129138
fn hash(&self) -> u64 {

crates/store/re_protos/proto/rerun/v0/remote_store.proto

+189-4
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,20 @@ package rerun.remote_store.v0;
55
import "rerun/v0/common.proto";
66

77
service StorageNode {
8-
// data API calls
8+
// Data APIs
9+
910
rpc Query(QueryRequest) returns (stream DataframePart) {}
11+
1012
rpc FetchRecording(FetchRecordingRequest) returns (stream rerun.common.v0.RerunChunk) {}
1113

14+
// Index APIs
15+
1216
rpc CreateIndex(CreateIndexRequest) returns (CreateIndexResponse) {}
17+
1318
rpc ReIndex(ReIndexRequest) returns (ReIndexResponse) {}
1419

1520
rpc GetChunkIds(GetChunkIdsRequest) returns (stream GetChunkIdsResponse) {}
21+
1622
rpc GetChunks(GetChunksRequest) returns (stream rerun.common.v0.RerunChunk) {}
1723

1824
// The response to `SearchIndex` a RecordBatch with 3 columns:
@@ -25,17 +31,32 @@ service StorageNode {
2531
// - 'data' column with the data that is returned for the matched timepoints
2632
rpc SearchIndex(SearchIndexRequest) returns (stream DataframePart) {}
2733

28-
// metadata API calls
34+
// Chunk manifest APIs
35+
36+
rpc CreateManifests(CreateManifestsRequest) returns (CreateManifestsResponse) {}
37+
38+
rpc ListManifests(ListManifestsRequest) returns (stream DataframePart) {}
39+
40+
rpc QueryManifest(QueryManifestRequest) returns (stream DataframePart) {}
41+
42+
// TODO(zehiko, cmc): DeleteManifest
43+
44+
// Metadata APIs
45+
2946
rpc QueryCatalog(QueryCatalogRequest) returns (stream DataframePart) {}
47+
3048
rpc UpdateCatalog(UpdateCatalogRequest) returns (UpdateCatalogResponse) {}
49+
3150
rpc GetRecordingSchema(GetRecordingSchemaRequest) returns (GetRecordingSchemaResponse) {}
3251

52+
// Registration APIs
53+
3354
// TODO(zehiko) support registering more than one recording at a time
3455
rpc RegisterRecording(RegisterRecordingRequest) returns (DataframePart) {}
3556

3657
rpc UnregisterRecording(UnregisterRecordingRequest) returns (UnregisterRecordingResponse) {}
37-
rpc UnregisterAllRecordings(UnregisterAllRecordingsRequest)
38-
returns (UnregisterAllRecordingsResponse) {}
58+
59+
rpc UnregisterAllRecordings(UnregisterAllRecordingsRequest) returns (UnregisterAllRecordingsResponse) {}
3960
}
4061

4162
// ---------------- Common response message ------------------
@@ -192,6 +213,170 @@ message CatalogEntry {
192213
string name = 1;
193214
}
194215

216+
217+
// ---------------- CreateManifests ------------------
218+
219+
// TODO(zehiko, cmc): At some point, this will need to be fully async (i.e. caller gets assigned
220+
// a unique request ID and polls it for completion), but:
221+
// A) Let's wait until we have a real need for this.
222+
// B) That's true of everything in the platform, so this needs to be properly generalized.
223+
224+
message CreateManifestsRequest {
225+
// Which catalog entry do we want to create manifests for?
226+
CatalogEntry entry = 1;
227+
}
228+
229+
message CreateManifestsResponse {}
230+
231+
// ---------------- ListManifests ------------------
232+
233+
message ListManifestsRequest {
234+
// Which catalog entry do we want to list the manifests of?
235+
CatalogEntry entry = 1;
236+
237+
// Generic parameters that will influence the behavior of the Lance scanner.
238+
//
239+
// TODO(zehiko, cmc): actually support those.
240+
ScanParameters scan_parameters = 500;
241+
}
242+
243+
message ListManifestsResponse {
244+
rerun.common.v0.EncoderVersion encoder_version = 1;
245+
246+
// The record batch of the response, encoded according to `encoder_version`.
247+
bytes payload = 2;
248+
}
249+
250+
// ---------------- QueryManifest ------------------
251+
252+
// TODO(zehiko, cmc): Being able to specify only a collection ID rather than a resource ID could be
253+
// super useful for cross-recording queries (resource_id becomes a column of the result).
254+
255+
// A manifest query will find all the relevant chunk IDs (and optionally a bunch of related metadata)
256+
// for a given Rerun query (latest-at, range, etc).
257+
//
258+
// The result might contain duplicated chunk IDs, it is the responsibility of the caller to deduplicate
259+
// them as needed.
260+
message QueryManifestRequest {
261+
// What resource are we querying the manifest for?
262+
rerun.common.v0.RecordingId resource_id = 100;
263+
264+
// What columns of the manifest are we interested in?
265+
ColumnProjection columns = 200;
266+
267+
// If true, `columns` will contain the entire schema.
268+
bool columns_always_include_everything = 210;
269+
270+
// If true, `columns` always includes `chunk_id`,
271+
bool columns_always_include_chunk_ids = 220;
272+
273+
// If true, `columns` always includes `byte_offset` and `byte_size`.
274+
bool columns_always_include_byte_offsets = 230;
275+
276+
// If true, `columns` always includes all static component-level indexes.
277+
bool columns_always_include_static_indexes = 240;
278+
279+
// If true, `columns` always includes all temporal chunk-level indexes.
280+
bool columns_always_include_global_indexes = 250;
281+
282+
// If true, `columns` always includes all component-level indexes.
283+
bool columns_always_include_component_indexes = 260;
284+
285+
// If specified, will perform a latest-at query with the given parameters.
286+
//
287+
// Incompatible with `range`.
288+
QueryManifestLatestAtRelevantChunks latest_at = 300;
289+
290+
// If specified, will perform a range query with the given parameters.
291+
//
292+
// Incompatible with `latest_at`.
293+
QueryManifestRangeRelevantChunks range = 400;
294+
295+
// Generic parameters that will influence the behavior of the Lance scanner.
296+
ScanParameters scan_parameters = 500;
297+
}
298+
299+
message QueryManifestLatestAtRelevantChunks {
300+
// Which index column should we perform the query on? E.g. `log_time`.
301+
rerun.common.v0.IndexColumnSelector index = 1;
302+
303+
// What index value are we looking for?
304+
int64 at = 2;
305+
306+
// Which components are we interested in?
307+
//
308+
// If left unspecified, all existing components are considered of interest.
309+
//
310+
// This will perform a basic fuzzy match on the available columns' descriptors.
311+
// The fuzzy logic is a simple case-sensitive `contains()` query.
312+
// For example, given a `log_tick__SeriesLine:StrokeWidth#width` index, all of the following
313+
// would match: `SeriesLine:StrokeWidth#width`, `StrokeWidth`, `Stroke`, `Width`, `width`,
314+
// `SeriesLine`, etc.
315+
repeated string fuzzy_descriptors = 3;
316+
}
317+
318+
message QueryManifestRangeRelevantChunks {
319+
// Which index column should we perform the query on? E.g. `log_time`.
320+
rerun.common.v0.IndexColumnSelector index = 1;
321+
322+
// What index range are we looking for?
323+
rerun.common.v0.TimeRange index_range = 2;
324+
325+
// Which components are we interested in?
326+
//
327+
// If left unspecified, all existing components are considered of interest.
328+
//
329+
// This will perform a basic fuzzy match on the available columns' descriptors.
330+
// The fuzzy logic is a simple case-sensitive `contains()` query.
331+
// For example, given a `log_tick__SeriesLine:StrokeWidth#width` index, all of the following
332+
// would match: `SeriesLine:StrokeWidth#width`, `StrokeWidth`, `Stroke`, `Width`, `width`,
333+
// `SeriesLine`, etc.
334+
repeated string fuzzy_descriptors = 3;
335+
}
336+
337+
// Generic parameters that will influence the behavior of the Lance scanner.
338+
//
339+
// TODO(zehiko, cmc): This should be available for every endpoint that queries data in
340+
// one way or another.
341+
message ScanParameters {
342+
// An arbitrary filter expression that will be passed to the Lance scanner as-is.
343+
//
344+
// ```text
345+
// scanner.filter(filter)
346+
// ```
347+
string filter = 100;
348+
349+
// An arbitrary offset that will be passed to the Lance scanner as-is.
350+
//
351+
// ```text
352+
// scanner.limit(_, limit_offset)
353+
// ```
354+
int64 limit_offset = 200;
355+
356+
// An arbitrary limit that will be passed to the Lance scanner as-is.
357+
//
358+
// ```text
359+
// scanner.limit(limit_len, _)
360+
// ```
361+
int64 limit_len = 201;
362+
363+
// An arbitrary order clause that will be passed to the Lance scanner as-is.
364+
//
365+
// ```text
366+
// scanner.order_by(…)
367+
// ```
368+
ScanParametersOrderClause order_by = 300;
369+
370+
// If set, the output of `scanner.explain_plan` will be dumped to the server's log.
371+
bool explain = 400;
372+
}
373+
374+
message ScanParametersOrderClause {
375+
bool ascending = 10;
376+
bool nulls_first = 20;
377+
string column_name = 30;
378+
}
379+
195380
// ---------------- GetRecordingSchema ------------------
196381

197382
message GetRecordingSchemaRequest {

0 commit comments

Comments
 (0)