Skip to content

Commit

Permalink
Add python bindings for new dataframe APIs (#7357)
Browse files Browse the repository at this point in the history
### What
- First pass at implementing the APIs proposed in #7455
- Introduces a new mechanism for directly exposing Rust types into the
Python bridge via a `.pyi` definition

Example notebook for testing
```
pixi run py-build-examples
pixi run -e examples jupyter notebook tests/python/dataframe/examples.ipynb
```

### Future work:
- More docs / help strings
- Remaining API features

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7357?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7357?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7357)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.
  • Loading branch information
jleibs authored Oct 4, 2024
1 parent 2408689 commit 5da39a5
Show file tree
Hide file tree
Showing 13 changed files with 1,214 additions and 2 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6237,10 +6237,15 @@ dependencies = [
"re_build_info",
"re_build_tools",
"re_chunk",
"re_chunk_store",
"re_dataframe2",
"re_entity_db",
"re_log",
"re_log_encoding",
"re_log_types",
"re_memory",
"re_sdk",
"re_types",
"re_video",
"re_web_viewer_server",
"re_ws_comms",
Expand Down
8 changes: 7 additions & 1 deletion crates/store/re_chunk_store/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,13 @@ impl Ord for TimeColumnDescriptor {

impl TimeColumnDescriptor {
    /// Returns the Arrow field describing this time column.
    ///
    /// The field must be nullable since static data doesn't have a time.
    #[inline]
    pub fn to_arrow_field(&self) -> ArrowField {
        let Self { timeline, datatype } = self;
        ArrowField::new(
            timeline.name().to_string(),
            datatype.clone(),
            true, /* nullable */
        )
    }
}
Expand Down Expand Up @@ -337,6 +338,11 @@ impl ComponentColumnDescriptor {
}
}

/// Whether this descriptor refers to the given entity path / component pair.
#[inline]
pub fn matches(&self, entity_path: &EntityPath, component_name: &ComponentName) -> bool {
    self.entity_path == *entity_path && self.component_name == *component_name
}

fn metadata(&self) -> arrow2::datatypes::Metadata {
let Self {
entity_path,
Expand Down
8 changes: 7 additions & 1 deletion rerun_py/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,21 @@ web_viewer = [

[dependencies]
re_build_info.workspace = true
re_chunk.workspace = true
re_chunk = { workspace = true, features = ["arrow"] }
re_chunk_store = { workspace = true }
re_dataframe2 = { workspace = true }
re_entity_db = { workspace = true }
re_log = { workspace = true, features = ["setup"] }
re_log_encoding = { workspace = true }
re_log_types.workspace = true
re_memory.workspace = true
re_sdk = { workspace = true, features = ["data_loaders"] }
re_types = { workspace = true }
re_video.workspace = true
re_web_viewer_server = { workspace = true, optional = true }
re_ws_comms = { workspace = true, optional = true }


arrow = { workspace = true, features = ["pyarrow"] }
arrow2 = { workspace = true, features = ["io_ipc", "io_print", "arrow"] }
crossbeam.workspace = true
Expand Down
5 changes: 5 additions & 0 deletions rerun_py/rerun_bindings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Python shim around the native `rerun_bindings` extension module."""

from __future__ import annotations

# Re-export everything from the native extension at the package top level.
# The previous `typing` imports (TYPE_CHECKING, TypeAlias, Union) were unused
# in this module (they live in `types.py`) and have been removed.
from .rerun_bindings import *  # noqa: F401, F403
Empty file.
107 changes: 107 additions & 0 deletions rerun_py/rerun_bindings/rerun_bindings.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Optional, Sequence

import pyarrow as pa

from .types import AnyColumn, ComponentLike, ViewContentsLike

class ControlColumnDescriptor:
    """
    A control-level column such as `RowId`.

    Instances are obtained from [`Schema.control_columns`][]; the stub declares no constructor.
    """

class ControlColumnSelector:
    """A selector for a control column."""

    @staticmethod
    def row_id() -> ControlColumnSelector:
        """Create a selector for the `row_id` control column."""
        ...

class IndexColumnDescriptor:
    """
    A column containing the index values for when the component data was updated.

    Instances are obtained from [`Schema.index_columns`][].
    """

class IndexColumnSelector:
    """A selector for an index column."""

    def __init__(self, timeline: str):
        """
        Construct a selector for the index column of the given timeline.

        Parameters
        ----------
        timeline : str
            The name of the timeline to select.
        """
        ...

class ComponentColumnDescriptor:
    """A column containing the component data."""

    def with_dictionary_encoding(self) -> ComponentColumnDescriptor:
        """
        Return a descriptor that requests dictionary-encoding for this column.

        NOTE(review): the encoding semantics are implemented on the Rust side and are
        not visible from this stub -- confirm before documenting further.
        """
        ...

class ComponentColumnSelector:
    """A selector for a component column."""

    def __new__(cls, entity_path: str, component_type: ComponentLike):
        """
        Construct a selector for the given entity path and component type.

        Parameters
        ----------
        entity_path : str
            The entity path to select.
        component_type : ComponentLike
            The component to select, by name or by `ComponentMixin` type.
        """
        ...

    def with_dictionary_encoding(self) -> ComponentColumnSelector:
        """
        Return a selector that requests dictionary-encoding for this column.

        NOTE(review): the encoding semantics are implemented on the Rust side and are
        not visible from this stub -- confirm before documenting further.
        """
        ...

class Schema:
    """The schema representing all columns in a [`Recording`][]."""

    def control_columns(self) -> list[ControlColumnDescriptor]:
        """The descriptors of all control-level columns (such as `RowId`)."""
        ...

    def index_columns(self) -> list[IndexColumnDescriptor]:
        """The descriptors of all index columns."""
        ...

    def component_columns(self) -> list[ComponentColumnDescriptor]:
        """The descriptors of all component-data columns."""
        ...

    def column_for(self, entity_path: str, component: ComponentLike) -> Optional[ComponentColumnDescriptor]:
        """Look up the column descriptor for a given entity path / component pair, or `None` if it does not exist."""
        ...

class RecordingView:
    """
    A view of a recording restricted to a given index, containing a specific set of entities and components.

    Can only be created by calling `view(...)` on a `Recording`.

    The only type of index currently supported is the name of a timeline.

    The view will only contain a single row for each unique value of the index. If the same entity / component pair
    was logged to a given index multiple times, only the most recent row will be included in the view, as determined
    by the `row_id` column. This will generally be the last value logged, as row_ids are guaranteed to be monotonically
    increasing when data is sent from a single process.
    """

    def filter_range_sequence(self, start: int, end: int) -> RecordingView:
        """Filter the view to only include data between the given index sequence numbers."""
        ...

    def filter_range_seconds(self, start: float, end: float) -> RecordingView:
        """Filter the view to only include data between the given index time values, expressed in seconds."""
        ...

    def filter_range_nanos(self, start: int, end: int) -> RecordingView:
        """Filter the view to only include data between the given index time values, expressed in nanoseconds."""
        ...

    def select(self, columns: Sequence[AnyColumn]) -> list[pa.RecordBatch]:
        """Select the given columns from the view, returning the data as a list of Arrow record batches."""
        ...

class Recording:
    """A single recording."""

    def schema(self) -> Schema:
        """The schema describing all the columns available in the recording."""
        ...

    def view(self, index: str, contents: ViewContentsLike) -> RecordingView:
        """
        Create a `RecordingView` of the recording on the given index.

        Parameters
        ----------
        index : str
            The name of the timeline to use as the index.
        contents : ViewContentsLike
            The contents to include in the view.
        """
        ...

class RRDArchive:
    """An archive loaded from an RRD, typically containing 1 or more recordings or blueprints."""

    def num_recordings(self) -> int:
        """The number of recordings in the archive."""
        ...

    def all_recordings(self) -> list[Recording]:
        """All the recordings in the archive."""
        ...

def load_recording(filename: str) -> Recording:
    """
    Load a single recording from an RRD.

    Will raise a `ValueError` if the file does not contain exactly one recording.

    Parameters
    ----------
    filename : str
        The path to the file to load.

    Returns
    -------
    Recording
        The loaded recording.
    """
    ...

def load_archive(filename: str) -> RRDArchive:
    """
    Load a rerun archive file from disk.

    Parameters
    ----------
    filename : str
        The path to the file to load.

    Returns
    -------
    RRDArchive
        The loaded archive.
    """
    ...
37 changes: 37 additions & 0 deletions rerun_py/rerun_bindings/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Type aliases used by the dataframe API exposed through `rerun_bindings`."""

from __future__ import annotations

from typing import TYPE_CHECKING, Sequence, TypeAlias, Union

if TYPE_CHECKING:
    from rerun._baseclasses import ComponentMixin

    # NOTE(review): this imports `TimeColumnDescriptor` / `TimeColumnSelector`, but the
    # accompanying stub file declares `IndexColumnDescriptor` / `IndexColumnSelector` --
    # confirm which names the compiled bindings actually export.
    from .rerun_bindings import (
        ComponentColumnDescriptor as ComponentColumnDescriptor,
        ComponentColumnSelector as ComponentColumnSelector,
        ControlColumnDescriptor as ControlColumnDescriptor,
        ControlColumnSelector as ControlColumnSelector,
        TimeColumnDescriptor as TimeColumnDescriptor,
        TimeColumnSelector as TimeColumnSelector,
    )


# A component, referred to either by its name (str) or by its `ComponentMixin` subclass.
ComponentLike: TypeAlias = Union[str, type["ComponentMixin"]]

# Any descriptor or selector that can refer to a column.
AnyColumn: TypeAlias = Union[
    "ControlColumnDescriptor",
    "TimeColumnDescriptor",
    "ComponentColumnDescriptor",
    "ControlColumnSelector",
    "TimeColumnSelector",
    "ComponentColumnSelector",
]

# Any descriptor or selector that refers specifically to a component-data column.
AnyComponentColumn: TypeAlias = Union[
    "ComponentColumnDescriptor",
    "ComponentColumnSelector",
]

# Contents of a view: either a string expression, or a mapping of str
# (presumably entity-path expressions -- confirm against `Recording.view`)
# to the column(s)/component(s) to include.
ViewContentsLike: TypeAlias = Union[
    str,
    dict[str, Union[AnyColumn, Sequence[ComponentLike]]],
]
1 change: 1 addition & 0 deletions rerun_py/rerun_sdk/rerun/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from . import (
blueprint as blueprint,
dataframe as dataframe,
experimental as experimental,
notebook as notebook,
)
Expand Down
20 changes: 20 additions & 0 deletions rerun_py/rerun_sdk/rerun/dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""User-facing `rerun.dataframe` module: re-exports the native dataframe bindings."""

from __future__ import annotations

# Explicit `name as name` re-exports so type checkers treat these as public API.
# NOTE(review): `TimeColumnDescriptor` / `TimeColumnSelector` are imported here, but the
# bindings stub declares `IndexColumnDescriptor` / `IndexColumnSelector` -- confirm the
# compiled bindings export the `Time*` names.
from rerun_bindings import (
    ComponentColumnDescriptor as ComponentColumnDescriptor,
    ComponentColumnSelector as ComponentColumnSelector,
    ControlColumnDescriptor as ControlColumnDescriptor,
    ControlColumnSelector as ControlColumnSelector,
    Recording as Recording,
    RRDArchive as RRDArchive,
    Schema as Schema,
    TimeColumnDescriptor as TimeColumnDescriptor,
    TimeColumnSelector as TimeColumnSelector,
    load_archive as load_archive,
    load_recording as load_recording,
)
from rerun_bindings.types import (
    AnyColumn as AnyColumn,
    AnyComponentColumn as AnyComponentColumn,
    ComponentLike as ComponentLike,
)
Loading

0 comments on commit 5da39a5

Please sign in to comment.