Skip to content

Commit 731df86

Browse files
committed
Implement modality structure representation
Resolves #44
1 parent 6e35697 commit 731df86

File tree

3 files changed

+129
-5
lines changed

3 files changed

+129
-5
lines changed

mudata/_core/io.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pathlib import Path
2020
from scipy import sparse
2121

22-
from mudata import MuData
22+
from .mudata import ModDict, MuData
2323
from .file_backing import MuDataFileManager, AnnDataFileManager
2424

2525
#
@@ -374,7 +374,7 @@ def read_h5mu(filename: PathLike, backed: Union[str, bool, None] = None):
374374
if k in ["obs", "var"]:
375375
d[k] = read_dataframe(f[k])
376376
if k == "mod":
377-
mods = {}
377+
mods = ModDict()
378378
gmods = f[k]
379379
for m in gmods.keys():
380380
ad = _read_h5mu_mod(gmods[m], manager, backed not in (None, False))

mudata/_core/mudata.py

+81-3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,74 @@ class MuAxisArrays(AxisArrays):
5050
_view_class = MuAxisArraysView
5151

5252

53+
class ModDict(dict):
    """Dictionary of modalities whose ``repr`` is a tree of its contents.

    Values that are ``MuData`` or ``AnnData`` objects are rendered; any other
    value is silently left out of the tree.
    """

    # Padding prepended once per nesting level. The column of every "│"
    # marker is derived from its width, so the two cannot drift apart.
    _LEVEL_PAD = " "

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _repr_hierarchy(
        self, nest_level: int = 0, is_last: bool = False, active_levels: Optional[List[int]] = None
    ) -> str:
        """Render the modalities as tree lines, recursing into nested MuData.

        Parameters
        ----------
        nest_level
            Depth of this dictionary in the tree; 0 for the outermost one.
        is_last
            Unused; kept so existing callers passing it keep working.
        active_levels
            Nesting levels of ancestors that still have siblings to print.
            A "│" is drawn in the column of each such level.

        Returns
        -------
        str
            One ``"\\n"``-prefixed line per rendered modality (empty string
            when nothing is rendered).
        """
        active_levels = active_levels or []

        # Filter before touching any attribute: reading ``is_view`` or
        # ``isbacked`` on an arbitrary value would raise instead of skipping
        # it, and the "last child" connector must be chosen among the entries
        # that are actually printed.
        entries = [(k, v) for k, v in self.items() if isinstance(v, (MuData, AnnData))]
        last = len(entries) - 1
        pad = self._LEVEL_PAD * nest_level
        pad_width = len(self._LEVEL_PAD)

        parts = []
        for i, (k, v) in enumerate(entries):
            indent_chars = list(pad + ("└─ " if i == last else "├─ "))
            for level in active_levels:
                indent_chars[level * pad_width] = "│"
            indent = "".join(indent_chars)

            is_view = " view" if v.is_view else ""
            backed_at = f" backed at {str(v.filename)!r}" if v.isbacked else ""

            if isinstance(v, MuData):
                if not hasattr(v, "axis"):
                    maybe_axis = ""
                elif v.axis == 0:
                    maybe_axis = " [shared obs] "
                elif v.axis == 1:
                    maybe_axis = " [shared var] "
                else:
                    maybe_axis = " [shared obs and var] "
                parts.append(
                    f"\n{indent}{k} MuData{maybe_axis}({v.n_obs} × {v.n_vars}){backed_at}{is_view}"
                )

                # A nested MuData that is not the last child keeps this level
                # "open", so its children draw a "│" in this column.
                if i != last:
                    levels = [nest_level] + list(active_levels)
                else:
                    levels = [level for level in active_levels if level != nest_level]
                parts.append(v.mod._repr_hierarchy(nest_level=nest_level + 1, active_levels=levels))
            else:
                parts.append(f"\n{indent}{k} AnnData ({v.n_obs} x {v.n_vars}){backed_at}{is_view}")

        return "".join(parts)

    def __repr__(self) -> str:
        """
        Represent the hierarchy of the modalities in the object.

        A ModDict with two modalities, protein and RNA, with the latter being
        a MuData sharing both axes across its raw and hvg-filtered AnnData
        objects, is represented as:

        MuData
        ├─ protein AnnData (5000 x 100)
        └─ rna MuData [shared obs and var] (5000 × 20000)
         ├─ raw AnnData (5000 x 20000)
         └─ hvg-filtered AnnData (3000 x 4000)
        """
        return "MuData" + self._repr_hierarchy()
120+
53121
class MuData:
54122
"""
55123
Multimodal data object
@@ -81,7 +149,7 @@ def __init__(
81149
return
82150

83151
# Add all modalities to a MuData object
84-
self.mod = dict()
152+
self.mod = ModDict()
85153
if isinstance(data, abc.Mapping):
86154
for k, v in data.items():
87155
self.mod[k] = v
@@ -185,7 +253,7 @@ def _init_as_view(self, mudata_ref: "MuData", index):
185253
if isinstance(varidx, Integral):
186254
varidx = slice(varidx, varidx + 1)
187255

188-
self.mod = dict()
256+
self.mod = ModDict()
189257
for m, a in mudata_ref.mod.items():
190258
cobsidx, cvaridx = mudata_ref.obsmap[m][obsidx], mudata_ref.varmap[m][varidx]
191259
cobsidx, cvaridx = cobsidx[cobsidx > 0] - 1, cvaridx[cvaridx > 0] - 1
@@ -1239,7 +1307,17 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0)
12391307
indent = " " * nest_level
12401308
backed_at = f" backed at {str(self.filename)!r}" if self.isbacked else ""
12411309
view_of = "View of " if self.is_view else ""
1242-
maybe_axis = f" (axis={self.axis}) " if hasattr(self, "axis") and self.axis != 0 else ""
1310+
maybe_axis = (
1311+
(
1312+
f" (shared obs) "
1313+
if self.axis == 0
1314+
else f" (shared var) "
1315+
if self.axis == 1
1316+
else f" (shared obs and var) "
1317+
)
1318+
if hasattr(self, "axis")
1319+
else ""
1320+
)
12431321
descr = f"{view_of}MuData object with n_obs × n_vars = {n_obs} × {n_vars}{maybe_axis}{backed_at}"
12441322
for attr in ["obs", "var", "uns", "obsm", "varm", "obsp", "varp"]:
12451323
if hasattr(self, attr) and getattr(self, attr) is not None:

tests/test_repr.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import unittest
2+
import pytest
3+
4+
import numpy as np
5+
from anndata import AnnData
6+
from mudata import MuData
7+
8+
9+
# Dimensions
10+
# Number of observations in every modality.
N = 100
# Number of variables in the first modality.
D1 = 10
# Number of variables in each nested modality.
D2 = 20
# Total number of variables across modalities.
D = D1 + D2
13+
14+
15+
@pytest.fixture()
def mdata():
    """Build a nested MuData: one AnnData plus a MuData of two AnnData.

    ``mod1`` is an ``N x D1`` AnnData. ``mod2`` is a MuData created with
    ``axis=-1`` holding two ``N x D2`` AnnData objects that share obs and var
    names with each other. All shapes are derived from ``N``, ``D1`` and
    ``D2`` so the fixture stays in sync with the module constants.
    """
    # linspace fixes the element count exactly; a float-step arange does not
    # guarantee it.
    mod1 = AnnData(np.linspace(0, 100, N * D1, endpoint=False).reshape(N, D1))
    mod1.obs_names = [f"obs{i}" for i in range(mod1.n_obs)]
    mod1.var_names = [f"var{i}" for i in range(D1)]

    nested = {}
    for name in ("mod21", "mod22"):
        ad = AnnData(np.arange(3101, 3101 + N * D2).reshape(N, D2))
        # Same obs_names and var_names in both nested modalities
        ad.obs_names = mod1.obs_names.copy()
        ad.var_names = [f"var{i}" for i in range(D1, D)]
        nested[name] = ad
    mod2 = MuData(nested, axis=-1)

    # No teardown is needed, so return the object directly.
    return MuData({"mod1": mod1, "mod2": mod2})
32+
33+
34+
class TestMuData:
    """Checks on the nested MuData fixture and on the modality tree repr."""

    def test_nested_mudata(self, mdata):
        """Shapes and axes of the outer and the nested MuData are as built."""
        assert mdata.shape == (N, D)
        assert mdata["mod1"].shape == (N, D1)
        assert mdata["mod2"].shape == (N, D2)
        assert mdata.axis == 0
        assert mdata["mod2"].axis == -1

    def test_mod_repr(self, mdata):
        """``repr`` of ``.mod`` lists every modality, nested ones indented."""
        # One entry per output line; the nested MuData width is D2, not a
        # literal, so the expectation follows the module constants.
        expected = "\n".join(
            [
                "MuData",
                f"├─ mod1 AnnData ({N} x {D1})",
                f"└─ mod2 MuData [shared obs and var] ({N} × {D2})",
                f" ├─ mod21 AnnData ({N} x {D2})",
                f" └─ mod22 AnnData ({N} x {D2})",
            ]
        )
        assert repr(mdata.mod) == expected

0 commit comments

Comments
 (0)