Skip to content

Commit 9e3b147

Browse files
authored Mar 9, 2025
Merge pull request #45 from trossi/dataframe-writer
Add writing functionality for dataframes
2 parents 55c1973 + e50eb9c commit 9e3b147

25 files changed

+1205
-372
lines changed
 

‎rdata/_write.py

+24-21
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
"""Functions to perform conversion and unparsing in one step."""
2+
23
from __future__ import annotations
34

45
from typing import TYPE_CHECKING
56

6-
from .conversion import build_r_data, convert_to_r_object, convert_to_r_object_for_rda
7-
from .conversion.to_r import DEFAULT_FORMAT_VERSION
7+
from .conversion import (
8+
convert_python_to_r_data,
9+
)
10+
from .conversion.to_r import (
11+
DEFAULT_CLASS_MAP,
12+
DEFAULT_FORMAT_VERSION,
13+
)
814
from .unparser import unparse_file
915

1016
if TYPE_CHECKING:
1117
import os
1218
from typing import Any
1319

14-
from .conversion.to_r import Encoding
20+
from .conversion.to_r import ConstructorDict, Encoding
1521
from .unparser import Compression, FileFormat
1622

1723

@@ -23,14 +29,12 @@ def write_rds(
2329
compression: Compression = "gzip",
2430
encoding: Encoding = "utf-8",
2531
format_version: int = DEFAULT_FORMAT_VERSION,
32+
constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
2633
) -> None:
2734
"""
2835
Write an RDS file.
2936
30-
This is a convenience function that wraps
31-
:func:`rdata.conversion.convert_to_r_object`,
32-
:func:`rdata.conversion.build_r_data`,
33-
and :func:`rdata.unparser.unparse_file`,
37+
This is a convenience function that wraps conversion and unparsing
3438
as it is the common use case.
3539
3640
Args:
@@ -40,6 +44,8 @@ def write_rds(
4044
compression: Compression.
4145
encoding: Encoding to be used for strings within data.
4246
format_version: File format version.
47+
constructor_dict: Dictionary mapping Python classes to
48+
functions converting them to R classes.
4349
4450
See Also:
4551
:func:`write_rda`: Similar function that writes an RDA or RDATA file.
@@ -52,15 +58,13 @@ def write_rds(
5258
>>> data = ["hello", 1, 2.2, 3.3+4.4j]
5359
>>> rdata.write_rds("test.rds", data)
5460
"""
55-
r_object = convert_to_r_object(
61+
r_data = convert_python_to_r_data(
5662
data,
5763
encoding=encoding,
58-
)
59-
r_data = build_r_data(
60-
r_object,
61-
encoding=encoding,
6264
format_version=format_version,
65+
constructor_dict=constructor_dict,
6366
)
67+
6468
unparse_file(
6569
path,
6670
r_data,
@@ -78,14 +82,12 @@ def write_rda(
7882
compression: Compression = "gzip",
7983
encoding: Encoding = "utf-8",
8084
format_version: int = DEFAULT_FORMAT_VERSION,
85+
constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
8186
) -> None:
8287
"""
8388
Write an RDA or RDATA file.
8489
85-
This is a convenience function that wraps
86-
:func:`rdata.conversion.convert_to_r_object_for_rda`,
87-
:func:`rdata.conversion.build_r_data`,
88-
and :func:`rdata.unparser.unparse_file`,
90+
This is a convenience function that wraps conversion and unparsing
8991
as it is the common use case.
9092
9193
Args:
@@ -95,6 +97,8 @@ def write_rda(
9597
compression: Compression.
9698
encoding: Encoding to be used for strings within data.
9799
format_version: File format version.
100+
constructor_dict: Dictionary mapping Python classes to
101+
functions converting them to R classes.
98102
99103
See Also:
100104
:func:`write_rds`: Similar function that writes an RDS file.
@@ -107,15 +111,14 @@ def write_rda(
107111
>>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
108112
>>> rdata.write_rda("test.rda", data)
109113
"""
110-
r_object = convert_to_r_object_for_rda(
114+
r_data = convert_python_to_r_data(
111115
data,
112116
encoding=encoding,
113-
)
114-
r_data = build_r_data(
115-
r_object,
116-
encoding=encoding,
117117
format_version=format_version,
118+
constructor_dict=constructor_dict,
119+
file_type="rda",
118120
)
121+
119122
unparse_file(
120123
path,
121124
r_data,

‎rdata/conversion/__init__.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""Utilities for converting R objects to Python ones."""
1+
"""Utilities for converting between R and Python objects."""
2+
23
from ._conversion import (
34
DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,
45
Converter as Converter,
@@ -25,7 +26,7 @@
2526
ts_constructor as ts_constructor,
2627
)
2728
from .to_r import (
28-
build_r_data as build_r_data,
29-
convert_to_r_object as convert_to_r_object,
30-
convert_to_r_object_for_rda as convert_to_r_object_for_rda,
29+
ConverterFromPythonToR as ConverterFromPythonToR,
30+
convert_python_to_r_data as convert_python_to_r_data,
31+
convert_python_to_r_object as convert_python_to_r_object,
3132
)

‎rdata/conversion/_conversion.py

+28-10
Original file line numberDiff line numberDiff line change
@@ -394,20 +394,38 @@ def convert_array(
394394
return value # type: ignore [no-any-return]
395395

396396

397-
R_INT_MIN = -2**31
398-
399-
400397
def _dataframe_column_transform(source: Any) -> Any: # noqa: ANN401
401398

402399
if isinstance(source, np.ndarray):
400+
dtype: Any
403401
if np.issubdtype(source.dtype, np.integer):
404-
return pd.Series(source, dtype=pd.Int32Dtype()).array
405-
406-
if np.issubdtype(source.dtype, np.bool_):
407-
return pd.Series(source, dtype=pd.BooleanDtype()).array
402+
dtype = pd.Int32Dtype()
403+
elif np.issubdtype(source.dtype, np.floating):
404+
# We return the numpy array here, which keeps
405+
# R_FLOAT_NA, np.nan, and other NaNs as they were originally in the file.
406+
# Users can then decide if they prefer to interpret
407+
# only R_FLOAT_NA or all NaNs as "missing".
408+
return source
409+
# This would create an array with all NaNs as "missing":
410+
# dtype = pd.Float64Dtype() # noqa: ERA001
411+
# This would create an array with only R_FLOAT_NA as "missing":
412+
# from rdata.missing import is_na # noqa: ERA001
413+
# return pd.arrays.FloatingArray(source, is_na(source)) # noqa: ERA001
414+
elif np.issubdtype(source.dtype, np.complexfloating):
415+
# There seems to be no pandas type for complex array
416+
return source
417+
elif np.issubdtype(source.dtype, np.bool_):
418+
dtype = pd.BooleanDtype()
419+
elif np.issubdtype(source.dtype, np.str_):
420+
dtype = pd.StringDtype()
421+
elif np.issubdtype(source.dtype, np.object_):
422+
for value in source:
423+
assert isinstance(value, str) or value is None
424+
dtype = pd.StringDtype()
425+
else:
426+
return source
408427

409-
if np.issubdtype(source.dtype, np.str_):
410-
return pd.Series(source, dtype=pd.StringDtype()).array
428+
return pd.Series(source, dtype=dtype).array
411429

412430
return source
413431

@@ -430,7 +448,7 @@ def dataframe_constructor(
430448
and isinstance(row_names, np.ma.MaskedArray)
431449
and row_names.mask[0]
432450
)
433-
else tuple(row_names)
451+
else row_names
434452
)
435453

436454
return pd.DataFrame(obj, columns=obj, index=index)

‎rdata/conversion/to_r.py

+580-209
Large diffs are not rendered by default.

‎rdata/missing.py

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Utilities for missing (NA) values in R."""
2+
3+
from __future__ import annotations
4+
5+
from typing import TYPE_CHECKING
6+
7+
import numpy as np
8+
9+
if TYPE_CHECKING:
10+
from typing import Any, Final
11+
12+
import numpy.typing as npt
13+
14+
15+
#: Value used to represent a missing integer in R.
R_INT_NA: Final[int] = np.int32(-2**31)  # type: ignore [assignment]

#: Value used to represent a missing float in R.
# This is a NaN with a particular payload, but it's not the same as np.nan.
R_FLOAT_NA: Final[float] = np.uint64(0x7ff00000000007a2).view(np.float64)  # type: ignore [assignment]

# Inclusive bounds of the values representable as a 32-bit signed integer.
_INT32_MIN = -2**31
_INT32_MAX = 2**31 - 1


def get_na_value(dtype: np.dtype[Any]) -> Any:  # noqa: ANN401
    """
    Get NA value for a given type.

    Args:
        dtype: NumPy dtype.

    Returns:
        NA value of given dtype.

    Raises:
        NotImplementedError: If the dtype is neither ``np.int32``
            nor ``np.float64``.
    """
    if dtype == np.int32:
        return R_INT_NA
    if dtype == np.float64:
        return R_FLOAT_NA
    msg = f"NA for numpy dtype {dtype} not implemented"
    raise NotImplementedError(msg)


def is_na(
    array: Any | npt.NDArray[Any],  # noqa: ANN401
) -> bool | npt.NDArray[np.bool_]:
    """
    Check if the array elements are NA.

    Only the exact NA bit pattern counts as NA; other NaN values
    (such as ``np.nan``) are not reported as NA.

    Args:
        array: NumPy array or single value.

    Returns:
        Boolean mask of NA values in the array.

    Raises:
        NotImplementedError: If the type or dtype of the input
            has no NA value implemented.
    """
    if isinstance(array, np.ndarray):
        dtype = array.dtype
        na = get_na_value(dtype)
        if dtype == np.int32:
            # Use the native dtype for comparison when possible;
            # slightly faster than the steps below
            return array == na  # type: ignore [no-any-return]
        # Convert dtype to unsigned integer to perform byte-by-byte
        # equality comparison to distinguish different NaN values
        raw_dtype = f"u{array.dtype.itemsize}"
        return array.view(raw_dtype) == np.array(na).view(raw_dtype)  # type: ignore [no-any-return]

    if isinstance(array, int):
        # Python built-in integer is 64 bits or larger, so we cast it
        # to 32-bit int only when it fits. The range is checked
        # explicitly instead of relying on NumPy raising OverflowError,
        # because a wrapped out-of-range value (e.g. 2**31 -> -2**31)
        # would be misreported as NA.
        if _INT32_MIN <= array <= _INT32_MAX:
            return is_na(np.array(array, dtype=np.int32))
        # Proceed with larger integer (in case it is supported at some point)
        return is_na(np.array(array))

    if isinstance(array, (float, np.int32, np.float64)):
        return is_na(np.array(array))

    msg = f"NA for {type(array)} not implemented"
    raise NotImplementedError(msg)


def mask_na_values(
    array: npt.NDArray[Any],
    *,
    fill_value: Any | None = None,  # noqa: ANN401
) -> npt.NDArray[Any] | np.ma.MaskedArray[Any, Any]:
    """
    Mask NA elements of the array.

    When ``fill_value`` is given, the NA elements of ``array`` are
    overwritten in place with it. With the default fill value the input
    is not written to, so read-only arrays are accepted.

    Args:
        array: NumPy array.
        fill_value: Fill value for the masked array.
            Defaults to the NA value.

    Returns:
        NumPy masked array with NA values as the mask
        or the original array if there are no NA elements.

    Raises:
        NotImplementedError: If the dtype of the array
            has no NA value implemented.
    """
    mask = is_na(array)
    if not np.any(mask):
        return array

    if fill_value is None:
        # The masked elements already hold the NA bit pattern, so there
        # is nothing to write back into the array.
        fill_value = get_na_value(array.dtype)
    else:
        array[mask] = fill_value

    return np.ma.array(  # type: ignore [no-untyped-call,no-any-return]
        data=array,
        mask=mask,
        fill_value=fill_value,
    )

‎rdata/parser/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
"""Utilities for parsing a rdata file."""
22

33
from ._parser import (
44
DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
5-
R_INT_NA as R_INT_NA,
65
CharFlags as CharFlags,
76
RData as RData,
87
RExtraInfo as RExtraInfo,

‎rdata/parser/_ascii.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,17 @@
66
import numpy as np
77
import numpy.typing as npt
88

9-
from ._parser import R_INT_NA, AltRepConstructorMap, Parser
9+
from rdata.missing import R_FLOAT_NA, R_INT_NA
10+
11+
from ._parser import AltRepConstructorMap, Parser
12+
13+
14+
def map_int_na(line: str) -> int:
15+
return R_INT_NA if line == "NA" else int(line)
16+
17+
18+
def map_float_na(line: str) -> float:
19+
return R_FLOAT_NA if line == "NA" else float(line)
1020

1121

1222
class ParserASCII(Parser):
@@ -30,26 +40,27 @@ def _readline(self) -> str:
3040
return self.file.readline()[:-1]
3141

3242
def _parse_array_values(
33-
self,
34-
dtype: npt.DTypeLike,
35-
length: int,
43+
self,
44+
dtype: npt.DTypeLike,
45+
length: int,
3646
) -> npt.NDArray[Any]:
37-
3847
array = np.empty(length, dtype=dtype)
3948
value: int | float | complex
4049

4150
for i in range(length):
4251
line = self._readline()
4352

4453
if np.issubdtype(dtype, np.integer):
45-
value = R_INT_NA if line == "NA" else int(line)
54+
value = map_int_na(line)
4655

4756
elif np.issubdtype(dtype, np.floating):
48-
value = float(line)
57+
value = map_float_na(line)
4958

5059
elif np.issubdtype(dtype, np.complexfloating):
60+
value1 = map_float_na(line)
5161
line2 = self._readline()
52-
value = complex(float(line), float(line2))
62+
value2 = map_float_na(line2)
63+
value = complex(value1, value2)
5364

5465
else:
5566
msg = f"Unknown dtype: {dtype}"

‎rdata/parser/_parser.py

+23-27
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,13 @@
2323
import numpy as np
2424
import numpy.typing as npt
2525

26+
from rdata.missing import R_INT_NA, mask_na_values
27+
2628
if TYPE_CHECKING:
2729
from ._ascii import ParserASCII
2830
from ._xdr import ParserXDR
2931

3032

31-
#: Value used to represent a missing integer in R.
32-
R_INT_NA: Final = -2**31
33-
34-
3533
@runtime_checkable
3634
class BinaryFileLike(Protocol):
3735
"""Protocol for binary files."""
@@ -371,6 +369,9 @@ def __eq__(self, other: object) -> bool:
371369
return False
372370

373371
# Compare value field
372+
if not isinstance(other.value, type(self.value)):
373+
return False
374+
374375
if isinstance(self.value, np.ndarray):
375376
if not np.array_equal(self.value, other.value, equal_nan=True):
376377
return False
@@ -540,6 +541,22 @@ def wrap_constructor(
540541
return new_info, value
541542

542543

544+
def get_altrep_name(info: RObject) -> bytes:
545+
"""Get the name of the ALTREP object."""
546+
assert info.info.type == RObjectType.LIST
547+
548+
class_sym = info.value[0]
549+
while class_sym.info.type == RObjectType.REF:
550+
class_sym = class_sym.referenced_object
551+
552+
assert class_sym.info.type == RObjectType.SYM
553+
assert class_sym.value.info.type == RObjectType.CHAR
554+
555+
altrep_name = class_sym.value.value
556+
assert isinstance(altrep_name, bytes)
557+
return altrep_name
558+
559+
543560
default_altrep_map_dict: Final[Mapping[bytes, AltRepConstructor]] = {
544561
b"deferred_string": deferred_string_constructor,
545562
b"compact_intseq": compact_intseq_constructor,
@@ -608,17 +625,7 @@ def parse_nullable_int_array(
608625
) -> npt.NDArray[np.int32] | np.ma.MaskedArray[Any, Any]:
609626
"""Parse an integer array."""
610627
data = self._parse_array(np.int32)
611-
mask = (data == R_INT_NA)
612-
data[mask] = fill_value
613-
614-
if np.any(mask):
615-
return np.ma.array( # type: ignore [no-untyped-call,no-any-return]
616-
data=data,
617-
mask=mask,
618-
fill_value=fill_value,
619-
)
620-
621-
return data
628+
return mask_na_values(data, fill_value=fill_value)
622629

623630
def parse_double_array(self) -> npt.NDArray[np.float64]:
624631
"""Parse a double array."""
@@ -678,18 +685,7 @@ def expand_altrep_to_object(
678685
state: RObject,
679686
) -> tuple[RObjectInfo, Any]:
680687
"""Expand alternative representation to normal object."""
681-
assert info.info.type == RObjectType.LIST
682-
683-
class_sym = info.value[0]
684-
while class_sym.info.type == RObjectType.REF:
685-
class_sym = class_sym.referenced_object
686-
687-
assert class_sym.info.type == RObjectType.SYM
688-
assert class_sym.value.info.type == RObjectType.CHAR
689-
690-
altrep_name = class_sym.value.value
691-
assert isinstance(altrep_name, bytes)
692-
688+
altrep_name = get_altrep_name(info)
693689
constructor = self.altrep_constructor_dict[altrep_name]
694690
return constructor(state)
695691

‎rdata/parser/_xdr.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ def __init__(
2626
self.file = io.BytesIO(data)
2727

2828
def _parse_array_values(
29-
self,
30-
dtype: npt.DTypeLike,
31-
length: int,
29+
self,
30+
dtype: npt.DTypeLike,
31+
length: int,
3232
) -> npt.NDArray[Any]:
3333
dtype = np.dtype(dtype)
3434
buffer = self.file.read(length * dtype.itemsize)

‎rdata/tests/data/test_dataframe.rda

1 Byte
Binary file not shown.

‎rdata/tests/data/test_dataframe.rds

1 Byte
Binary file not shown.
217 Bytes
Binary file not shown.
235 Bytes
Binary file not shown.
Binary file not shown.
123 Bytes
Binary file not shown.
163 Bytes
Binary file not shown.
1 Byte
Binary file not shown.
0 Bytes
Binary file not shown.

‎rdata/tests/test_missing.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""Tests of missing value functionality."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Any
6+
7+
import numpy as np
8+
import pytest
9+
10+
from rdata.missing import R_FLOAT_NA, R_INT_NA, is_na, mask_na_values
11+
12+
13+
def test_int_is_na() -> None:
14+
"""Test checking NA values in int array."""
15+
array = np.array([1, 2, R_INT_NA], dtype=np.int32)
16+
ref_mask = np.array([0, 0, 1], dtype=np.bool_)
17+
18+
mask = is_na(array)
19+
np.testing.assert_array_equal(mask, ref_mask)
20+
21+
22+
def test_float_is_na() -> None:
23+
"""Test checking NA values in float array."""
24+
array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
25+
ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
26+
27+
mask = is_na(array)
28+
np.testing.assert_array_equal(mask, ref_mask)
29+
30+
31+
@pytest.mark.parametrize("value", [R_INT_NA, R_FLOAT_NA])
32+
def test_value_is_na(value: Any) -> None: # noqa: ANN401
33+
"""Test checking single NA values."""
34+
assert is_na(value)
35+
36+
37+
@pytest.mark.parametrize("value", [
38+
np.int32(0), 0, np.float64(0.0), 0.0, np.nan,
39+
])
40+
def test_value_is_not_na(value: Any) -> None: # noqa: ANN401
41+
"""Test checking single non-NA values."""
42+
assert not is_na(value)
43+
44+
45+
def test_int64() -> None:
46+
"""Test checking int64."""
47+
with pytest.raises(NotImplementedError):
48+
is_na(2**32)
49+
with pytest.raises(NotImplementedError):
50+
is_na(-2**32)
51+
52+
53+
def test_wrong_type() -> None:
54+
"""Test checking an unsupported type."""
55+
with pytest.raises(NotImplementedError):
56+
is_na("test")
57+
58+
59+
def test_masked_array() -> None:
60+
"""Test checking masked array creation."""
61+
array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
62+
ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
63+
ref_data = array.copy()
64+
65+
masked = mask_na_values(array)
66+
assert isinstance(masked, np.ma.MaskedArray)
67+
np.testing.assert_array_equal(masked.data, ref_data)
68+
np.testing.assert_array_equal(masked.mask, ref_mask)
69+
70+
71+
def test_masked_array_fill() -> None:
72+
"""Test checking masked array creation with a custom fill value."""
73+
array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
74+
ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
75+
ref_data = array.copy()
76+
ref_data[ref_mask] = 42
77+
78+
masked = mask_na_values(array, fill_value=42)
79+
assert isinstance(masked, np.ma.MaskedArray)
80+
np.testing.assert_array_equal(masked.data, ref_data)
81+
np.testing.assert_array_equal(masked.mask, ref_mask)
82+
83+
84+
def test_nonmasked_array() -> None:
85+
"""Test checking masked array no-op."""
86+
array = np.array([1, 2, np.nan, np.nan], dtype=np.float64)
87+
88+
masked = mask_na_values(array)
89+
assert not isinstance(masked, np.ma.MaskedArray)
90+
np.testing.assert_array_equal(masked, array)

‎rdata/tests/test_rdata.py

+120
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import xarray
1414

1515
import rdata
16+
from rdata.missing import R_FLOAT_NA
1617

1718
TESTDATA_PATH = rdata.TESTDATA_PATH
1819

@@ -453,6 +454,9 @@ def test_encodings_v3(self) -> None:
453454

454455
def test_dataframe(self) -> None:
455456
"""Test dataframe conversion."""
457+
# Files created in R with
458+
# test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe.rda", version=2) # noqa: E501
459+
# test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe_v3.rda") # noqa: E501
456460
for f in ("test_dataframe.rda", "test_dataframe_v3.rda"):
457461
with self.subTest(file=f):
458462
data = rdata.read_rda(TESTDATA_PATH / f)
@@ -475,6 +479,9 @@ def test_dataframe(self) -> None:
475479

476480
def test_dataframe_rds(self) -> None:
477481
"""Test dataframe conversion."""
482+
# Files created in R with
483+
# df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe.rds", version=2) # noqa: E501
484+
# df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe_v3.rds") # noqa: E501
478485
for f in ("test_dataframe.rds", "test_dataframe_v3.rds"):
479486
with self.subTest(file=f):
480487
data = rdata.read_rds(TESTDATA_PATH / f)
@@ -515,6 +522,118 @@ def test_dataframe_rownames(self) -> None:
515522
),
516523
)
517524

525+
def test_dataframe_int_rownames(self) -> None:
526+
"""Test dataframe conversion with integer row names."""
527+
# File created in R with
528+
# df = data.frame(col1=c(10, 20, 30), row.names=c(3L, 6L, 9L)); saveRDS(df, file="test_dataframe_int_rownames.rds") # noqa: E501
529+
data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_int_rownames.rds")
530+
531+
index = np.array([3, 6, 9], dtype=np.int32)
532+
ref = pd.DataFrame(
533+
{
534+
"col1": pd.Series(
535+
[10., 20., 30.],
536+
dtype=float, index=index),
537+
},
538+
index=index,
539+
)
540+
pd.testing.assert_frame_equal(data, ref)
541+
542+
def test_dataframe_range_rownames(self) -> None:
543+
"""Test dataframe conversion with range row names."""
544+
# File created in R with
545+
# df = data.frame(col1=c(10, 20, 30), row.names=2:4); saveRDS(df, file="test_dataframe_range_rownames.rds") # noqa: E501
546+
data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_range_rownames.rds")
547+
548+
index = pd.RangeIndex(2, 5)
549+
ref = pd.DataFrame(
550+
{
551+
"col1": pd.Series(
552+
[10., 20., 30.],
553+
dtype=float, index=index),
554+
},
555+
index=index,
556+
)
557+
pd.testing.assert_frame_equal(data, ref)
558+
559+
def test_dataframe_dtypes(self) -> None:
560+
"""Test dataframe conversion with several column dtypes."""
561+
# File created in R with
562+
# df = data.frame(int=c(10L, 20L, 30L), float=c(1.1, 2.2, 3.3), string=c("x", "y", "z"), bool=as.logical(c(1, 0, 1)), complex=c(4+5i, 6+7i, 8+9i)); print(df); saveRDS(df, file="test_dataframe_dtypes.rds") # noqa: E501
563+
data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes.rds")
564+
565+
index = pd.RangeIndex(1, 4)
566+
ref = pd.DataFrame(
567+
{
568+
"int": pd.Series(
569+
[10, 20, 30],
570+
dtype=pd.Int32Dtype(), index=index),
571+
"float": pd.Series(
572+
[1.1, 2.2, 3.3],
573+
dtype=float, index=index),
574+
"string": pd.Series(
575+
["x" ,"y", "z"],
576+
dtype=pd.StringDtype(), index=index),
577+
"bool": pd.Series(
578+
[True, False, True],
579+
dtype=pd.BooleanDtype(), index=index),
580+
"complex": pd.Series(
581+
[4+5j, 6+7j, 8+9j],
582+
dtype=complex, index=index),
583+
},
584+
index=index,
585+
)
586+
pd.testing.assert_frame_equal(data, ref)
587+
588+
def test_dataframe_dtypes_with_na(self) -> None:
589+
"""Test dataframe conversion with several column dtypes containing NA."""
590+
# File created in R with
591+
# df = data.frame(int=c(10L, 20L, 30L, NA), float=c(1.1, 2.2, 3.3, NA), string=c("x", "y", "z", NA), bool=as.logical(c(1, 0, 1, NA)), complex=c(4+5i, 6+7i, 8+9i, NA)); saveRDS(df, file="test_dataframe_dtypes_with_na.rds") # noqa: E501
592+
data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes_with_na.rds")
593+
594+
index = pd.RangeIndex(1, 5)
595+
ref = pd.DataFrame(
596+
{
597+
"int": pd.Series(
598+
[10, 20, 30, pd.NA],
599+
dtype=pd.Int32Dtype(), index=index),
600+
"float": pd.Series(
601+
[1.1, 2.2, 3.3, R_FLOAT_NA],
602+
dtype=float, index=index),
603+
"string": pd.Series(
604+
["x" ,"y", "z", pd.NA],
605+
dtype=pd.StringDtype(), index=index),
606+
"bool": pd.Series(
607+
[True, False, True, pd.NA],
608+
dtype=pd.BooleanDtype(), index=index),
609+
"complex": pd.Series(
610+
[4+5j, 6+7j, 8+9j, R_FLOAT_NA],
611+
dtype=complex, index=index),
612+
},
613+
index=index,
614+
)
615+
616+
with np.errstate(invalid="ignore"):
617+
# Comparing complex arrays with R_FLOAT_NA gives warning
618+
pd.testing.assert_frame_equal(data, ref)
619+
620+
def test_dataframe_float_with_na_nan(self) -> None:
621+
"""Test dataframe conversion of floats with NA, NaN and Inf."""
622+
# File created in R with
623+
# df = data.frame(float=c(1.1, 2.2, 3.3, NA, NaN, Inf, -Inf)); saveRDS(df, file="test_dataframe_float_with_na_nan.rds") # noqa: E501,ERA001
624+
data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_float_with_na_nan.rds")
625+
626+
index = pd.RangeIndex(1, 8)
627+
ref = pd.DataFrame(
628+
{
629+
"float": pd.Series(
630+
[1.1, 2.2, 3.3, R_FLOAT_NA, np.nan, np.inf, -np.inf],
631+
dtype=float, index=index),
632+
},
633+
index=index,
634+
)
635+
pd.testing.assert_frame_equal(data, ref)
636+
518637
def test_ts(self) -> None:
519638
"""Test time series conversion."""
520639
data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")
@@ -689,6 +808,7 @@ def test_altrep_wrap_real_attributes(self) -> None:
689808
data = rdata.conversion.convert(parsed)
690809
np.testing.assert_equal(data, [1., 2., 3.])
691810

811+
@pytest.mark.filterwarnings("ignore:Missing constructor")
692812
def test_altrep_wrap_real_class_attribute(self) -> None:
693813
"""Test alternative representation of wrap_real with class attribute."""
694814
# File created in R with

‎rdata/tests/test_write.py

+131-35
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
from pathlib import Path
88
from typing import TYPE_CHECKING, Any
99

10+
import numpy as np
11+
import pandas as pd
1012
import pytest
1113

1214
import rdata
15+
from rdata.conversion import ConverterFromPythonToR, convert_python_to_r_object
1316
from rdata.unparser import unparse_data
1417

1518
if TYPE_CHECKING:
@@ -82,7 +85,8 @@ def test_unparse(fname: str) -> None:
8285
with (TESTDATA_PATH / fname).open("rb") as f:
8386
data = decompress_data(f.read())
8487
file_type, file_format = parse_file_type_and_format(data)
85-
r_data = rdata.parser.parse_data(data, expand_altrep=False)
88+
r_data = rdata.parser.parse_data(
89+
data, expand_altrep=False, extension=f".{file_type}")
8690

8791
try:
8892
out_data = unparse_data(
@@ -96,23 +100,30 @@ def test_unparse(fname: str) -> None:
96100
assert data == out_data
97101

98102

103+
@pytest.mark.filterwarnings("ignore:Missing constructor")
99104
@pytest.mark.parametrize("fname", fnames, ids=fnames)
100-
def test_convert_to_r(fname: str) -> None:
105+
@pytest.mark.parametrize("expand_altrep", [True, False])
106+
def test_convert_to_r(fname: str, expand_altrep: bool) -> None: # noqa: FBT001
101107
"""Test converting Python data to RData object."""
102108
with (TESTDATA_PATH / fname).open("rb") as f:
103-
# Skip test files without unique R->py->R transformation
109+
# Skip test files without unique transformation
104110
if fname in [
105-
"test_encodings.rda", # encoding not kept in Python
106-
"test_encodings_v3.rda", # encoding not kept in Python
107-
"test_list_attrs.rda", # attributes not kept in Python
108-
"test_file.rda", # attributes not kept in Python
111+
# encoding not kept in Python
112+
"test_encodings.rda",
113+
"test_encodings_v3.rda",
114+
# attributes not kept in Python
115+
"test_list_attrs.rda",
116+
"test_file.rda",
117+
"test_altrep_wrap_real_attributes.rds",
118+
"test_altrep_wrap_real_class_attribute.rds",
109119
]:
110-
pytest.skip("ambiguous R->py->R transformation")
120+
pytest.skip("ambiguous R-to-Python-to-R transformation")
111121

112122
data = decompress_data(f.read())
113123
file_type, file_format = parse_file_type_and_format(data)
114124

115-
r_data = rdata.parser.parse_data(data, expand_altrep=False)
125+
r_data = rdata.parser.parse_data(
126+
data, expand_altrep=expand_altrep, extension=f".{file_type}")
116127

117128
try:
118129
py_data = rdata.conversion.convert(r_data)
@@ -126,68 +137,153 @@ def test_convert_to_r(fname: str) -> None:
126137
else:
127138
encoding = encoding.lower() # type: ignore [assignment]
128139

140+
converter = ConverterFromPythonToR(
141+
encoding=encoding,
142+
format_version=r_data.versions.format,
143+
r_version_serialized=r_data.versions.serialized,
144+
)
145+
129146
try:
130-
if file_type == "rds":
131-
r_obj = rdata.conversion.convert_to_r_object(
132-
py_data, encoding=encoding)
133-
else:
134-
r_obj = rdata.conversion.convert_to_r_object_for_rda(
135-
py_data, encoding=encoding)
136-
new_r_data = rdata.conversion.build_r_data(
137-
r_obj,
138-
encoding=encoding,
139-
format_version=r_data.versions.format,
140-
r_version_serialized=r_data.versions.serialized,
141-
)
147+
new_r_data = converter.convert_to_r_data(py_data, file_type=file_type)
142148
except NotImplementedError as e:
143149
pytest.xfail(str(e))
144150

145-
assert r_data == new_r_data
146151
assert str(r_data) == str(new_r_data)
152+
assert r_data == new_r_data
153+
154+
# Check further that the resulting unparsed data is correct to ensure that
155+
# Python-to-R conversion hasn't created any odd objects that can't be unparsed
156+
if not expand_altrep:
157+
file_type, file_format = parse_file_type_and_format(data)
158+
out_data = unparse_data(
159+
new_r_data, file_format=file_format, file_type=file_type)
160+
161+
if file_format == "ascii":
162+
data = data.replace(b"\r\n", b"\n")
163+
164+
assert data == out_data
147165

148166

149-
def test_convert_to_r_bad_rda() -> None:
167+
def test_convert_to_r_rda_missing_names() -> None:
150168
"""Test checking that data for RDA has variable names."""
151-
py_data = "hello"
169+
converter = ConverterFromPythonToR()
152170
with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
153-
rdata.conversion.convert_to_r_object_for_rda(py_data) # type: ignore [arg-type]
171+
converter.convert_to_r_data("hello", file_type="rda")
172+
173+
174+
def test_convert_to_r_rda_nonstr_names() -> None:
175+
"""Test checking that RDA variable names are strings."""
176+
converter = ConverterFromPythonToR()
177+
with pytest.raises(ValueError, match="(?i)keys must be strings"):
178+
converter.convert_to_r_data({1: "hello"}, file_type="rda")
154179

155180

156181
def test_convert_to_r_empty_rda() -> None:
157182
"""Test checking that data for RDA is not empty."""
158183
py_data: dict[str, Any] = {}
184+
converter = ConverterFromPythonToR()
159185
with pytest.raises(ValueError, match="(?i)data must not be empty"):
160-
rdata.conversion.convert_to_r_object_for_rda(py_data)
186+
converter.convert_to_r_data(py_data, file_type="rda")
161187

162188

163189
def test_unparse_bad_rda() -> None:
164190
"""Test checking that data for RDA has variable names."""
165191
py_data = "hello"
166-
r_obj = rdata.conversion.convert_to_r_object(py_data)
167-
r_data = rdata.conversion.build_r_data(r_obj)
192+
converter = ConverterFromPythonToR()
193+
r_data = converter.convert_to_r_data(py_data)
168194
with pytest.raises(ValueError, match="(?i)must be dictionary-like"):
169195
unparse_data(r_data, file_type="rda")
170196

171197

172198
def test_convert_to_r_bad_encoding() -> None:
173199
"""Test checking unknown encoding."""
200+
converter = ConverterFromPythonToR(encoding="non-existent") # type: ignore [arg-type]
174201
with pytest.raises(LookupError, match="(?i)unknown encoding"):
175-
rdata.conversion.convert_to_r_object("ä", encoding="non-existent") # type: ignore [arg-type]
202+
converter.convert_to_r_object("ä")
176203

177204

178205
def test_convert_to_r_unsupported_encoding() -> None:
179206
"""Test checking unsupported encoding."""
207+
converter = ConverterFromPythonToR(encoding="cp1250") # type: ignore [arg-type]
180208
with pytest.raises(ValueError, match="(?i)unsupported encoding"):
181-
rdata.conversion.convert_to_r_object("ä", encoding="cp1250") # type: ignore [arg-type]
209+
converter.convert_to_r_object("ä")
210+
182211

212+
def test_convert_to_r_nonstr_dict_keys() -> None:
213+
"""Test checking non-string dict keys."""
214+
converter = ConverterFromPythonToR()
215+
with pytest.raises(ValueError, match="(?i)keys must be strings"):
216+
converter.convert_to_r_object({"a": 1, 2: 2})
183217

184-
def test_unparse_big_int() -> None:
218+
219+
@pytest.mark.parametrize("file_format", valid_formats)
220+
@pytest.mark.parametrize("value", [-2**31 - 1, 2**31])
221+
def test_unparse_big_int(file_format: FileFormat, value: int) -> None:
185222
"""Test checking too large integers."""
186-
big_int = 2**32
187-
r_obj = rdata.conversion.convert_to_r_object(big_int)
188-
r_data = rdata.conversion.build_r_data(r_obj)
223+
converter = ConverterFromPythonToR()
224+
r_data = converter.convert_to_r_data(value)
189225
with pytest.raises(ValueError, match="(?i)not castable"):
190-
unparse_data(r_data, file_format="xdr")
226+
unparse_data(r_data, file_format=file_format)
227+
228+
229+
def test_convert_dataframe_pandas_dtypes() -> None:
    """Test that pandas extension dtypes convert like plain dtypes."""
    # Columns given as a numpy array and plain Python lists.
    plain_columns = {
        "int": np.array([10, 20, 30], dtype=np.int32),
        "float": [1.1, 2.2, 3.3],
        "string": ["x", "y", "z"],
        "bool": [True, False, True],
        "complex": [4+5j, 6+7j, 8+9j],
    }
    plain_df = pd.DataFrame(plain_columns, index=range(3))

    # The same values as pandas Series with explicit dtypes, all sharing
    # one RangeIndex.
    row_index = pd.RangeIndex(3)
    typed_columns = {
        "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype(), index=row_index),
        "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype(), index=row_index),
        "string": pd.Series(["x", "y", "z"], dtype=pd.StringDtype(), index=row_index),
        "bool": pd.Series([1, 0, 1], dtype=pd.BooleanDtype(), index=row_index),
        "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex, index=row_index),
    }
    typed_df = pd.DataFrame(typed_columns, index=row_index)

    plain_r_obj = convert_python_to_r_object(plain_df)
    typed_r_obj = convert_python_to_r_object(typed_df)

    # Both the textual form and the objects themselves must agree.
    assert str(plain_r_obj) == str(typed_r_obj)
    assert plain_r_obj == typed_r_obj
259+
260+
261+
def test_convert_dataframe_rangeindex() -> None:
    """Test that RangeIndex(3) and Index([0, 1, 2]) convert differently."""
    columns = {"data": np.array([10, 20, 30], dtype=np.int32)}

    range_indexed = pd.DataFrame(columns, index=pd.RangeIndex(3))
    explicitly_indexed = pd.DataFrame(columns, index=pd.Index([0, 1, 2]))

    range_r_obj = convert_python_to_r_object(range_indexed)
    explicit_r_obj = convert_python_to_r_object(explicitly_indexed)

    # Same labels, different index kinds: the R objects must not match.
    assert str(range_r_obj) != str(explicit_r_obj)
    assert range_r_obj != explicit_r_obj
273+
274+
275+
def test_convert_dataframe_rangeindex_flattened() -> None:
    """Test that RangeIndex(3, 8, 2) converts like Index([3, 5, 7])."""
    columns = {"data": np.array([10, 20, 30], dtype=np.int32)}

    range_indexed = pd.DataFrame(columns, index=pd.RangeIndex(3, 8, 2))
    explicitly_indexed = pd.DataFrame(columns, index=pd.Index([3, 5, 7]))

    range_r_obj = convert_python_to_r_object(range_indexed)
    explicit_r_obj = convert_python_to_r_object(explicitly_indexed)

    # A range with this start/step must give the same R object as the
    # explicit list of its labels.
    assert str(range_r_obj) == str(explicit_r_obj)
    assert range_r_obj == explicit_r_obj
191287

192288

193289
@pytest.mark.parametrize("compression", [*valid_compressions, "fail"])

‎rdata/unparser/__init__.py

+22-18
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525

2626

2727
def unparse_file(
28-
path: os.PathLike[Any] | str,
29-
r_data: RData,
30-
*,
31-
file_format: FileFormat = "xdr",
32-
file_type: FileType = "rds",
33-
compression: Compression = "gzip",
28+
path: os.PathLike[Any] | str,
29+
r_data: RData,
30+
*,
31+
file_format: FileFormat = "xdr",
32+
file_type: FileType = "rds",
33+
compression: Compression = "gzip",
3434
) -> None:
3535
"""
3636
Unparse RData object to a file.
@@ -59,11 +59,11 @@ def unparse_file(
5959

6060

6161
def unparse_fileobj(
62-
fileobj: IO[Any],
63-
r_data: RData,
64-
*,
65-
file_format: FileFormat = "xdr",
66-
file_type: FileType = "rds",
62+
fileobj: IO[Any],
63+
r_data: RData,
64+
*,
65+
file_format: FileFormat = "xdr",
66+
file_type: FileType = "rds",
6767
) -> None:
6868
"""
6969
Unparse RData object to a file object.
@@ -78,9 +78,11 @@ def unparse_fileobj(
7878

7979
if file_format == "ascii":
8080
from ._ascii import UnparserASCII as Unparser
81+
8182
rda_magic = "RDA"
8283
elif file_format == "xdr":
8384
from ._xdr import UnparserXDR as Unparser
85+
8486
rda_magic = "RDX"
8587
else:
8688
msg = f"Unknown file format: {file_format}"
@@ -89,9 +91,11 @@ def unparse_fileobj(
8991
# Check that RData object for rda file is of correct kind
9092
if file_type == "rda":
9193
r_object = r_data.object
92-
if not (r_object.info.type is RObjectType.LIST
93-
and r_object.tag is not None
94-
and r_object.tag.info.type is RObjectType.SYM):
94+
if not (
95+
r_object.info.type is RObjectType.LIST
96+
and r_object.tag is not None
97+
and r_object.tag.info.type is RObjectType.SYM
98+
):
9599
msg = "r_data object must be dictionary-like for rda file"
96100
raise ValueError(msg)
97101

@@ -104,10 +108,10 @@ def unparse_fileobj(
104108

105109

106110
def unparse_data(
107-
r_data: RData,
108-
*,
109-
file_format: FileFormat = "xdr",
110-
file_type: FileType = "rds",
111+
r_data: RData,
112+
*,
113+
file_format: FileFormat = "xdr",
114+
file_type: FileType = "rds",
111115
) -> bytes:
112116
"""
113117
Unparse RData object to a bytestring.

‎rdata/unparser/_ascii.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77

88
import numpy as np
99

10+
from rdata.missing import is_na
11+
1012
from ._unparser import Unparser
1113

1214
if TYPE_CHECKING:
1315
import io
14-
from typing import Any, Final
16+
from typing import Final
1517

1618
import numpy.typing as npt
1719

@@ -33,7 +35,7 @@ def escape(b: bytes) -> str:
3335
byte_to_str[byte] = escape(bytes([byte]))
3436

3537
# Update mapping for special characters
36-
byte_to_str[b'"'[0]] = r'\"'
38+
byte_to_str[b'"'[0]] = r"\""
3739
byte_to_str[b"'"[0]] = r"\'"
3840
byte_to_str[b"?"[0]] = r"\?"
3941
byte_to_str[b" "[0]] = r"\040"
@@ -66,11 +68,10 @@ def unparse_magic(self) -> None:
6668
"""Unparse magic bits."""
6769
self._add_line("A")
6870

69-
def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
70-
# Convert boolean to int
71-
if np.issubdtype(array.dtype, np.bool_):
72-
array = array.astype(np.int32)
73-
71+
def _unparse_array_values_raw(
72+
self,
73+
array: npt.NDArray[np.int32 | np.float64 | np.complex128],
74+
) -> None:
7475
# Convert complex to pairs of floats
7576
if np.issubdtype(array.dtype, np.complexfloating):
7677
assert array.dtype == np.complex128
@@ -79,19 +80,20 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
7980
# Unparse data
8081
for value in array:
8182
if np.issubdtype(array.dtype, np.integer):
82-
line = "NA" if value is None or np.ma.is_masked(value) else str(value) # type: ignore [no-untyped-call]
83+
line = "NA" if is_na(value) else str(value)
8384

8485
elif np.issubdtype(array.dtype, np.floating):
85-
if np.isnan(value):
86+
if is_na(value):
87+
line = "NA"
88+
elif np.isnan(value):
8689
line = "NaN"
8790
elif value == np.inf:
8891
line = "Inf"
8992
elif value == -np.inf:
9093
line = "-Inf"
9194
else:
9295
line = str(value)
93-
if line.endswith(".0"):
94-
line = line[:-2]
96+
line = line.removesuffix(".0")
9597

9698
else:
9799
msg = f"Unknown dtype: {array.dtype}"

‎rdata/unparser/_unparser.py

+32-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import numpy as np
99

10+
from rdata.missing import R_INT_NA
1011
from rdata.parser import (
1112
RData,
1213
RExtraInfo,
@@ -69,9 +70,35 @@ def unparse_array(self, array: npt.NDArray[Any]) -> None:
6970
self.unparse_int(array.size)
7071
self._unparse_array_values(array)
7172

72-
def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
    """
    Normalize an array and unparse its values.

    Boolean arrays are converted to int32. Integer arrays are converted
    to int32 after a range check, and masked elements are replaced by
    ``R_INT_NA``. The normalized array is then passed to
    :meth:`_unparse_array_values_raw`.

    Args:
        array: Array whose values are unparsed.

    Raises:
        ValueError: If an unmasked integer value does not fit in int32.
    """
    # Convert boolean to int
    if np.issubdtype(array.dtype, np.bool_):
        array = array.astype(np.int32)

    # Flatten masked values and convert int arrays to int32
    if np.issubdtype(array.dtype, np.integer):
        mask = None
        if np.ma.is_masked(array):  # type: ignore [no-untyped-call]
            mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
            array = np.ma.getdata(array)  # type: ignore [no-untyped-call]

        if array.dtype != np.int32:
            info = np.iinfo(np.int32)
            # Only unmasked values take part in the range check; masked
            # slots are overwritten with NA below.
            checked = array if mask is None else array[~mask]
            if np.any(checked > info.max) or np.any(checked < info.min):
                msg = "Integer array not castable to int32"
                raise ValueError(msg)
            array = array.astype(np.int32)
        elif mask is not None:
            # Do not write NA into the caller's underlying buffer.
            array = array.copy()

        if mask is not None:
            # NA is written only after the cast to int32, so the value
            # is stored in an int32 buffer rather than in a narrower or
            # unsigned source dtype.
            array[mask] = R_INT_NA

    assert array.dtype in (np.int32, np.float64, np.complex128)
    self._unparse_array_values_raw(array)


@abc.abstractmethod
def _unparse_array_values_raw(
    self,
    array: npt.NDArray[np.int32 | np.float64 | np.complex128],
) -> None:
    """
    Unparse the values of an already normalized array.

    Called by :meth:`_unparse_array_values` with an int32, float64 or
    complex128 array; subclasses write the values in their own format.

    Args:
        array: Normalized array whose values are written as such.
    """
75102

76103
def unparse_string(self, value: bytes | None) -> None:
77104
"""Unparse a string."""
@@ -106,8 +133,9 @@ def unparse_r_object(self, obj: RObject) -> None: # noqa: C901, PLR0912
106133
# Unparse data
107134
value = obj.value
108135
if info.type in {
109-
RObjectType.NIL,
110-
RObjectType.NILVALUE,
136+
RObjectType.NIL,
137+
RObjectType.NILVALUE,
138+
RObjectType.REF,
111139
}:
112140
# These types don't have any data
113141
assert value is None
@@ -118,6 +146,7 @@ def unparse_r_object(self, obj: RObject) -> None: # noqa: C901, PLR0912
118146
elif info.type in {
119147
RObjectType.LIST,
120148
RObjectType.LANG,
149+
RObjectType.ALTREP,
121150
# Parser treats the following equal to LIST.
122151
# Not tested if they work
123152
# RObjectType.CLO,

‎rdata/unparser/_xdr.py

+6-22
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,14 @@
22

33
from __future__ import annotations
44

5-
from typing import TYPE_CHECKING, Any
6-
7-
import numpy as np
8-
9-
from rdata.parser import R_INT_NA
5+
from typing import TYPE_CHECKING
106

117
from ._unparser import Unparser
128

139
if TYPE_CHECKING:
1410
import io
1511

12+
import numpy as np
1613
import numpy.typing as npt
1714

1815

@@ -30,23 +27,10 @@ def unparse_magic(self) -> None:
3027
"""Unparse magic bits."""
3128
self.file.write(b"X\n")
3229

33-
def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
34-
# Convert boolean to int
35-
if np.issubdtype(array.dtype, np.bool_):
36-
array = array.astype(np.int32)
37-
38-
# Flatten masked values and convert int arrays to int32
39-
if np.issubdtype(array.dtype, np.integer):
40-
if np.ma.is_masked(array): # type: ignore [no-untyped-call]
41-
mask = np.ma.getmask(array) # type: ignore [no-untyped-call]
42-
array = np.ma.getdata(array).copy() # type: ignore [no-untyped-call]
43-
array[mask] = R_INT_NA
44-
info = np.iinfo(np.int32)
45-
if not all(info.min <= val <= info.max for val in array):
46-
msg = "Integer array not castable to int32"
47-
raise ValueError(msg)
48-
array = array.astype(np.int32)
49-
30+
def _unparse_array_values_raw(
31+
self,
32+
array: npt.NDArray[np.int32 | np.float64 | np.complex128],
33+
) -> None:
5034
# Convert to big endian if needed
5135
array = array.astype(array.dtype.newbyteorder(">"))
5236

0 commit comments

Comments
 (0)
Please sign in to comment.