vnmabus · Mar 9, 2025
diff --git a/‎rdata/_write.py
+24-21 b/‎rdata/_write.py
+24-21
diff --git a/‎rdata/conversion/__init__.py
+5-4 b/‎rdata/conversion/__init__.py
+5-4
diff --git a/‎rdata/conversion/_conversion.py
+28-10 b/‎rdata/conversion/_conversion.py
+28-10
diff --git a/‎rdata/conversion/to_r.py
+580-209 b/‎rdata/conversion/to_r.py
+580-209
diff --git a/‎rdata/missing.py
+109 b/‎rdata/missing.py
+109
diff --git a/‎rdata/parser/__init__.py
-1 b/‎rdata/parser/__init__.py
-1
diff --git a/‎rdata/parser/_ascii.py
+19-8 b/‎rdata/parser/_ascii.py
+19-8
diff --git a/‎rdata/parser/_parser.py
+23-27 b/‎rdata/parser/_parser.py
+23-27
diff --git a/‎rdata/parser/_xdr.py
+3-3 b/‎rdata/parser/_xdr.py
+3-3
diff --git a/‎rdata/tests/data/test_dataframe.rda
1 Byte b/‎rdata/tests/data/test_dataframe.rda
1 Byte
diff --git a/‎rdata/tests/data/test_dataframe.rds
1 Byte b/‎rdata/tests/data/test_dataframe.rds
1 Byte
diff --git a/‎rdata/tests/data/test_dataframe_dtypes.rds
217 Bytes b/‎rdata/tests/data/test_dataframe_dtypes.rds
217 Bytes
diff --git a/‎rdata/tests/data/test_dataframe_dtypes_with_na.rds
235 Bytes b/‎rdata/tests/data/test_dataframe_dtypes_with_na.rds
235 Bytes
diff --git a/‎rdata/tests/data/test_dataframe_float_with_na_nan.rds
147 Bytes b/‎rdata/tests/data/test_dataframe_float_with_na_nan.rds
147 Bytes
diff --git a/‎rdata/tests/data/test_dataframe_int_rownames.rds
123 Bytes b/‎rdata/tests/data/test_dataframe_int_rownames.rds
123 Bytes
diff --git a/‎rdata/tests/data/test_dataframe_range_rownames.rds
163 Bytes b/‎rdata/tests/data/test_dataframe_range_rownames.rds
163 Bytes
diff --git a/‎rdata/tests/data/test_dataframe_v3.rda
1 Byte b/‎rdata/tests/data/test_dataframe_v3.rda
1 Byte
diff --git a/‎rdata/tests/data/test_dataframe_v3.rds
0 Bytes b/‎rdata/tests/data/test_dataframe_v3.rds
0 Bytes
diff --git a/‎rdata/tests/test_missing.py
+90 b/‎rdata/tests/test_missing.py
+90
diff --git a/‎rdata/tests/test_rdata.py
+120 b/‎rdata/tests/test_rdata.py
+120
diff --git a/‎rdata/tests/test_write.py
+131-35 b/‎rdata/tests/test_write.py
+131-35
diff --git a/‎rdata/unparser/__init__.py
+22-18 b/‎rdata/unparser/__init__.py
+22-18
diff --git a/‎rdata/unparser/_ascii.py
+13-11 b/‎rdata/unparser/_ascii.py
+13-11
diff --git a/‎rdata/unparser/_unparser.py
+32-3 b/‎rdata/unparser/_unparser.py
+32-3
diff --git a/‎rdata/unparser/_xdr.py
+6-22 b/‎rdata/unparser/_xdr.py
+6-22
@@ -1,17 +1,23 @@
 """Functions to perform conversion and unparsing in one step."""
+
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-from .conversion import build_r_data, convert_to_r_object, convert_to_r_object_for_rda
-from .conversion.to_r import DEFAULT_FORMAT_VERSION
+from .conversion import (
+    convert_python_to_r_data,
+)
+from .conversion.to_r import (
+    DEFAULT_CLASS_MAP,
+    DEFAULT_FORMAT_VERSION,
+)
 from .unparser import unparse_file
 
 if TYPE_CHECKING:
     import os
     from typing import Any
 
-    from .conversion.to_r import Encoding
+    from .conversion.to_r import ConstructorDict, Encoding
     from .unparser import Compression, FileFormat
 
 
@@ -23,14 +29,12 @@ def write_rds(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
 ) -> None:
     """
     Write an RDS file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -40,6 +44,8 @@ def write_rds(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
 
     See Also:
         :func:`write_rda`: Similar function that writes an RDA or RDATA file.
@@ -52,15 +58,13 @@ def write_rds(
         >>> data = ["hello", 1, 2.2, 3.3+4.4j]
         >>> rdata.write_rds("test.rds", data)
     """
-    r_object = convert_to_r_object(
+    r_data = convert_python_to_r_data(
         data,
         encoding=encoding,
-    )
-    r_data = build_r_data(
-        r_object,
-        encoding=encoding,
         format_version=format_version,
+        constructor_dict=constructor_dict,
     )
+
     unparse_file(
         path,
         r_data,
@@ -78,14 +82,12 @@ def write_rda(
     compression: Compression = "gzip",
     encoding: Encoding = "utf-8",
     format_version: int = DEFAULT_FORMAT_VERSION,
+    constructor_dict: ConstructorDict = DEFAULT_CLASS_MAP,
 ) -> None:
     """
     Write an RDA or RDATA file.
 
-    This is a convenience function that wraps
-    :func:`rdata.conversion.convert_to_r_object_for_rda`,
-    :func:`rdata.conversion.build_r_data`,
-    and :func:`rdata.unparser.unparse_file`,
+    This is a convenience function that wraps conversion and unparsing
     as it is the common use case.
 
     Args:
@@ -95,6 +97,8 @@ def write_rda(
         compression: Compression.
         encoding: Encoding to be used for strings within data.
         format_version: File format version.
+        constructor_dict: Dictionary mapping Python classes to
+            functions converting them to R classes.
 
     See Also:
         :func:`write_rds`: Similar function that writes an RDS file.
@@ -107,15 +111,14 @@ def write_rda(
         >>> data = {"name": "hello", "values": [1, 2.2, 3.3+4.4j]}
         >>> rdata.write_rda("test.rda", data)
     """
-    r_object = convert_to_r_object_for_rda(
+    r_data = convert_python_to_r_data(
         data,
         encoding=encoding,
-    )
-    r_data = build_r_data(
-        r_object,
-        encoding=encoding,
         format_version=format_version,
+        constructor_dict=constructor_dict,
+        file_type="rda",
     )
+
     unparse_file(
         path,
         r_data,
 
@@ -1,4 +1,5 @@
-"""Utilities for converting R objects to Python ones."""
+"""Utilities for converting between R and Python objects."""
+
 from ._conversion import (
     DEFAULT_CLASS_MAP as DEFAULT_CLASS_MAP,
     Converter as Converter,
@@ -25,7 +26,7 @@
     ts_constructor as ts_constructor,
 )
 from .to_r import (
-    build_r_data as build_r_data,
-    convert_to_r_object as convert_to_r_object,
-    convert_to_r_object_for_rda as convert_to_r_object_for_rda,
+    ConverterFromPythonToR as ConverterFromPythonToR,
+    convert_python_to_r_data as convert_python_to_r_data,
+    convert_python_to_r_object as convert_python_to_r_object,
 )
@@ -394,20 +394,38 @@ def convert_array(
     return value  # type: ignore [no-any-return]
 
 
-R_INT_MIN = -2**31
-
-
 def _dataframe_column_transform(source: Any) -> Any:  # noqa: ANN401
 
     if isinstance(source, np.ndarray):
+        dtype: Any
         if np.issubdtype(source.dtype, np.integer):
-            return pd.Series(source, dtype=pd.Int32Dtype()).array
-
-        if np.issubdtype(source.dtype, np.bool_):
-            return pd.Series(source, dtype=pd.BooleanDtype()).array
+            dtype = pd.Int32Dtype()
+        elif np.issubdtype(source.dtype, np.floating):
+            # We return the numpy array here, which keeps
+            # R_FLOAT_NA, np.nan, and other NaNs as they were originally in the file.
+            # Users can then decide if they prefer to interpret
+            # only R_FLOAT_NA or all NaNs as "missing".
+            return source
+            # This would create an array with all NaNs as "missing":
+            # dtype = pd.Float64Dtype()  # noqa: ERA001
+            # This would create an array with only R_FLOAT_NA as "missing":
+            # from rdata.missing import is_na  # noqa: ERA001
+            # return pd.arrays.FloatingArray(source, is_na(source))  # noqa: ERA001
+        elif np.issubdtype(source.dtype, np.complexfloating):
+            # There seems to be no pandas type for complex array
+            return source
+        elif np.issubdtype(source.dtype, np.bool_):
+            dtype = pd.BooleanDtype()
+        elif np.issubdtype(source.dtype, np.str_):
+            dtype = pd.StringDtype()
+        elif np.issubdtype(source.dtype, np.object_):
+            for value in source:
+                assert isinstance(value, str) or value is None
+            dtype = pd.StringDtype()
+        else:
+            return source
 
-        if np.issubdtype(source.dtype, np.str_):
-            return pd.Series(source, dtype=pd.StringDtype()).array
+        return pd.Series(source, dtype=dtype).array
 
     return source
 
@@ -430,7 +448,7 @@ def dataframe_constructor(
             and isinstance(row_names, np.ma.MaskedArray)
             and row_names.mask[0]
         )
-        else tuple(row_names)
+        else row_names
     )
 
     return pd.DataFrame(obj, columns=obj, index=index)
 
@@ -0,0 +1,109 @@
+"""Utilities for missing (NA) values in R."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+if TYPE_CHECKING:
+    from typing import Any, Final
+
+    import numpy.typing as npt
+
+
+#: Value used to represent a missing integer in R.
+R_INT_NA: Final[int] = np.int32(-2**31)  # type: ignore [assignment]
+
+#: Value used to represent a missing float in R.
+#  This is a NaN with a particular payload, but it's not the same as np.nan.
+R_FLOAT_NA: Final[float] = np.uint64(0x7ff00000000007a2).view(np.float64)  # type: ignore [assignment]
+
+
+def get_na_value(dtype: np.dtype[Any]) -> Any:  # noqa: ANN401
+    """
+    Get NA value for a given type.
+
+    Args:
+        dtype: NumPy dtype.
+
+    Returns:
+        NA value of given dtype.
+    """
+    if dtype == np.int32:
+        return R_INT_NA
+    if dtype == np.float64:
+        return R_FLOAT_NA
+    msg = f"NA for numpy dtype {dtype} not implemented"
+    raise NotImplementedError(msg)
+
+
+def is_na(
+    array: Any | npt.NDArray[Any],  # noqa: ANN401
+) -> bool | npt.NDArray[np.bool_]:
+    """
+    Check if the array elements are NA.
+
+    Args:
+        array: NumPy array or single value.
+
+    Returns:
+        Boolean mask of NA values in the array.
+    """
+    if isinstance(array, np.ndarray):
+        dtype = array.dtype
+        na = get_na_value(dtype)
+        if dtype == np.int32:
+            # Use the native dtype for comparison when possible;
+            # slightly faster than the steps below
+            return array == na  # type: ignore [no-any-return]
+        # Convert dtype to unsigned integer to perform byte-by-byte
+        # equality comparison to distinguish different NaN values
+        raw_dtype = f"u{array.dtype.itemsize}"
+        return array.view(raw_dtype) == np.array(na).view(raw_dtype)  # type: ignore [no-any-return]
+
+    if isinstance(array, int):
+        try:
+            # Python built-in integer is 64 bits or larger, so
+            # we try to cast it to 32-bit int if possible
+            return is_na(np.array(array, dtype=np.int32))
+        except OverflowError:
+            # Proceed with larger integer (in case it is supported at some point)
+            return is_na(np.array(array))
+
+    if isinstance(array, (float, np.int32, np.float64)):
+        return is_na(np.array(array))
+
+    msg = f"NA for {type(array)} not implemented"
+    raise NotImplementedError(msg)
+
+
+def mask_na_values(
+    array: npt.NDArray[Any],
+    *,
+    fill_value: Any | None = None,  # noqa: ANN401
+) -> npt.NDArray[Any] | np.ma.MaskedArray[Any, Any]:
+    """
+    Mask NA elements of the array.
+
+    Args:
+        array: NumPy array.
+        fill_value: Fill value for the masked array.
+            Defaults to the NA value.
+
+    Returns:
+        NumPy masked array with NA values as the mask
+        or the original array if there are no NA elements.
+    """
+    mask = is_na(array)
+    if np.any(mask):
+        if fill_value is None:
+            fill_value = get_na_value(array.dtype)
+
+        array[mask] = fill_value
+        return np.ma.array(  # type: ignore [no-untyped-call,no-any-return]
+            data=array,
+            mask=mask,
+            fill_value=fill_value,
+        )
+    return array
@@ -1,8 +1,7 @@
 """Utilities for parsing a rdata file."""
 
 from ._parser import (
     DEFAULT_ALTREP_MAP as DEFAULT_ALTREP_MAP,
-    R_INT_NA as R_INT_NA,
     CharFlags as CharFlags,
     RData as RData,
     RExtraInfo as RExtraInfo,
 
@@ -6,7 +6,17 @@
 import numpy as np
 import numpy.typing as npt
 
-from ._parser import R_INT_NA, AltRepConstructorMap, Parser
+from rdata.missing import R_FLOAT_NA, R_INT_NA
+
+from ._parser import AltRepConstructorMap, Parser
+
+
+def map_int_na(line: str) -> int:
+    return R_INT_NA if line == "NA" else int(line)
+
+
+def map_float_na(line: str) -> float:
+    return R_FLOAT_NA if line == "NA" else float(line)
 
 
 class ParserASCII(Parser):
@@ -30,26 +40,27 @@ def _readline(self) -> str:
         return self.file.readline()[:-1]
 
     def _parse_array_values(
-            self,
-            dtype: npt.DTypeLike,
-            length: int,
+        self,
+        dtype: npt.DTypeLike,
+        length: int,
     ) -> npt.NDArray[Any]:
-
         array = np.empty(length, dtype=dtype)
         value: int | float | complex
 
         for i in range(length):
             line = self._readline()
 
             if np.issubdtype(dtype, np.integer):
-                value = R_INT_NA if line == "NA" else int(line)
+                value = map_int_na(line)
 
             elif np.issubdtype(dtype, np.floating):
-                value = float(line)
+                value = map_float_na(line)
 
             elif np.issubdtype(dtype, np.complexfloating):
+                value1 = map_float_na(line)
                 line2 = self._readline()
-                value = complex(float(line), float(line2))
+                value2 = map_float_na(line2)
+                value = complex(value1, value2)
 
             else:
                 msg = f"Unknown dtype: {dtype}"
 
@@ -23,15 +23,13 @@
 import numpy as np
 import numpy.typing as npt
 
+from rdata.missing import R_INT_NA, mask_na_values
+
 if TYPE_CHECKING:
     from ._ascii import ParserASCII
     from ._xdr import ParserXDR
 
 
-#: Value used to represent a missing integer in R.
-R_INT_NA: Final = -2**31
-
-
 @runtime_checkable
 class BinaryFileLike(Protocol):
     """Protocol for binary files."""
@@ -371,6 +369,9 @@ def __eq__(self, other: object) -> bool:
             return False
 
         # Compare value field
+        if not isinstance(other.value, type(self.value)):
+            return False
+
         if isinstance(self.value, np.ndarray):
             if not np.array_equal(self.value, other.value, equal_nan=True):
                 return False
@@ -540,6 +541,22 @@ def wrap_constructor(
     return new_info, value
 
 
+def get_altrep_name(info: RObject) -> bytes:
+    """Get the name of the ALTREP object."""
+    assert info.info.type == RObjectType.LIST
+
+    class_sym = info.value[0]
+    while class_sym.info.type == RObjectType.REF:
+        class_sym = class_sym.referenced_object
+
+    assert class_sym.info.type == RObjectType.SYM
+    assert class_sym.value.info.type == RObjectType.CHAR
+
+    altrep_name = class_sym.value.value
+    assert isinstance(altrep_name, bytes)
+    return altrep_name
+
+
 default_altrep_map_dict: Final[Mapping[bytes, AltRepConstructor]] = {
     b"deferred_string": deferred_string_constructor,
     b"compact_intseq": compact_intseq_constructor,
@@ -608,17 +625,7 @@ def parse_nullable_int_array(
     ) -> npt.NDArray[np.int32] | np.ma.MaskedArray[Any, Any]:
         """Parse an integer array."""
         data = self._parse_array(np.int32)
-        mask = (data == R_INT_NA)
-        data[mask] = fill_value
-
-        if np.any(mask):
-            return np.ma.array(  # type: ignore [no-untyped-call,no-any-return]
-                data=data,
-                mask=mask,
-                fill_value=fill_value,
-            )
-
-        return data
+        return mask_na_values(data, fill_value=fill_value)
 
     def parse_double_array(self) -> npt.NDArray[np.float64]:
         """Parse a double array."""
@@ -678,18 +685,7 @@ def expand_altrep_to_object(
         state: RObject,
     ) -> tuple[RObjectInfo, Any]:
         """Expand alternative representation to normal object."""
-        assert info.info.type == RObjectType.LIST
-
-        class_sym = info.value[0]
-        while class_sym.info.type == RObjectType.REF:
-            class_sym = class_sym.referenced_object
-
-        assert class_sym.info.type == RObjectType.SYM
-        assert class_sym.value.info.type == RObjectType.CHAR
-
-        altrep_name = class_sym.value.value
-        assert isinstance(altrep_name, bytes)
-
+        altrep_name = get_altrep_name(info)
         constructor = self.altrep_constructor_dict[altrep_name]
         return constructor(state)
 
 
@@ -26,9 +26,9 @@ def __init__(
         self.file = io.BytesIO(data)
 
     def _parse_array_values(
-            self,
-            dtype: npt.DTypeLike,
-            length: int,
+        self,
+        dtype: npt.DTypeLike,
+        length: int,
     ) -> npt.NDArray[Any]:
         dtype = np.dtype(dtype)
         buffer = self.file.read(length * dtype.itemsize)
 
@@ -0,0 +1,90 @@
+"""Tests of missing value functionality."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+import pytest
+
+from rdata.missing import R_FLOAT_NA, R_INT_NA, is_na, mask_na_values
+
+
+def test_int_is_na() -> None:
+    """Test checking NA values in int array."""
+    array = np.array([1, 2, R_INT_NA], dtype=np.int32)
+    ref_mask = np.array([0, 0, 1], dtype=np.bool_)
+
+    mask = is_na(array)
+    np.testing.assert_array_equal(mask, ref_mask)
+
+
+def test_float_is_na() -> None:
+    """Test checking NA values in float array."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+
+    mask = is_na(array)
+    np.testing.assert_array_equal(mask, ref_mask)
+
+
+@pytest.mark.parametrize("value", [R_INT_NA, R_FLOAT_NA])
+def test_value_is_na(value: Any) -> None:  # noqa: ANN401
+    """Test checking single NA values."""
+    assert is_na(value)
+
+
+@pytest.mark.parametrize("value", [
+    np.int32(0), 0, np.float64(0.0), 0.0, np.nan,
+])
+def test_value_is_not_na(value: Any) -> None:  # noqa: ANN401
+    """Test checking single non-NA values."""
+    assert not is_na(value)
+
+
+def test_int64() -> None:
+    """Test that integers outside the int32 range are not supported."""
+    with pytest.raises(NotImplementedError):
+        is_na(2**32)
+    with pytest.raises(NotImplementedError):
+        is_na(-2**32)
+
+
+def test_wrong_type() -> None:
+    """Test checking an unsupported type."""
+    with pytest.raises(NotImplementedError):
+        is_na("test")
+
+
+def test_masked_array() -> None:
+    """Test checking masked array creation."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+    ref_data = array.copy()
+
+    masked = mask_na_values(array)
+    assert isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked.data, ref_data)
+    np.testing.assert_array_equal(masked.mask, ref_mask)
+
+
+def test_masked_array_fill() -> None:
+    """Test checking masked array creation with a custom fill value."""
+    array = np.array([1, 2, R_FLOAT_NA, np.nan], dtype=np.float64)
+    ref_mask = np.array([0, 0, 1, 0], dtype=np.bool_)
+    ref_data = array.copy()
+    ref_data[ref_mask] = 42
+
+    masked = mask_na_values(array, fill_value=42)
+    assert isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked.data, ref_data)
+    np.testing.assert_array_equal(masked.mask, ref_mask)
+
+
+def test_nonmasked_array() -> None:
+    """Test checking masked array no-op."""
+    array = np.array([1, 2, np.nan, np.nan], dtype=np.float64)
+
+    masked = mask_na_values(array)
+    assert not isinstance(masked, np.ma.MaskedArray)
+    np.testing.assert_array_equal(masked, array)
@@ -13,6 +13,7 @@
 import xarray
 
 import rdata
+from rdata.missing import R_FLOAT_NA
 
 TESTDATA_PATH = rdata.TESTDATA_PATH
 
@@ -453,6 +454,9 @@ def test_encodings_v3(self) -> None:
 
     def test_dataframe(self) -> None:
         """Test dataframe conversion."""
+        # Files created in R with
+        # test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe.rda", version=2)  # noqa: E501
+        # test_dataframe = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); save(test_dataframe, file="test_dataframe_v3.rda")  # noqa: E501
         for f in ("test_dataframe.rda", "test_dataframe_v3.rda"):
             with self.subTest(file=f):
                 data = rdata.read_rda(TESTDATA_PATH / f)
@@ -475,6 +479,9 @@ def test_dataframe(self) -> None:
 
     def test_dataframe_rds(self) -> None:
         """Test dataframe conversion."""
+        # Files created in R with
+        # df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe.rds", version=2)  # noqa: E501
+        # df = data.frame(class=factor(c("a", "b", "b")), value=c(1L, 2L, 3L)); saveRDS(df, file="test_dataframe_v3.rds")  # noqa: E501
         for f in ("test_dataframe.rds", "test_dataframe_v3.rds"):
             with self.subTest(file=f):
                 data = rdata.read_rds(TESTDATA_PATH / f)
@@ -515,6 +522,118 @@ def test_dataframe_rownames(self) -> None:
             ),
         )
 
+    def test_dataframe_int_rownames(self) -> None:
+        """Test dataframe conversion with integer row names."""
+        # File created in R with
+        # df = data.frame(col1=c(10, 20, 30), row.names=c(3L, 6L, 9L)); saveRDS(df, file="test_dataframe_int_rownames.rds")  # noqa: E501
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_int_rownames.rds")
+
+        index = np.array([3, 6, 9], dtype=np.int32)
+        ref = pd.DataFrame(
+            {
+                "col1": pd.Series(
+                    [10., 20., 30.],
+                    dtype=float, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
+    def test_dataframe_range_rownames(self) -> None:
+        """Test dataframe conversion with range row names."""
+        # File created in R with
+        # df = data.frame(col1=c(10, 20, 30), row.names=2:4); saveRDS(df, file="test_dataframe_range_rownames.rds")  # noqa: E501
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_range_rownames.rds")
+
+        index = pd.RangeIndex(2, 5)
+        ref = pd.DataFrame(
+            {
+                "col1": pd.Series(
+                    [10., 20., 30.],
+                    dtype=float, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
+    def test_dataframe_dtypes(self) -> None:
+        """Test dataframe conversion with columns of several dtypes."""
+        # File created in R with
+        # df = data.frame(int=c(10L, 20L, 30L), float=c(1.1, 2.2, 3.3), string=c("x", "y", "z"), bool=as.logical(c(1, 0, 1)), complex=c(4+5i, 6+7i, 8+9i)); print(df); saveRDS(df, file="test_dataframe_dtypes.rds")  # noqa: E501
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes.rds")
+
+        index = pd.RangeIndex(1, 4)
+        ref = pd.DataFrame(
+            {
+                "int": pd.Series(
+                    [10, 20, 30],
+                    dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3],
+                    dtype=float, index=index),
+                "string": pd.Series(
+                    ["x" ,"y", "z"],
+                    dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series(
+                    [True, False, True],
+                    dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series(
+                    [4+5j, 6+7j, 8+9j],
+                    dtype=complex, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
+    def test_dataframe_dtypes_with_na(self) -> None:
+        """Test dataframe conversion with several dtypes and NA values."""
+        # File created in R with
+        # df = data.frame(int=c(10L, 20L, 30L, NA), float=c(1.1, 2.2, 3.3, NA), string=c("x", "y", "z", NA), bool=as.logical(c(1, 0, 1, NA)), complex=c(4+5i, 6+7i, 8+9i, NA)); saveRDS(df, file="test_dataframe_dtypes_with_na.rds")  # noqa: E501
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_dtypes_with_na.rds")
+
+        index = pd.RangeIndex(1, 5)
+        ref = pd.DataFrame(
+            {
+                "int": pd.Series(
+                    [10, 20, 30, pd.NA],
+                    dtype=pd.Int32Dtype(), index=index),
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3, R_FLOAT_NA],
+                    dtype=float, index=index),
+                "string": pd.Series(
+                    ["x" ,"y", "z", pd.NA],
+                    dtype=pd.StringDtype(), index=index),
+                "bool": pd.Series(
+                    [True, False, True, pd.NA],
+                    dtype=pd.BooleanDtype(), index=index),
+                "complex": pd.Series(
+                    [4+5j, 6+7j, 8+9j, R_FLOAT_NA],
+                    dtype=complex, index=index),
+            },
+            index=index,
+        )
+
+        with np.errstate(invalid="ignore"):
+            # Comparing complex arrays with R_FLOAT_NA gives warning
+            pd.testing.assert_frame_equal(data, ref)
+
+    def test_dataframe_float_with_na_nan(self) -> None:
+        """Test dataframe conversion of floats with NA, NaN and infinities."""
+        # File created in R with
+        # df = data.frame(float=c(1.1, 2.2, 3.3, NA, NaN, Inf, -Inf)); saveRDS(df, file="test_dataframe_float_with_na_nan.rds")  # noqa: E501,ERA001
+        data = rdata.read_rds(TESTDATA_PATH / "test_dataframe_float_with_na_nan.rds")
+
+        index = pd.RangeIndex(1, 8)
+        ref = pd.DataFrame(
+            {
+                "float": pd.Series(
+                    [1.1, 2.2, 3.3, R_FLOAT_NA, np.nan, np.inf, -np.inf],
+                    dtype=float, index=index),
+            },
+            index=index,
+        )
+        pd.testing.assert_frame_equal(data, ref)
+
     def test_ts(self) -> None:
         """Test time series conversion."""
         data = rdata.read_rda(TESTDATA_PATH / "test_ts.rda")
@@ -689,6 +808,7 @@ def test_altrep_wrap_real_attributes(self) -> None:
         data = rdata.conversion.convert(parsed)
         np.testing.assert_equal(data, [1., 2., 3.])
 
+    @pytest.mark.filterwarnings("ignore:Missing constructor")
     def test_altrep_wrap_real_class_attribute(self) -> None:
         """Test alternative representation of wrap_real with class attribute."""
         # File created in R with
 
@@ -7,9 +7,12 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import numpy as np
+import pandas as pd
 import pytest
 
 import rdata
+from rdata.conversion import ConverterFromPythonToR, convert_python_to_r_object
 from rdata.unparser import unparse_data
 
 if TYPE_CHECKING:
@@ -82,7 +85,8 @@ def test_unparse(fname: str) -> None:
     with (TESTDATA_PATH / fname).open("rb") as f:
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
-        r_data = rdata.parser.parse_data(data, expand_altrep=False)
+        r_data = rdata.parser.parse_data(
+            data, expand_altrep=False, extension=f".{file_type}")
 
         try:
             out_data = unparse_data(
@@ -96,23 +100,30 @@ def test_unparse(fname: str) -> None:
         assert data == out_data
 
 
+@pytest.mark.filterwarnings("ignore:Missing constructor")
 @pytest.mark.parametrize("fname", fnames, ids=fnames)
-def test_convert_to_r(fname: str) -> None:
+@pytest.mark.parametrize("expand_altrep", [True, False])
+def test_convert_to_r(fname: str, expand_altrep: bool) -> None:  # noqa: FBT001
     """Test converting Python data to RData object."""
     with (TESTDATA_PATH / fname).open("rb") as f:
-        # Skip test files without unique R->py->R transformation
+        # Skip test files without unique transformation
         if fname in [
-            "test_encodings.rda",     # encoding not kept in Python
-            "test_encodings_v3.rda",  # encoding not kept in Python
-            "test_list_attrs.rda",    # attributes not kept in Python
-            "test_file.rda",          # attributes not kept in Python
+            # encoding not kept in Python
+            "test_encodings.rda",
+            "test_encodings_v3.rda",
+            # attributes not kept in Python
+            "test_list_attrs.rda",
+            "test_file.rda",
+            "test_altrep_wrap_real_attributes.rds",
+            "test_altrep_wrap_real_class_attribute.rds",
         ]:
-            pytest.skip("ambiguous R->py->R transformation")
+            pytest.skip("ambiguous R-to-Python-to-R transformation")
 
         data = decompress_data(f.read())
         file_type, file_format = parse_file_type_and_format(data)
 
-        r_data = rdata.parser.parse_data(data, expand_altrep=False)
+        r_data = rdata.parser.parse_data(
+            data, expand_altrep=expand_altrep, extension=f".{file_type}")
 
         try:
             py_data = rdata.conversion.convert(r_data)
@@ -126,68 +137,153 @@ def test_convert_to_r(fname: str) -> None:
         else:
             encoding = encoding.lower()  # type: ignore [assignment]
 
+        converter = ConverterFromPythonToR(
+            encoding=encoding,
+            format_version=r_data.versions.format,
+            r_version_serialized=r_data.versions.serialized,
+        )
+
         try:
-            if file_type == "rds":
-                r_obj = rdata.conversion.convert_to_r_object(
-                    py_data, encoding=encoding)
-            else:
-                r_obj = rdata.conversion.convert_to_r_object_for_rda(
-                    py_data, encoding=encoding)
-            new_r_data = rdata.conversion.build_r_data(
-                r_obj,
-                encoding=encoding,
-                format_version=r_data.versions.format,
-                r_version_serialized=r_data.versions.serialized,
-            )
+            new_r_data = converter.convert_to_r_data(py_data, file_type=file_type)
         except NotImplementedError as e:
             pytest.xfail(str(e))
 
-        assert r_data == new_r_data
         assert str(r_data) == str(new_r_data)
+        assert r_data == new_r_data
+
+        # Check further that the resulting unparsed data is correct to ensure that
+        # Python-to-R conversion hasn't created any odd objects that can't be unparsed
+        if not expand_altrep:
+            file_type, file_format = parse_file_type_and_format(data)
+            out_data = unparse_data(
+                new_r_data, file_format=file_format, file_type=file_type)
+
+            if file_format == "ascii":
+                data = data.replace(b"\r\n", b"\n")
+
+            assert data == out_data
 
 
-def test_convert_to_r_bad_rda() -> None:
+def test_convert_to_r_rda_missing_names() -> None:
     """Test checking that data for RDA has variable names."""
-    py_data = "hello"
+    converter = ConverterFromPythonToR()
     with pytest.raises(TypeError, match="(?i)data must be a dictionary"):
-        rdata.conversion.convert_to_r_object_for_rda(py_data)  # type: ignore [arg-type]
+        converter.convert_to_r_data("hello", file_type="rda")
+
+
+def test_convert_to_r_rda_nonstr_names() -> None:
+    """Test checking that RDA variable names are strings."""
+    converter = ConverterFromPythonToR()
+    with pytest.raises(ValueError, match="(?i)keys must be strings"):
+        converter.convert_to_r_data({1: "hello"}, file_type="rda")
 
 
 def test_convert_to_r_empty_rda() -> None:
     """Test checking that data for RDA is not empty."""
     py_data: dict[str, Any] = {}
+    converter = ConverterFromPythonToR()
     with pytest.raises(ValueError, match="(?i)data must not be empty"):
-        rdata.conversion.convert_to_r_object_for_rda(py_data)
+        converter.convert_to_r_data(py_data, file_type="rda")
 
 
 def test_unparse_bad_rda() -> None:
     """Test checking that unparsing to RDA requires dictionary-like data."""
     py_data = "hello"
-    r_obj = rdata.conversion.convert_to_r_object(py_data)
-    r_data = rdata.conversion.build_r_data(r_obj)
+    converter = ConverterFromPythonToR()
+    r_data = converter.convert_to_r_data(py_data)
     with pytest.raises(ValueError, match="(?i)must be dictionary-like"):
         unparse_data(r_data, file_type="rda")
 
 
 def test_convert_to_r_bad_encoding() -> None:
     """Test checking that an unknown encoding is rejected."""
+    converter = ConverterFromPythonToR(encoding="non-existent")  # type: ignore [arg-type]
     with pytest.raises(LookupError, match="(?i)unknown encoding"):
-        rdata.conversion.convert_to_r_object("ä", encoding="non-existent")  # type: ignore [arg-type]
+        converter.convert_to_r_object("ä")
 
 
 def test_convert_to_r_unsupported_encoding() -> None:
     """Test checking that an unsupported encoding is rejected."""
+    converter = ConverterFromPythonToR(encoding="cp1250")  # type: ignore [arg-type]
     with pytest.raises(ValueError, match="(?i)unsupported encoding"):
-        rdata.conversion.convert_to_r_object("ä", encoding="cp1250")  # type: ignore [arg-type]
+        converter.convert_to_r_object("ä")
+
 
+def test_convert_to_r_nonstr_dict_keys() -> None:
+    """Test checking non-string dict keys."""
+    converter = ConverterFromPythonToR()
+    with pytest.raises(ValueError, match="(?i)keys must be strings"):
+        converter.convert_to_r_object({"a": 1, 2: 2})
 
-def test_unparse_big_int() -> None:
+
+@pytest.mark.parametrize("file_format", valid_formats)
+@pytest.mark.parametrize("value", [-2**31 - 1, 2**31])
+def test_unparse_big_int(file_format: FileFormat, value: int) -> None:
     """Test checking integers outside the int32 range."""
-    big_int = 2**32
-    r_obj = rdata.conversion.convert_to_r_object(big_int)
-    r_data = rdata.conversion.build_r_data(r_obj)
+    converter = ConverterFromPythonToR()
+    r_data = converter.convert_to_r_data(value)
     with pytest.raises(ValueError, match="(?i)not castable"):
-        unparse_data(r_data, file_format="xdr")
+        unparse_data(r_data, file_format=file_format)
+
+
+def test_convert_dataframe_pandas_dtypes() -> None:
+    """Test converting dataframe with pandas dtypes."""
+    df1 = pd.DataFrame(
+        {
+            "int": np.array([10, 20, 30], dtype=np.int32),
+            "float": [1.1, 2.2, 3.3],
+            "string": ["x" ,"y", "z"],
+            "bool": [True, False, True],
+            "complex": [4+5j, 6+7j, 8+9j],
+        },
+        index=range(3),
+    )
+
+    index = pd.RangeIndex(3)
+    df2 = pd.DataFrame(
+        {
+            "int": pd.Series([10, 20, 30], dtype=pd.Int32Dtype(), index=index),
+            "float": pd.Series([1.1, 2.2, 3.3], dtype=pd.Float64Dtype(), index=index),
+            "string": pd.Series(["x" ,"y", "z"], dtype=pd.StringDtype(), index=index),
+            "bool": pd.Series([1, 0, 1], dtype=pd.BooleanDtype(), index=index),
+            "complex": pd.Series([4+5j, 6+7j, 8+9j], dtype=complex, index=index),
+        },
+        index=index,
+    )
+
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
+
+    assert str(r_obj1) == str(r_obj2)
+    assert r_obj1 == r_obj2
+
+
+def test_convert_dataframe_rangeindex() -> None:
+    """Test that RangeIndex(3) converts differently from the index [0, 1, 2]."""
+    data = {"data": np.array([10, 20, 30], dtype=np.int32)}
+
+    df1 = pd.DataFrame(data, index=pd.RangeIndex(3))
+    df2 = pd.DataFrame(data, index=pd.Index([0, 1, 2]))
+
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
+
+    assert str(r_obj1) != str(r_obj2)
+    assert r_obj1 != r_obj2
+
+
+def test_convert_dataframe_rangeindex_flattened() -> None:
+    """Test that RangeIndex(3, 8, 2) converts like the index [3, 5, 7]."""
+    data = {"data": np.array([10, 20, 30], dtype=np.int32)}
+
+    df1 = pd.DataFrame(data, index=pd.RangeIndex(3, 8, 2))
+    df2 = pd.DataFrame(data, index=pd.Index([3, 5, 7]))
+
+    r_obj1 = convert_python_to_r_object(df1)
+    r_obj2 = convert_python_to_r_object(df2)
+
+    assert str(r_obj1) == str(r_obj2)
+    assert r_obj1 == r_obj2
 
 
 @pytest.mark.parametrize("compression", [*valid_compressions, "fail"])
 
@@ -25,12 +25,12 @@
 
 
 def unparse_file(
-        path: os.PathLike[Any] | str,
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
-        compression: Compression = "gzip",
+    path: os.PathLike[Any] | str,
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
+    compression: Compression = "gzip",
 ) -> None:
     """
     Unparse RData object to a file.
@@ -59,11 +59,11 @@ def unparse_file(
 
 
 def unparse_fileobj(
-        fileobj: IO[Any],
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
+    fileobj: IO[Any],
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
 ) -> None:
     """
     Unparse RData object to a file object.
@@ -78,9 +78,11 @@ def unparse_fileobj(
 
     if file_format == "ascii":
         from ._ascii import UnparserASCII as Unparser
+
         rda_magic = "RDA"
     elif file_format == "xdr":
         from ._xdr import UnparserXDR as Unparser
+
         rda_magic = "RDX"
     else:
         msg = f"Unknown file format: {file_format}"
@@ -89,9 +91,11 @@ def unparse_fileobj(
     # Check that RData object for rda file is of correct kind
     if file_type == "rda":
         r_object = r_data.object
-        if not (r_object.info.type is RObjectType.LIST
-                and r_object.tag is not None
-                and r_object.tag.info.type is RObjectType.SYM):
+        if not (
+            r_object.info.type is RObjectType.LIST
+            and r_object.tag is not None
+            and r_object.tag.info.type is RObjectType.SYM
+        ):
             msg = "r_data object must be dictionary-like for rda file"
             raise ValueError(msg)
 
@@ -104,10 +108,10 @@ def unparse_fileobj(
 
 
 def unparse_data(
-        r_data: RData,
-        *,
-        file_format: FileFormat = "xdr",
-        file_type: FileType = "rds",
+    r_data: RData,
+    *,
+    file_format: FileFormat = "xdr",
+    file_type: FileType = "rds",
 ) -> bytes:
     """
     Unparse RData object to a bytestring.
 
@@ -7,11 +7,13 @@
 
 import numpy as np
 
+from rdata.missing import is_na
+
 from ._unparser import Unparser
 
 if TYPE_CHECKING:
     import io
-    from typing import Any, Final
+    from typing import Final
 
     import numpy.typing as npt
 
@@ -33,7 +35,7 @@ def escape(b: bytes) -> str:
         byte_to_str[byte] = escape(bytes([byte]))
 
     # Update mapping for special characters
-    byte_to_str[b'"'[0]] = r'\"'
+    byte_to_str[b'"'[0]] = r"\""
     byte_to_str[b"'"[0]] = r"\'"
     byte_to_str[b"?"[0]] = r"\?"
     byte_to_str[b" "[0]] = r"\040"
@@ -66,11 +68,10 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self._add_line("A")
 
-    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
-        # Convert boolean to int
-        if np.issubdtype(array.dtype, np.bool_):
-            array = array.astype(np.int32)
-
+    def _unparse_array_values_raw(
+        self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
         # Convert complex to pairs of floats
         if np.issubdtype(array.dtype, np.complexfloating):
             assert array.dtype == np.complex128
@@ -79,19 +80,20 @@ def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
         # Unparse data
         for value in array:
             if np.issubdtype(array.dtype, np.integer):
-                line = "NA" if value is None or np.ma.is_masked(value) else str(value)  # type: ignore [no-untyped-call]
+                line = "NA" if is_na(value) else str(value)
 
             elif np.issubdtype(array.dtype, np.floating):
-                if np.isnan(value):
+                if is_na(value):
+                    line = "NA"
+                elif np.isnan(value):
                     line = "NaN"
                 elif value == np.inf:
                     line = "Inf"
                 elif value == -np.inf:
                     line = "-Inf"
                 else:
                     line = str(value)
-                    if line.endswith(".0"):
-                        line = line[:-2]
+                    line = line.removesuffix(".0")
 
             else:
                 msg = f"Unknown dtype: {array.dtype}"
 
@@ -7,6 +7,7 @@
 
 import numpy as np
 
+from rdata.missing import R_INT_NA
 from rdata.parser import (
     RData,
     RExtraInfo,
@@ -69,9 +70,35 @@ def unparse_array(self, array: npt.NDArray[Any]) -> None:
         self.unparse_int(array.size)
         self._unparse_array_values(array)
 
-    @abc.abstractmethod
     def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
         """Unparse the values of an array."""
+        # Convert boolean to int
+        if np.issubdtype(array.dtype, np.bool_):
+            array = array.astype(np.int32)
+
+        # Flatten masked values and convert int arrays to int32
+        if np.issubdtype(array.dtype, np.integer):
+            if np.ma.is_masked(array):  # type: ignore [no-untyped-call]
+                mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
+                array = np.ma.getdata(array).copy()  # type: ignore [no-untyped-call]
+                array[mask] = R_INT_NA
+
+            if array.dtype != np.int32:
+                info = np.iinfo(np.int32)
+                if np.any(array > info.max) or np.any(array < info.min):
+                    msg = "Integer array not castable to int32"
+                    raise ValueError(msg)
+                array = array.astype(np.int32)
+
+        assert array.dtype in (np.int32, np.float64, np.complex128)
+        self._unparse_array_values_raw(array)
+
+    @abc.abstractmethod
+    def _unparse_array_values_raw(
+        self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
+        """Unparse the values of an int32, float64 or complex128 array."""
 
     def unparse_string(self, value: bytes | None) -> None:
         """Unparse a string."""
@@ -106,8 +133,9 @@ def unparse_r_object(self, obj: RObject) -> None:  # noqa: C901, PLR0912
         # Unparse data
         value = obj.value
         if info.type in {
-           RObjectType.NIL,
-           RObjectType.NILVALUE,
+            RObjectType.NIL,
+            RObjectType.NILVALUE,
+            RObjectType.REF,
         }:
             # These types don't have any data
             assert value is None
@@ -118,6 +146,7 @@ def unparse_r_object(self, obj: RObject) -> None:  # noqa: C901, PLR0912
         elif info.type in {
             RObjectType.LIST,
             RObjectType.LANG,
+            RObjectType.ALTREP,
             # Parser treats the following equal to LIST.
             # Not tested if they work
             # RObjectType.CLO,
 
@@ -2,17 +2,14 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-
-from rdata.parser import R_INT_NA
+from typing import TYPE_CHECKING
 
 from ._unparser import Unparser
 
 if TYPE_CHECKING:
     import io
 
+    import numpy as np
     import numpy.typing as npt
 
 
@@ -30,23 +27,10 @@ def unparse_magic(self) -> None:
         """Unparse magic bits."""
         self.file.write(b"X\n")
 
-    def _unparse_array_values(self, array: npt.NDArray[Any]) -> None:
-        # Convert boolean to int
-        if np.issubdtype(array.dtype, np.bool_):
-            array = array.astype(np.int32)
-
-        # Flatten masked values and convert int arrays to int32
-        if np.issubdtype(array.dtype, np.integer):
-            if np.ma.is_masked(array):  # type: ignore [no-untyped-call]
-                mask = np.ma.getmask(array)  # type: ignore [no-untyped-call]
-                array = np.ma.getdata(array).copy()  # type: ignore [no-untyped-call]
-                array[mask] = R_INT_NA
-            info = np.iinfo(np.int32)
-            if not all(info.min <= val <= info.max for val in array):
-                msg = "Integer array not castable to int32"
-                raise ValueError(msg)
-            array = array.astype(np.int32)
-
+    def _unparse_array_values_raw(
+        self,
+        array: npt.NDArray[np.int32 | np.float64 | np.complex128],
+    ) -> None:
         # Convert to big endian if needed
         array = array.astype(array.dtype.newbyteorder(">"))