rerun-io · timsaucer · Mar 12, 2025 · Mar 12, 2025 · Mar 12, 2025
diff --git a/rerun_py/rerun_sdk/rerun/any_value.py b/rerun_py/rerun_sdk/rerun/any_value.py
@@ -42,12 +42,12 @@ def __init__(self, descriptor: str | ComponentDescriptor, value: Any, drop_untyp
         will be dropped, and a warning will be sent to the log.
 
         If you are want to inspect how your component will be converted to the
-        underlying arrow code, the following snippet is what is happening
-        internally:
+        underlying arrow code, note that we first try to convert it directly to a
+        pyarrow array (`str` and `bytes` skip this step). Failing this, we call
 
         ```
-        np_value = np.atleast_1d(np.array(value, copy=False))
-        pa_value = pa.array(value)
+        pa_scalar = pa.scalar(value)
+        pa_value = pa.array([pa_scalar])
         ```
 
         Parameters
@@ -77,19 +77,52 @@ def __init__(self, descriptor: str | ComponentDescriptor, value: Any, drop_untyp
             elif hasattr(value, "as_arrow_array"):
                 self.pa_array = value.as_arrow_array()
             else:
-                if np_type is not None:
+                if pa_type is not None:
                     if value is None:
                         value = []
-                    np_value = np.atleast_1d(np.asarray(value, dtype=np_type))
-                    self.pa_array = pa.array(np_value, type=pa_type)
+                    # Special case: strings are iterables so pyarrow will not
+                    # handle them properly
+                    if not isinstance(value, (str, bytes)):
+                        try:
+                            self.pa_array = pa.array(value, type=pa_type)
+                        except TypeError:
+                            pass
+                    if self.pa_array is None:
+                        try:
+                            pa_scalar = pa.scalar(value, type=pa_type)
+                            self.pa_array = pa.array([pa_scalar], type=pa_type)
+                        except TypeError:
+                            pass
+                    if self.pa_array is None:
+                        # Fall back - use numpy
+                        np_value = np.atleast_1d(np.asarray(value, dtype=np_type))
+                        self.pa_array = pa.array(np_value, type=pa_type)
                 else:
                     if value is None:
                         if not drop_untyped_nones:
                             raise ValueError("Cannot convert None to arrow array. Type is unknown.")
                     else:
-                        np_value = np.atleast_1d(np.asarray(value))
-                        self.pa_array = pa.array(np_value)
-                        ANY_VALUE_TYPE_REGISTRY[descriptor] = (np_value.dtype, self.pa_array.type)
+                        # This should handle most non-scalar values, but we have to
+                        # treat str and bytes specially because they are iterable
+                        if not isinstance(value, (str, bytes)) and value is not None:
+                            try:
+                                self.pa_array = pa.array(value)
+                                ANY_VALUE_TYPE_REGISTRY[descriptor] = (None, self.pa_array.type)
+                            except TypeError:
+                                pass
+                        if self.pa_array is None:
+                            try:
+                                pa_scalar = pa.scalar(value)
+                                self.pa_array = pa.array([pa_scalar])
+                                ANY_VALUE_TYPE_REGISTRY[descriptor] = (None, self.pa_array.type)
+                            except TypeError:
+                                pass
+                        if self.pa_array is None:
+                            # Fall back - use numpy which handles a wide variety of lists, tuples,
+                            # and mixtures of them and will turn into a well formed array
+                            np_value = np.atleast_1d(np.asarray(value))
+                            self.pa_array = pa.array(np_value)
+                            ANY_VALUE_TYPE_REGISTRY[descriptor] = (np_value.dtype, self.pa_array.type)
 
     def is_valid(self) -> bool:
         return self.pa_array is not None

diff --git a/rerun_py/tests/unit/test_any_values.py b/rerun_py/tests/unit/test_any_values.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import numpy as np
+import pyarrow as pa
 import pytest
 import rerun as rr
 from rerun.error_utils import RerunWarning
@@ -98,3 +99,34 @@ def test_none_any_value() -> None:
 
         assert len(batches) == 1
         assert len(warnings) == 1  # no new warnings
+
+
+def test_iterable_any_value() -> None:
+    SHORT_TEXT = "short"
+    LONG_TEXT = "longer_text"
+
+    SHORT_BYTES = b"ABCD"
+    LONG_BYTES = b"ABCDEFGH"
+
+    values = rr.AnyValues(str_values=SHORT_TEXT, bytes_values=SHORT_BYTES)
+    batches = list(values.as_component_batches())
+
+    assert len(batches) == 2
+    assert batches[0].as_arrow_array() == pa.array([SHORT_TEXT], type=pa.string())
+    assert batches[1].as_arrow_array() == pa.array([SHORT_BYTES], type=pa.binary())
+
+    # Issue #8781 - ensure subsequent calls do not truncate data
+    values = rr.AnyValues(str_values=LONG_TEXT, bytes_values=LONG_BYTES)
+    batches = list(values.as_component_batches())
+
+    assert len(batches) == 2
+    assert batches[0].as_arrow_array() == pa.array([LONG_TEXT], type=pa.string())
+    assert batches[1].as_arrow_array() == pa.array([LONG_BYTES], type=pa.binary())
+
+    # Ensure iterables of these types are handled as arrays
+    values = rr.AnyValues(str_values=[SHORT_TEXT, LONG_TEXT], bytes_values=[SHORT_BYTES, LONG_BYTES])
+    batches = list(values.as_component_batches())
+
+    assert len(batches) == 2
+    assert batches[0].as_arrow_array() == pa.array([SHORT_TEXT, LONG_TEXT], type=pa.string())
+    assert batches[1].as_arrow_array() == pa.array([SHORT_BYTES, LONG_BYTES], type=pa.binary())