From 78a967a6b573f91d67d61f6c174e87c15e7392c8 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Wed, 10 Jan 2024 14:48:21 +0900
Subject: [PATCH 01/29] Update internals of AnalysisResultTable

 .../framework/        | 409 ++++++++++--------
 .../framework/              | 132 +-----
 test/                     |   4 +-
 test/framework/ | 120 +++++
 test/framework/             | 221 ----------
 5 files changed, 378 insertions(+), 508 deletions(-)
 create mode 100644 test/framework/
 delete mode 100644 test/framework/

diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index 16b8716874..4b03f4ad45 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -10,256 +10,315 @@
 # copyright notice, and modified files need to carry a notice indicating
 # that they have been altered from the originals.
-"""Table representation of analysis results."""
+"""A table-like dataset for analysis results."""
+from __future__ import annotations
-import logging
-import threading
 import re
+import threading
 import uuid
 import warnings
-from typing import List, Dict, Union, Optional, Any
+from typing import Any
 import numpy as np
 import pandas as pd
-from qiskit_experiments.database_service.utils import ThreadSafeContainer
-from .table_mixin import DefaultColumnsMixIn
-LOG = logging.getLogger(__name__)
-class AnalysisResultContainer(pd.DataFrame, DefaultColumnsMixIn):
-    """Data container of the thread-safe analysis result table."""
-    @classmethod
-    def _default_columns(cls) -> List[str]:
-        return [
-            "name",
-            "experiment",
-            "components",
-            "value",
-            "quality",
-            "experiment_id",
-            "result_id",
-            "tags",
-            "backend",
-            "run_time",
-            "created_time",
-        ]
-    @property
-    def _constructor(self):
-        #
-        return AnalysisResultContainer
+class AnalysisResultTable:
+    """A table-like dataset for analysis results.
-class AnalysisResultTable(ThreadSafeContainer):
-    """A thread-safe table form container of analysis results.
+    Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
+    The table is automatically expanded when an extra key is included in the
+    input dictionary data. Missing columns in the input data are filled with a null value.
-    This table is a dataframe wrapper with the thread-safe mechanism with predefined columns.
-    This object is attached to the :class:`.ExperimentData` container to store
-    analysis results. Each table row contains series of metadata in addition to the
-    result value itself.
+    The table row index (i.e. entry ID) is created by truncating the result_id string,
+    which is basically a UUID-4 string. A random unique ID is generated when the result_id
+    is missing in the input data.
-    User can rely on the dataframe filtering mechanism to analyze large scale experiment
-    results, e.g. massive parallel experiment and batch experiment outcomes, efficiently.
-    See `pandas dataframe documentation <>`_
-    for more details.
+    Any operation on the table value via the instance methods guarantees thread safety.
     VALID_ID_REGEX = re.compile(r"\A(?P<short_id>\w{8})-\w{4}-\w{4}-\w{4}-\w{12}\Z")
-    def _init_container(self, init_values: Any):
-        if init_values is None:
-            return AnalysisResultContainer()
-        return init_values
+        "name",
+        "experiment",
+        "components",
+        "value",
+        "quality",
+        "experiment_id",
+        "result_id",
+        "tags",
+        "backend",
+        "run_time",
+        "created_time",
+    ]
+    def __init__(self):
+        """Create new dataset."""
+        super().__init__()
+        self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
+        self._lock = threading.RLock()
-    def result_ids(self) -> List[str]:
-        """Return all result IDs in this table."""
-        with self._lock:
-            return self._container["result_id"].to_list()
-    def filter_columns(self, columns: Union[str, List[str]]) -> List[str]:
-        """Filter columns names available in this table.
+    @classmethod
+    def from_dataframe(cls, data: pd.DataFrame) -> "AnalysisResultTable":
+        """Create new dataset with existing dataframe.
-            columns: Specifying a set of columns to return. You can pass a list of each
-                column name to return, otherwise builtin column groups are available:
-                * ``all``: Return all columns, including metadata to communicate
-                  with experiment service, such as entry IDs.
-                * ``default``: Return columns including analysis result with supplementary
-                  information about experiment.
-                * ``minimal``: Return only analysis subroutine returns.
+            data: Bare dataframe object.
-        Raises:
-            ValueError: When column is given in string which doesn't match with any builtin group.
+        Returns:
+            A new AnalysisResultTable instance.
-        with self._lock:
-            if columns == "all":
-                return self._container.columns
-            if columns == "default":
-                return [
-                    "name",
-                    "experiment",
-                    "components",
-                    "value",
-                    "quality",
-                    "backend",
-                    "run_time",
-                ] + self._container.extra_columns()
-            if columns == "minimal":
-                return [
-                    "name",
-                    "components",
-                    "value",
-                    "quality",
-                ] + self._container.extra_columns()
-            if not isinstance(columns, str):
-                out = []
-                for column in columns:
-                    if column in self._container.columns:
-                        out.append(column)
-                    else:
-                        warnings.warn(
-                            f"Specified column name {column} does not exist in this table.",
-                            UserWarning,
-                        )
-                return out
-        raise ValueError(
-            f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'."
-        )
-    def get_entry(
-        self,
-        index: str,
-    ) -> pd.Series:
-        """Get entry from the dataframe.
-        Args:
-            index: Name of entry to acquire.
+        instance = AnalysisResultTable()
+        instance._data = pd.concat([instance._data, data])
+        return instance
-        Returns:
-            Pandas Series of acquired entry. This doesn't mutate the table.
+    @property
+    def dataframe(self) -> pd.DataFrame:
+        """Dataframe object of analysis results."""
+        with self._lock:
+            return self._data.copy(deep=False)
-        Raises:
-            ValueError: When index is not in this table.
-        """
+    @property
+    def result_ids(self) -> list[str]:
+        """Result IDs in current dataset."""
         with self._lock:
-            if index not in self._container.index:
-                raise ValueError(f"Table index {index} doesn't exist in this table.")
+            return list(self._data.result_id)
-            return self._container.loc[index]
+    @property
+    def columns(self) -> list[str]:
+        """All columns in current dataset."""
+        with self._lock:
+            return list(self._data.columns)
-    # pylint: disable=arguments-renamed
-    def add_entry(
+    def add_data(
-        result_id: Optional[str] = None,
-        **kwargs,
-    ) -> pd.Series:
-        """Add new entry to the table.
+        key: str | int | None = None,
+        **data,
+    ) -> str:
+        """Add new data to this dataset.
-            result_id: Result ID. Automatically generated when not provided.
-                This must be valid hexadecimal UUID string.
-            kwargs: Description of new entry to register.
+            key: Identifier of this entry. This must be UUID-4 format.
+                The result_id string in the input data is used if nothing provided.
+                Random unique ID is prepared if result_id is also missing.
+            data: Arbitrary key-value pairs representing a single data entry.
+                Missing values for default columns are filled with None.
-            Pandas Series of added entry. This doesn't mutate the table.
-        Raises:
-            ValueError: When the truncated result id causes a collision in the table.
+            Assigned analysis result ID.
-        if not result_id:
-            result_id = self._unique_table_index()
-        matched = self.VALID_ID_REGEX.match(result_id)
-        if matched is None:
+        if not key:
+            if result_id := data.get("result_id"):
+                key = result_id
+            else:
+                key = self._create_unique_hash()
+        if data.get("result_id", None) is None:
+            data["result_id"] = key
+        if matched := re.match(self.VALID_ID_REGEX, key):
+            # Short unique index is generated from result id.
+            # Showing full result id unnecessarily occupies horizontal space of the html table.
+            # This mechanism is inspired by the github commit hash.
+            index ="short_id")
+        else:
-                f"The result ID {result_id} is not a valid result ID string. "
-                "This entry might fail in saving with the experiment service.",
+                f"Data key {key} is not valid result ID string. ",
-            short_id = result_id[:8]
-        else:
-            # Short unique index is generated from result id.
-            # Showing full result id unnecessary occupies horizontal space of the html table.
-            # This mechanism is similar with the github commit hash.
-            short_id ="short_id")
+            index = key[:8]
         with self._lock:
-            if short_id in self._container.index:
+            if index in self._data.index:
                 raise ValueError(
-                    f"The short ID of the result_id '{short_id}' already exists in the "
-                    "experiment data. Please use another ID to avoid index collision."
+                    f"Table entry index {index} already exists. "
+                    "Please use another ID to avoid index collision."
-            return self._container.add_entry(
-                index=short_id,
-                result_id=result_id,
-                **kwargs,
-            )
-    def drop_entry(
+            # Add missing columns to the table
+            if missing := data.keys() - set(self._data.columns):
+                for k in data:
+                    # Order sensitive
+                    if k in missing:
+                        loc = len(self._data.columns)
+                        self._data.insert(loc, k, value=None)
+            # A hack to avoid unwanted dtype update. Appending new row with .loc indexer
+            # performs enlargement and implicitly changes dtype. This often induces a confusion of
+            # NaN (numeric container) and None (object container) for missing values.
+            # Filling a row with None values before assigning actual values can keep column dtype,
+            # but this behavior might change in future pandas version.
+            #
+            # Also see test.framework.test_data_table.TestBaseTable.test_type_*
+            self._data.loc[index, :] = [None] * len(self._data.columns)
+            template = dict.fromkeys(self.columns, None)
+            template.update(data)
+            self._data.loc[index, :] = pd.array(list(template.values()), dtype=object)
+        return index
+    def get_data(
-        index: str,
-    ):
-        """Drop specified labels from rows.
+        key: str | int | slice | None = None,
+        columns: str | list[str] = "default",
+    ) -> pd.DataFrame:
+        """Get matched entries from this dataset.
-        This directly calls :meth:`.drop` of the DataFrame container object.
+        Args:
+            key: Identifier of the entry of interest.
+            columns: List of names or a policy (default, minimal, all)
+                of data columns included in the returned data frame.
+        Returns:
+            Matched entries in a single data frame.
+        """
+        if key is None:
+            with self._lock:
+                out = self._data.copy()
+        else:
+            uids = self._resolve_key(key)
+            with self._lock:
+                out = self._data.filter(items=uids, axis=0)
+        if columns != "all":
+            valid_columns = self._resolve_columns(columns)
+            out = out[valid_columns]
+        return out
+    def del_data(
+        self,
+        key: str | int,
+    ) -> list[str]:
+        """Delete matched entries from this dataset.
-            index: Name of entry to drop.
+            key: Identifier of the entry of interest.
-        Raises:
-            ValueError: When index is not in this table.
+        Returns:
+            Deleted analysis result IDs.
+        uids = self._resolve_key(key)
         with self._lock:
-            if index not in self._container.index:
-                raise ValueError(f"Table index {index} doesn't exist in this table.")
-            self._container.drop(index, inplace=True)
+            self._data.drop(uids, inplace=True)
+        return uids
     def clear(self):
-        """Remove all elements from this container."""
+        """Clear all table entries."""
         with self._lock:
-            self._container = AnalysisResultContainer()
+            self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
+    def copy(self):
+        """Create new thread-safe instance with the same data.
-    def _unique_table_index(self):
-        """Generate unique UUID which is unique in the table with first 8 characters."""
+        .. note::
+            This returns a new object with shallow copied data frame.
+        """
+        with self._lock:
+            # Hold the lock so that no data can be added
+            new_instance = self.__class__()
+            new_instance._data = self._data.copy(deep=False)
+        return new_instance
+    def _create_unique_hash(self) -> str:
         with self._lock:
             n = 0
             while n < 1000:
                 tmp_id = str(uuid.uuid4())
-                if tmp_id[:8] not in self._container.index:
+                if tmp_id[:8] not in self._data.index:
                     return tmp_id
         raise RuntimeError(
             "Unique result_id string cannot be prepared for this table within 1000 trials. "
             "Reduce number of entries, or manually provide a unique result_id."
-    def _repr_html_(self) -> Union[str, None]:
-        """Return HTML representation of this dataframe."""
+    def _resolve_columns(self, columns: str | list[str]):
         with self._lock:
-            return self._container._repr_html_()
+            extra_columns = [c for c in self._data.columns if c not in self.DEFAULT_COLUMNS]
+            if columns == "default":
+                return [
+                    "name",
+                    "experiment",
+                    "components",
+                    "value",
+                    "quality",
+                    "backend",
+                    "run_time",
+                ] + extra_columns
+            if columns == "minimal":
+                return [
+                    "name",
+                    "components",
+                    "value",
+                    "quality",
+                ] + extra_columns
+            if not isinstance(columns, str):
+                out = []
+                for column in columns:
+                    if column in self._data.columns:
+                        out.append(column)
+                    else:
+                        warnings.warn(
+                            f"Specified column {column} does not exist in this table.",
+                            UserWarning,
+                        )
+                return out
+        raise ValueError(
+            f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'."
+        )
-    def __json_encode__(self) -> Dict[str, Any]:
+    def _resolve_key(self, key: int | slice | str) -> list[str]:
+        with self._lock:
+            if isinstance(key, int):
+                if key >= len(self):
+                    raise KeyError(f"Analysis result {key} not found.")
+                return [self._data.index[key]]
+            if isinstance(key, slice):
+                keys = list(self._data.index)[key]
+                if len(keys) == 0:
+                    raise KeyError(f"Analysis result {key} not found.")
+                return keys
+            if isinstance(key, str):
+                if key in self._data.index:
+                    return [key]
+                # This key is name of entry
+                loc = self._data["name"] == key
+                if not any(loc):
+                    raise KeyError(f"Analysis result {key} not found.")
+                return list(self._data.index[loc])
+        raise TypeError(f"Invalid key type {type(key)}. The key must be either int, slice, or str.")
+    def __len__(self):
+        return len(self._data)
+    def __contains__(self, item):
+        return item in self._data.index
+    def __json_encode__(self) -> dict[str, Any]:
         with self._lock:
             return {
                 "class": "AnalysisResultTable",
-                "data": self._container.to_dict(orient="index"),
+                "data": self._data.to_dict(orient="index"),
-    def __json_decode__(cls, value: Dict[str, Any]) -> "AnalysisResultTable":
+    def __json_decode__(cls, value: dict[str, Any]) -> "AnalysisResultTable":
         if not value.get("class", None) == "AnalysisResultTable":
             raise ValueError("JSON decoded value for AnalysisResultTable is not valid class type.")
         instance = object.__new__(cls)
         instance._lock = threading.RLock()
-        instance._container = AnalysisResultContainer.from_dict(
+        instance._data = pd.DataFrame.from_dict(
             data=value.get("data", {}),
         ).replace({np.nan: None})
         return instance
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        del state["_lock"]
+        return state
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self._lock = threading.RLock()
diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index b6dfff5793..16d9029580 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -21,7 +21,7 @@
 from datetime import datetime, timezone
 from concurrent import futures
 from threading import Event
-from functools import wraps, singledispatch
+from functools import wraps
 from collections import deque
 import contextlib
 import copy
@@ -686,7 +686,7 @@ def hgp(self, new_hgp: str) -> None:
     def _clear_results(self):
         """Delete all currently stored analysis results and figures"""
         # Schedule existing analysis results for deletion next save call
-        self._deleted_analysis_results.extend(list(self._analysis_results.result_ids()))
+        self._deleted_analysis_results.extend(list(self._analysis_results.result_ids))
         # Schedule existing figures for deletion next save call
         for key in self._figures.keys():
@@ -1397,7 +1397,7 @@ def add_analysis_results(
                 backend = extra_values.pop("backend", self.backend_name)
                 run_time = extra_values.pop("run_time", self.running_time)
                 created_time = extra_values.pop("created_time", None)
-                self._analysis_results.add_entry(
+                self._analysis_results.add_data(
@@ -1419,7 +1419,7 @@ def add_analysis_results(
             tags = tags or []
             backend = backend or self.backend_name
-            self._analysis_results.add_entry(
+            uid = self._analysis_results.add_data(
@@ -1429,14 +1429,13 @@ def add_analysis_results(
                 tags=tags or [],
-                run_time=run_time,  # TODO add job RUNNING time
+                run_time=run_time,
             if self.auto_save:
-                last_index = self._analysis_results.result_ids()[-1][:8]
                 service_result = _series_to_service_result(
-                    series=self._analysis_results.get_entry(last_index),
+                    series=self._analysis_results.get_data(uid),
@@ -1446,39 +1445,28 @@ def add_analysis_results(
     def delete_analysis_result(
         result_key: Union[int, str],
-    ) -> str:
+    ) -> list[str]:
         """Delete the analysis result.
             result_key: ID or index of the analysis result to be deleted.
-            Analysis result ID.
+            Deleted analysis result IDs.
             ExperimentEntryNotFound: If analysis result not found or multiple entries are found.
-        # Retrieve from DB if needed.
-        to_delete = self.analysis_results(
-            index=result_key,
-            block=False,
-            columns="all",
-            dataframe=True,
-        )
-        if not isinstance(to_delete, pd.Series):
-            raise ExperimentEntryNotFound(
-                f"Multiple entries are found with result_key = {result_key}. "
-                "Try another key that can uniquely determine entry to delete."
-            )
+        uids = self._analysis_results.del_data(result_key)
-        self._analysis_results.drop_entry(str(
         if self._service and self.auto_save:
             with service_exception_to_warning():
-                self.service.delete_analysis_result(result_id=to_delete.result_id)
+                for uid in uids:
+                    self.service.delete_analysis_result(result_id=uid)
-            self._deleted_analysis_results.append(to_delete.result_id)
+            self._deleted_analysis_results.extend(uids)
-        return to_delete.result_id
+        return uids
     def _retrieve_analysis_results(self, refresh: bool = False):
         """Retrieve service analysis results.
@@ -1500,7 +1488,7 @@ def _retrieve_analysis_results(self, refresh: bool = False):
                 extra = result.result_data["_extra"]
                 if result.chisq is not None:
                     extra["chisq"] = result.chisq
-                self._analysis_results.add_entry(
+                self._analysis_results.add_data(
@@ -1569,30 +1557,19 @@ def analysis_results(
-        out = self._analysis_results.copy()
-        if index is not None:
-            out = _filter_analysis_results(index, out)
-            if out is None:
-                msg = [f"Analysis result {index} not found."]
-                errors = self.errors()
-                if errors:
-                    msg.append(f"Errors: {errors}")
-                raise ExperimentEntryNotFound("\n".join(msg))
         if dataframe:
-            valid_columns = self._analysis_results.filter_columns(columns)
-            out = out[valid_columns]
-            if len(out) == 1 and index is not None:
+            df = self._analysis_results.get_data(index, columns=columns)
+            if len(df) == 1 and index is not None:
                 # For backward compatibility.
                 # One can directly access attributes with Series. e.g. out.value
-                return out.iloc[0]
-            return out
+                return df.iloc[0]
+            return df
         # Convert back into List[AnalysisResult] which is payload for IBM experiment service.
         # This will be removed in future version.
+        tmp_df = self._analysis_results.get_data(index, columns="all")
         service_results = []
-        for _, series in out.iterrows():
+        for _, series in tmp_df.iterrows():
@@ -1731,7 +1708,7 @@ def save(
         analysis_results_to_create = []
-        for _, series in self._analysis_results.copy().iterrows():
+        for _, series in self._analysis_results.dataframe.iterrows():
             # TODO We should support saving entire dataframe
             #  Calling API per entry takes huge amount of time.
             legacy_result = _series_to_service_result(
@@ -2343,7 +2320,7 @@ def copy(self, copy_results: bool = True) -> "ExperimentData":
         # Copy results and figures.
         # This requires analysis callbacks to finish
         self._wait_for_futures(self._analysis_futures.values(), name="analysis")
-        new_instance._analysis_results = self._analysis_results.copy_object()
+        new_instance._analysis_results = self._analysis_results.copy()
         with self._figures.lock:
             new_instance._figures = ThreadSafeOrderedDict()
@@ -2720,68 +2697,3 @@ def _series_to_service_result(
         service_result.auto_save = auto_save
     return service_result
-def _filter_analysis_results(
-    search_key: Union[int, slice, str],
-    data: pd.DataFrame,
-) -> pd.DataFrame:
-    """Helper function to search result data for given key.
-    Args:
-        search_key: Key to search for.
-        data: Full result dataframe.
-    Returns:
-        Truncated dataframe.
-    """
-    out = _search_data(search_key, data)
-    if isinstance(out, pd.Series):
-        return pd.DataFrame([out])
-    return out
-def _search_data(search_key, data):
-    if search_key is None:
-        return data
-    raise TypeError(
-        f"Invalid search key {search_key}. " f"This must be either int, slice or str type."
-    )
-def _search_with_int(
-    search_key: int,
-    data: pd.DataFrame,
-    if search_key >= len(data):
-        return None
-    return data.iloc[search_key]
-def _search_with_slice(
-    search_key: slice,
-    data: pd.DataFrame,
-    out = data[search_key]
-    if len(out) == 0:
-        return None
-    return out
-def _search_with_str(
-    search_key: str,
-    data: pd.DataFrame,
-    if search_key in data.index:
-        # This key is table entry hash
-        return data.loc[search_key]
-    # This key is name of entry
-    out = data[data["name"] == search_key]
-    if len(out) == 0:
-        return None
-    return out
diff --git a/test/ b/test/
index 9131492904..369d6f169d 100644
--- a/test/
+++ b/test/
@@ -294,8 +294,8 @@ def _check_result_table(
     """Check equality of data frame which may involve Qiskit Experiments class value."""
-    table1 = data1.copy().to_dict(orient="index")
-    table2 = data2.copy().to_dict(orient="index")
+    table1 = data1.dataframe.to_dict(orient="index")
+    table2 = data2.dataframe.to_dict(orient="index")
     for table in (table1, table2):
         for result in table.values():
diff --git a/test/framework/ b/test/framework/
new file mode 100644
index 0000000000..a6dae47f7a
--- /dev/null
+++ b/test/framework/
@@ -0,0 +1,120 @@
+# This code is part of Qiskit.
+# (C) Copyright IBM 2023.
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+"""Test case for data table."""
+from test.base import QiskitExperimentsTestCase
+import uuid
+from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
+class TestAnalysisTable(QiskitExperimentsTestCase):
+    """Test case for extra functionality of analysis table."""
+    def test_add_get_entry_with_result_id(self):
+        """Test adding entry with result_id. Index is created by truncating long string."""
+        table = AnalysisResultTable()
+        table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123)
+        self.assertEqual(table.get_data("9a0bdec8").iloc[0].value, 0.123)
+    def test_drop_entry(self):
+        """Test drop entry from the table."""
+        table = AnalysisResultTable()
+        table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123)
+        table.del_data("9a0bdec8")
+        self.assertEqual(len(table), 0)
+    def test_drop_non_existing_entry(self):
+        """Test dropping non-existing entry raises KeyError."""
+        table = AnalysisResultTable()
+        with self.assertRaises(KeyError):
+            table.del_data("9a0bdec8")
+    def test_raises_adding_duplicated_index(self):
+        """Test adding duplicated index should raise."""
+        table = AnalysisResultTable()
+        table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0)
+        with self.assertRaises(ValueError):
+            # index 9a0bdec8 is already used
+            table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=1.0)
+    def test_clear_container(self):
+        """Test reset table."""
+        table = AnalysisResultTable()
+        table.add_data(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0, extra=123)
+        self.assertEqual(len(table), 1)
+        table.clear()
+        self.assertEqual(len(table), 0)
+        self.assertListEqual(table.columns, AnalysisResultTable.DEFAULT_COLUMNS)
+    def test_extra_column_name_is_always_returned(self):
+        """Test extra column names are always returned in filtered column names."""
+        table = AnalysisResultTable()
+        table.add_data(extra=0.123)
+        minimal_columns = table.get_data(0, "minimal")
+        self.assertTrue("extra" in minimal_columns.columns)
+        default_columns = table.get_data(0, "default")
+        self.assertTrue("extra" in default_columns.columns)
+        all_columns = table.get_data(0, "all")
+        self.assertTrue("extra" in all_columns.columns)
+    def test_listing_result_id(self):
+        """Test returning result IDs of all stored entries."""
+        table = AnalysisResultTable()
+        ref_ids = [str(uuid.uuid4()) for _ in range(10)]
+        for ref_id in ref_ids:
+            table.add_data(result_id=ref_id, value=0)
+        self.assertListEqual(table.result_ids, ref_ids)
+    def test_no_overlap_result_id(self):
+        """Test automatically preparing unique result IDs for a sufficient number of entries."""
+        table = AnalysisResultTable()
+        for i in range(100):
+            table.add_data(value=i)
+        self.assertEqual(len(table), 100)
+    def test_round_trip(self):
+        """Test JSON roundtrip serialization with the experiment encoder."""
+        table = AnalysisResultTable()
+        table.add_data(result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", name="x", value=0.0)
+        table.add_data(result_id="7c305972-858d-42a0-9b5e-57162efe20a1", name="y", value=1.0)
+        table.add_data(result_id="61d8d351-c0cf-4a0a-ae57-fde0f3baa00d", name="z", value=2.0)
+        self.assertRoundTripSerializable(table)
+    def test_round_trip_with_extra(self):
+        """Test JSON roundtrip serialization with extra columns containing missing value."""
+        table = AnalysisResultTable()
+        table.add_data(
+            result_id="30d5d05c-c074-4d3c-9530-07a83d48883a",
+            name="x",
+            value=0.0,
+            extra1=2,
+        )
+        table.add_data(
+            result_id="7c305972-858d-42a0-9b5e-57162efe20a1",
+            name="y",
+            value=1.0,
+            extra2=0.123,
+        )
+        self.assertRoundTripSerializable(table)
diff --git a/test/framework/ b/test/framework/
deleted file mode 100644
index a1e34e7a1f..0000000000
--- a/test/framework/
+++ /dev/null
@@ -1,221 +0,0 @@
-# This code is part of Qiskit.
-# (C) Copyright IBM 2023.
-# This code is licensed under the Apache License, Version 2.0. You may
-# obtain a copy of this license in the LICENSE.txt file in the root directory
-# of this source tree or at
-# Any modifications or derivative works of this code must retain this
-# copyright notice, and modified files need to carry a notice indicating
-# that they have been altered from the originals.
-"""Test case for data table."""
-from test.base import QiskitExperimentsTestCase
-import uuid
-import pandas as pd
-from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
-from qiskit_experiments.framework.table_mixin import DefaultColumnsMixIn
-def _callable_thread_local_add_entry(args, thread_table):
-    """A test callable that is called from multi-thread."""
-    index, kwargs = args
-    thread_table.add_entry(index, **kwargs)
-class TestBaseTable(QiskitExperimentsTestCase):
-    """Test case for default columns mix-in."""
-    class TestTable(pd.DataFrame, DefaultColumnsMixIn):
-        """A table class under test with test columns."""
-        @classmethod
-        def _default_columns(cls):
-            return ["value1", "value2", "value3"]
-    def test_initializing_with_dict(self):
-        """Test initializing table with dictionary."""
-        table = TestBaseTable.TestTable.from_dict(
-            {
-                "x": {"value1": 1.0, "value2": 2.0, "value3": 3.0},
-                "y": {"value1": 4.0, "value2": 5.0, "value3": 6.0},
-            },
-            orient="index",
-        )
-        self.assertListEqual(list(table.columns), ["value1", "value2", "value3"])
-    def test_add_entry(self):
-        """Test adding data with default keys to table."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0])
-    def test_add_entry_with_missing_key(self):
-        """Test adding entry with partly specified keys."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", value1=0.0, value3=2.0)
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, None, 2.0])
-    def test_add_entry_with_new_key(self):
-        """Test adding data with new keys to table."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0)
-        self.assertListEqual(list(table.columns), ["value1", "value2", "value3", "extra"])
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0, 3.0])
-    def test_add_entry_with_multiple_new_keys(self):
-        """Test new keys are added to column and the key order is preserved."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", phi=0.1, lamb=0.2, theta=0.3)
-        self.assertListEqual(
-            list(table.columns), ["value1", "value2", "value3", "phi", "lamb", "theta"]
-        )
-    def test_dtype_missing_value_is_none(self):
-        """Test if missing value is always None.
-        Deta frame implicitly convert None into NaN for numeric container.
-        This should not happen.
-        """
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", value1=1.0)
-        table.add_entry(index="y", value2=1.0)
-        self.assertEqual(table.loc["x", "value2"], None)
-        self.assertEqual(table.loc["y", "value1"], None)
-    def test_dtype_adding_extra_later(self):
-        """Test adding new row later with a numeric value doesn't change None to NaN."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x")
-        table.add_entry(index="y", extra=1.0)
-        self.assertListEqual(table.loc["x"].to_list(), [None, None, None, None])
-    def test_dtype_adding_null_row(self):
-        """Test adding new row with empty value doesn't change dtype of the columns."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", extra1=1, extra2=1.0, extra3=True, extra4="abc")
-        table.add_entry(index="y")
-        self.assertIsInstance(table.loc["x", "extra1"], int)
-        self.assertIsInstance(table.loc["x", "extra2"], float)
-        self.assertIsInstance(table.loc["x", "extra3"], bool)
-        self.assertIsInstance(table.loc["x", "extra4"], str)
-    def test_filter_columns(self):
-        """Test filtering table with columns."""
-        table = TestBaseTable.TestTable()
-        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
-        filt_table = table[["value1", "value3"]]
-        self.assertListEqual(filt_table.loc["x"].to_list(), [0.0, 2.0])
-class TestAnalysisTable(QiskitExperimentsTestCase):
-    """Test case for extra functionality of analysis table."""
-    def test_add_get_entry_with_result_id(self):
-        """Test adding entry with result_id. Index is created by truncating long string."""
-        table = AnalysisResultTable()
-        table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123)
-        self.assertEqual(table.get_entry("9a0bdec8").value, 0.123)
-    def test_drop_entry(self):
-        """Test drop entry from the table."""
-        table = AnalysisResultTable()
-        table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123)
-        table.drop_entry("9a0bdec8")
-        self.assertEqual(len(table), 0)
-    def test_drop_non_existing_entry(self):
-        """Test dropping non-existing entry raises ValueError."""
-        table = AnalysisResultTable()
-        with self.assertRaises(ValueError):
-            table.drop_entry("9a0bdec8")
-    def test_raises_adding_duplicated_index(self):
-        """Test adding duplicated index should raise."""
-        table = AnalysisResultTable()
-        table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0)
-        with self.assertRaises(ValueError):
-            # index 9a0bdec8 is already used
-            table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=1.0)
-    def test_clear_container(self):
-        """Test reset table."""
-        table = AnalysisResultTable()
-        table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.0, extra=123)
-        self.assertEqual(len(table), 1)
-        table.clear()
-        self.assertEqual(len(table), 0)
-        self.assertListEqual(table.copy().extra_columns(), [])
-    def test_extra_column_name_is_always_returned(self):
-        """Test extra column names are always returned in filtered column names."""
-        table = AnalysisResultTable()
-        table.add_entry(extra=0.123)
-        minimal_columns = table.filter_columns("minimal")
-        self.assertTrue("extra" in minimal_columns)
-        default_columns = table.filter_columns("default")
-        self.assertTrue("extra" in default_columns)
-        all_columns = table.filter_columns("all")
-        self.assertTrue("extra" in all_columns)
-    def test_listing_result_id(self):
-        """Test returning result IDs of all stored entries."""
-        table = AnalysisResultTable()
-        ref_ids = [str(uuid.uuid4()) for _ in range(10)]
-        for ref_id in ref_ids:
-            table.add_entry(result_id=ref_id, value=0)
-        self.assertListEqual(table.result_ids(), ref_ids)
-    def test_no_overlap_result_id(self):
-        """Test automatically prepare unique result IDs for sufficient number of entries."""
-        table = AnalysisResultTable()
-        for i in range(100):
-            table.add_entry(value=i)
-        self.assertEqual(len(table), 100)
-    def test_round_trip(self):
-        """Test JSON roundtrip serialization with the experiment encoder."""
-        table = AnalysisResultTable()
-        table.add_entry(result_id="30d5d05c-c074-4d3c-9530-07a83d48883a", name="x", value=0.0)
-        table.add_entry(result_id="7c305972-858d-42a0-9b5e-57162efe20a1", name="y", value=1.0)
-        table.add_entry(result_id="61d8d351-c0cf-4a0a-ae57-fde0f3baa00d", name="z", value=2.0)
-        self.assertRoundTripSerializable(table)
-    def test_round_trip_with_extra(self):
-        """Test JSON roundtrip serialization with extra columns containing missing value."""
-        table = AnalysisResultTable()
-        table.add_entry(
-            result_id="30d5d05c-c074-4d3c-9530-07a83d48883a",
-            name="x",
-            value=0.0,
-            extra1=2,
-        )
-        table.add_entry(
-            result_id="7c305972-858d-42a0-9b5e-57162efe20a1",
-            name="y",
-            value=1.0,
-            extra2=0.123,
-        )
-        self.assertRoundTripSerializable(table)

From 7a8317978e668f44989c96eb3bded4a7a11561e7 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 18 Jan 2024 16:57:37 +0900
Subject: [PATCH 02/29] Update internals of ScatterTable

 qiskit_experiments/curve_analysis/ |   1 +
 .../               |  69 ++--
 .../curve_analysis/          | 206 +++++-----
 .../curve_analysis/           | 364 +++++++++++++-----
 4 files changed, 386 insertions(+), 254 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 2db6044fff..992946d28e 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -39,6 +39,7 @@
 .. autosummary::
     :toctree: ../stubs/
+    ScatterTable
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 77dad880d8..83d0053190 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -230,34 +230,35 @@ def _create_figures(
             A list of figures.
         for analysis in self.analyses():
-            sub_data = curve_data[ ==]
-            for name, data in list(sub_data.groupby("name")):
-                full_name = f"{name}_{}"
+            group_data = curve_data.filter(analysis=analysis.name)
+            model_names = analysis.model_names()
+            for uid, sub_data in group_data.iter_by_class():
+                full_name = f"{model_names[uid]}_{analysis.name}"
                 # Plot raw data scatters
                 if analysis.options.plot_raw_data:
-                    raw_data = data[data.category == "raw"]
+                    raw_data = sub_data.filter(category="raw")
-                        x=raw_data.xval.to_numpy(),
-                        y=raw_data.yval.to_numpy(),
+                        x=raw_data.x,
+                        y=raw_data.y,
                 # Plot formatted data scatters
-                formatted_data = data[data.category == analysis.options.fit_category]
+                formatted_data = sub_data.filter(category=analysis.options.fit_category)
-                    x_formatted=formatted_data.xval.to_numpy(),
-                    y_formatted=formatted_data.yval.to_numpy(),
-                    y_formatted_err=formatted_data.yerr.to_numpy(),
+                    x_formatted=formatted_data.x,
+                    y_formatted=formatted_data.y,
+                    y_formatted_err=formatted_data.y_err,
                 # Plot fit lines
-                line_data = data[data.category == "fitted"]
+                line_data = sub_data.filter(category="fitted")
                 if len(line_data) == 0:
-                fit_stdev = line_data.yerr.to_numpy()
+                fit_stdev = line_data.y_err
-                    x_interp=line_data.xval.to_numpy(),
-                    y_interp=line_data.yval.to_numpy(),
+                    x_interp=line_data.x,
+                    y_interp=line_data.y,
                     y_interp_err=fit_stdev if np.isfinite(fit_stdev).all() else None,
@@ -354,7 +355,7 @@ def _run_analysis(
             metadata["group"] =
             table = analysis._format_data(analysis._run_data_processing(
-            formatted_subset = table[table.category == analysis.options.fit_category]
+            formatted_subset = table.filter(category=analysis.options.fit_category)
             fit_data = analysis._run_curve_fit(formatted_subset)
             fit_dataset[] = fit_data
@@ -376,32 +377,36 @@ def _run_analysis(
             if fit_data.success:
                 # Add fit data to curve data table
-                fit_curves = []
-                columns = list(table.columns)
                 model_names = analysis.model_names()
-                for i, sub_data in list(formatted_subset.groupby("class_id")):
-                    xval = sub_data.xval.to_numpy()
+                for i, sub_data in formatted_subset.iter_by_class():
+                    xval = sub_data.x
                     if len(xval) == 0:
                         # If data is empty, skip drawing this model.
                         # This is the case when fit model exist but no data to fit is provided.
                     # Compute X, Y values with fit parameters.
-                    xval_fit = np.linspace(np.min(xval), np.max(xval), num=100)
-                    yval_fit = eval_with_uncertainties(
-                        x=xval_fit,
+                    xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
+                    uval_arr_fit = eval_with_uncertainties(
+                        x=xval_arr_fit,
-                    model_fit = np.full((100, len(columns)), np.nan, dtype=object)
-                    fit_curves.append(model_fit)
-                    model_fit[:, columns.index("xval")] = xval_fit
-                    model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit)
+                    yval_arr_fit = unp.nominal_values(uval_arr_fit)
                     if fit_data.covar is not None:
-                        model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit)
-                    model_fit[:, columns.index("name")] = model_names[i]
-                    model_fit[:, columns.index("class_id")] = i
-                    model_fit[:, columns.index("category")] = "fitted"
-                table = table.append_list_values(other=np.vstack(fit_curves))
+                        yerr_arr_fit = unp.std_devs(uval_arr_fit)
+                    else:
+                        yerr_arr_fit = np.zeros_like(xval_arr_fit)
+                    for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
+                        table.add_row(
+                            name=model_names[i],
+                            class_id=i,
+                            category="fitted",
+                            x=xval,
+                            y=yval,
+                            y_err=yerr,
+                            shots=pd.NA,
+                            analysis=analysis.name,
+                        )
@@ -416,8 +421,6 @@ def _run_analysis(
                     analysis._create_curve_data(curve_data=formatted_subset, **metadata)
-            # Add extra column to identify the fit model
-            table["group"] =
         combined_curve_data = pd.concat(curve_data_set)
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index d2eea799cc..0c84c03750 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -17,8 +17,6 @@
 from typing import Dict, List, Tuple, Union, Optional
 from functools import partial
-from itertools import groupby
-from operator import itemgetter
 import lmfit
 import numpy as np
@@ -178,18 +176,20 @@ def _run_data_processing(
             to_process = raw_data
-        # This must align with ScatterTable columns. Use struct array.
-        dtypes = np.dtype(
-            [
-                ("xval", float),
-                ("yval", float),
-                ("yerr", float),
-                ("name", "U30"),
-                ("class_id", int),
-                ("category", "U30"),
-                ("shots", int),
-            ]
-        )
+        # Compute y value
+        if not self.options.data_processor:
+            raise ValueError(
+                f"Data processor is not set for the {self.__class__.__name__} instance. "
+                "Initialize the instance with the experiment data, or set the "
+                "data_processor analysis options."
+            )
+        processed = self.options.data_processor(to_process)
+        yvals = unp.nominal_values(processed).flatten()
+        with np.errstate(invalid="ignore"):
+            # For averaged data, the processed std dev will be NaN.
+            # Setting std_devs to NaN will trigger floating point exceptions
+            # which we can ignore. See
+            yerrs = unp.std_devs(processed).flatten()
         # Prepare circuit metadata to data class mapper from data_subfit_map value.
         if len(self._models) == 1:
@@ -197,8 +197,8 @@ def _run_data_processing(
             classifier = self.options.data_subfit_map
-        source = np.empty(len(to_process), dtype=dtypes)
-        for idx, datum in enumerate(to_process):
+        table = ScatterTable()
+        for datum, yval, yerr in zip(to_process, yvals, yerrs):
             metadata = datum["metadata"]
                 xval = metadata[opt.x_key]
@@ -206,44 +206,25 @@ def _run_data_processing(
                 raise DataProcessorError(
                     f"X value key {opt.x_key} is not defined in the circuit metadata."
                 ) from ex
-            source[idx]["xval"] = xval
-            source[idx]["shots"] = datum.get("shots", -1)
             # Assign entry name and class id
-            for class_id, (name, spec) in enumerate(classifier.items()):
+            for uid, (name, spec) in enumerate(classifier.items()):
                 if spec.items() <= metadata.items():
-                    source[idx]["class_id"] = class_id
-                    source[idx]["name"] = name
                 # This is unclassified data.
-                # Assume that normal ID will never become negative number.
-                # This is numpy struct array object and cannot store pandas nullable integer.
-                source[idx]["class_id"] = -1
-                source[idx]["name"] = ""
-        # Compute y value
-        if not self.options.data_processor:
-            raise ValueError(
-                f"Data processor is not set for the {self.__class__.__name__} instance. "
-                "Initialize the instance with the experiment data, or set the "
-                "data_processor analysis options."
+                uid = pd.NA
+                name = pd.NA
+            table.add_row(
+                name=name,
+                class_id=uid,
+                category=category,
+                x=xval,
+                y=yval,
+                y_err=yerr,
+                shots=datum.get("shots", pd.NA),
+                analysis=self.name,
-        processed_values = self.options.data_processor(to_process)
-        source["yval"] = unp.nominal_values(processed_values).flatten()
-        with np.errstate(invalid="ignore"):
-            # For averaged data, the processed std dev will be NaN.
-            # Setting std_devs to NaN will trigger floating point exceptions
-            # which we can ignore. See
-            source["yerr"] = unp.std_devs(processed_values).flatten()
-        source["category"] = category
-        table = ScatterTable(data=source)
-        # Replace temporary -1 value with nullable integer
-        table["class_id"] = table["class_id"].replace(-1, pd.NA)
-        table["shots"] = table["shots"].replace(-1, pd.NA)
         return table
     def _format_data(
@@ -265,39 +246,31 @@ def _format_data(
             "iwv": inverse_weighted_variance,
             "sample": sample_average,
-        columns = list(curve_data.columns)
-        sort_by = itemgetter(
-            columns.index("class_id"),
-            columns.index("xval"),
-        )
-        # Use python native groupby method on ndarray. This is more performant than pandas one.
         average = averaging_methods[self.options.average_method]
         model_names = self.model_names()
-        formatted = []
-        for (_, xv), g in groupby(sorted(curve_data.values, key=sort_by), key=sort_by):
-            g_values = np.array(list(g))
-            g_dict = dict(zip(columns, g_values.T))
-            avg_yval, avg_yerr, shots = average(g_dict["yval"], g_dict["yerr"], g_dict["shots"])
-            data_name = g_dict["name"][0]
+        for (name, xval), sub_data in curve_data.iter_groups("name", "xval"):
+            avg_yval, avg_yerr, shots = average(
+                sub_data.y,
+                sub_data.y_err,
+                sub_data.shots,
+            )
-                # Map data index to model index through assigned name.
-                # Data name should match with the model name.
-                # Otherwise, the model index is unclassified.
-                model_id = model_names.index(data_name)
-            except ValueError:
-                model_id = pd.NA
-            averaged = dict.fromkeys(columns)
-            averaged["category"] = category
-            averaged["xval"] = xv
-            averaged["yval"] = avg_yval
-            averaged["yerr"] = avg_yerr
-            averaged["name"] = data_name
-            averaged["class_id"] = model_id
-            averaged["shots"] = shots
-            formatted.append(list(averaged.values()))
-        return curve_data.append_list_values(formatted)
+                uid = model_names.index(name)
+            except ValueError:  # list.index raises ValueError for unknown names
+                uid = pd.NA
+            curve_data.add_row(
+                name=name,
+                class_id=uid,
+                category=category,
+                x=xval,
+                y=avg_yval,
+                y_err=avg_yerr,
+                shots=shots,
+                analysis=self.name,
+            )
+        return curve_data
     def _generate_fit_guesses(
@@ -365,13 +338,13 @@ def _run_curve_fit(
         # Create convenient function to compute residual of the models.
         partial_residuals = []
-        valid_uncertainty = np.all(np.isfinite(curve_data.yerr.to_numpy()))
-        for i, sub_data in list(curve_data.groupby("class_id")):
+        valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
+        for i, sub_data in curve_data.iter_by_class():
             if valid_uncertainty:
                 nonzero_yerr = np.where(
-                    np.isclose(sub_data.yerr, 0.0),
+                    np.isclose(sub_data.y_err, 0.0),
-                    sub_data.yerr,
+                    sub_data.y_err,
                 raw_weights = 1 / nonzero_yerr
                 # Remove outlier. When all sample values are the same with sample average,
@@ -384,9 +357,9 @@ def _run_curve_fit(
                 weights = None
             model_residual = partial(
-                data=sub_data.yval.to_numpy(),
+                data=sub_data.y,
-                x=sub_data.xval.to_numpy(),
+                x=sub_data.x,
@@ -428,8 +401,8 @@ def _run_curve_fit(
         return convert_lmfit_result(
-            curve_data.xval.to_numpy(),
-            curve_data.yval.to_numpy(),
+            curve_data.x,
+            curve_data.y,
     def _create_figures(
@@ -444,33 +417,34 @@ def _create_figures(
             A list of figures.
-        for name, data in list(curve_data.groupby("name")):
+        for i, sub_data in curve_data.iter_by_class():
+            name = self.model_names()[i]
             # Plot raw data scatters
             if self.options.plot_raw_data:
-                raw_data = data[data.category == "raw"]
+                raw_data = sub_data.filter(category="raw")
-                    x=raw_data.xval.to_numpy(),
-                    y=raw_data.yval.to_numpy(),
+                    x=raw_data.x,
+                    y=raw_data.y,
             # Plot formatted data scatters
-            formatted_data = data[data.category == self.options.fit_category]
+            formatted_data = sub_data.filter(category=self.options.fit_category)
-                x_formatted=formatted_data.xval.to_numpy(),
-                y_formatted=formatted_data.yval.to_numpy(),
-                y_formatted_err=formatted_data.yerr.to_numpy(),
+                x_formatted=formatted_data.x,
+                y_formatted=formatted_data.y,
+                y_formatted_err=formatted_data.y_err,
             # Plot fit lines
-            line_data = data[data.category == "fitted"]
+            line_data = sub_data.filter(category="fitted")
             if len(line_data) == 0:
-                x_interp=line_data.xval.to_numpy(),
-                y_interp=line_data.yval.to_numpy(),
+                x_interp=line_data.x,
+                y_interp=line_data.y,
-            fit_stdev = line_data.yerr.to_numpy()
+            fit_stdev = line_data.y_err
             if np.isfinite(fit_stdev).all():
@@ -499,7 +473,7 @@ def _run_analysis(
         table = self._format_data(self._run_data_processing(
-        formatted_subset = table[table.category == self.options.fit_category]
+        formatted_subset = table.filter(category=self.options.fit_category)
         fit_data = self._run_curve_fit(formatted_subset)
         if fit_data.success:
@@ -524,32 +498,36 @@ def _run_analysis(
         if fit_data.success:
             # Add fit data to curve data table
-            fit_curves = []
-            columns = list(table.columns)
             model_names = self.model_names()
-            for i, sub_data in list(formatted_subset.groupby("class_id")):
-                xval = sub_data.xval.to_numpy()
+            for i, sub_data in formatted_subset.iter_by_class():
+                xval = sub_data.x
                 if len(xval) == 0:
                     # If data is empty, skip drawing this model.
                     # This is the case when fit model exist but no data to fit is provided.
                 # Compute X, Y values with fit parameters.
-                xval_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
-                yval_fit = eval_with_uncertainties(
-                    x=xval_fit,
+                xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
+                uval_arr_fit = eval_with_uncertainties(
+                    x=xval_arr_fit,
-                model_fit = np.full((100, len(columns)), None, dtype=object)
-                fit_curves.append(model_fit)
-                model_fit[:, columns.index("xval")] = xval_fit
-                model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit)
+                yval_arr_fit = unp.nominal_values(uval_arr_fit)
                 if fit_data.covar is not None:
-                    model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit)
-                model_fit[:, columns.index("name")] = model_names[i]
-                model_fit[:, columns.index("class_id")] = i
-                model_fit[:, columns.index("category")] = "fitted"
-            table = table.append_list_values(other=np.vstack(fit_curves))
+                    yerr_arr_fit = unp.std_devs(uval_arr_fit)
+                else:
+                    yerr_arr_fit = np.zeros_like(xval_arr_fit)
+                for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
+                    table.add_row(
+                        name=model_names[i],
+                        class_id=i,
+                        category="fitted",
+                        x=xval,
+                        y=yval,
+                        y_err=yerr,
+                        shots=pd.NA,
+                        analysis=self.name,
+                    )
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 4361274b9e..8f7c401e2b 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -9,185 +9,335 @@
 # Any modifications or derivative works of this code must retain this
 # copyright notice, and modified files need to carry a notice indicating
 # that they have been altered from the originals.
 """Table representation of the x, y data for curve fitting."""
+from __future__ import annotations
 import logging
-from typing import List, Sequence, Dict, Any, Union
+from collections.abc import Iterator
+from typing import Any
+from itertools import groupby
+from operator import itemgetter
 import numpy as np
 import pandas as pd
 from qiskit.utils import deprecate_func
-from qiskit_experiments.framework.table_mixin import DefaultColumnsMixIn
 LOG = logging.getLogger(__name__)
-class ScatterTable(pd.DataFrame, DefaultColumnsMixIn):
-    """A table to store x and y data with metadata associated with the data point.
+class ScatterTable:
+    """A table-like dataset for curve fitting intermediate data.
-    This class is implemented upon the pandas dataframe.
-    See `pandas dataframe documentation <>`_
-    for the base class API documentation.
+    Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
+    This table cannot be expanded with user-provided column names.
+    See attribute documentation for what columns represent.
-    A single ``ScatterTable`` object can contain different kind of intermediate data
-    generated through the curve fitting, which are classified by the fit model.
-    When an experiment has sub-data for ``sub_exp_1``, the formatted x, y, and y-error
-    array data may be obtained from the original table object as follows:
+    This dataset is not thread safe. Do not use the same instance in multiple threads.
+    """
-    .. code-block::python
+        "xval",
+        "yval",
+        "yerr",
+        "name",
+        "class_id",
+        "category",
+        "shots",
+        "analysis",
+    ]
-        abc_data = table[
-            (table.name == "sub_exp_1") & (table.category == "formatted")
-        ]
-        x, y, e = abc_data.xval.to_numpy(), abc_data.yval.to_numpy(), abc_data.yerr.to_numpy()
+        "Float64",
+        "Float64",
+        "Float64",
+        "string",
+        "Int64",
+        "string",
+        "Int64",
+        "string",
+    ]
-    """
+    def __init__(self):
+        """Create new dataset."""
+        super().__init__()
+        self._lazy_add_rows = []
+        self._dump = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
-    # TODO Add this to toctree. In current mechanism all pandas DataFrame members are rendered
-    #  and it fails in the Sphinx build process. We may need a custom directive to
-    #  exclude class members from an external package.
+    @property
+    def _data(self) -> pd.DataFrame:
+        if self._lazy_add_rows:
+            # Add data when table element is called.
+            # Adding rows in loop is extremely slow in pandas.
+            tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.DEFAULT_COLUMNS)
+            if len(self._dump) == 0:
+                self._dump = tmp_df
+            else:
+                self._dump = pd.concat([self._dump, tmp_df], ignore_index=True)
+            self._lazy_add_rows.clear()
+        return self._dump
-    def _default_columns(cls) -> List[str]:
-        return [
-            "xval",
-            "yval",
-            "yerr",
-            "name",
-            "class_id",
-            "category",
-            "shots",
-        ]
-    @deprecate_func(
-        since="0.6",
-        additional_msg="Curve data uses dataframe representation. Use dataframe filtering method.",
-        pending=True,
-        package_name="qiskit-experiments",
-    )
-    def get_subset_of(self, index: Union[str, int]) -> "ScatterTable":
-        """Filter data by series name or index.
+    def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
+        """Create new dataset with existing dataframe.
-            index: Series index of name.
+            data: Data dataframe object.
-            A subset of data corresponding to a particular series.
+            A new ScatterTable instance.
-        if isinstance(index, int):
-            index = self.labels[index]
-        return self[self.name == index]
+        if list(data.columns) != cls.DEFAULT_COLUMNS:
+            raise ValueError("Input dataframe columns don't match with the ScatterTable spec.")
+        instance = object.__new__(ScatterTable)
+        instance._lazy_add_rows = []
+        instance._dump = data
+        return instance
+    @property
+    def dataframe(self):
+        """Dataframe object of data points."""
+        return self._data.replace(np.nan, pd.NA).astype(
+            dict(zip(self.DEFAULT_COLUMNS, self.DEFAULT_DTYPES)),
+            copy=True,
+        )
-    @deprecate_func(
-        since="0.6",
-        additional_msg="Curve data uses dataframe representation. Call .xval.to_numpy() instead.",
-        pending=True,
-        package_name="qiskit-experiments",
-        is_property=True,
-    )
     def x(self) -> np.ndarray:
         """X values."""
-        return self.xval.to_numpy()
+        return self._data.xval.to_numpy(dtype=float)
+    @x.setter
+    def x(self, new_values):
+        self._data.loc[:, "x"] = new_values
-    @deprecate_func(
-        since="0.6",
-        additional_msg="Curve data uses dataframe representation. Call .yval.to_numpy() instead.",
-        pending=True,
-        package_name="qiskit-experiments",
-        is_property=True,
-    )
     def y(self) -> np.ndarray:
         """Y values."""
-        return self.yval.to_numpy()
+        return self._data.yval.to_numpy(dtype=float)
+    @y.setter
+    def y(self, new_values: np.ndarray):
+        self._data.loc[:, "y"] = new_values
-    @deprecate_func(
-        since="0.6",
-        additional_msg="Curve data uses dataframe representation. Call .yerr.to_numpy() instead.",
-        pending=True,
-        package_name="qiskit-experiments",
-        is_property=True,
-    )
     def y_err(self) -> np.ndarray:
         """Standard deviation of y values."""
-        return self.yerr.to_numpy()
+        return self._data.yerr.to_numpy(dtype=float)
+    @y_err.setter
+    def y_err(self, new_values: np.ndarray):
+        self._data.loc[:, "y_err"] = new_values
-    @deprecate_func(
-        since="0.6",
-        additional_msg="Curve data uses dataframe representation. Call .shots.to_numpy() instead.",
-        pending=True,
-        package_name="qiskit-experiments",
-        is_property=True,
-    )
-    def shots(self):
-        """Shot number of data points."""
-        return self["shots"].to_numpy()
+    def name(self) -> np.ndarray:
+        """Corresponding data name."""
+        return self._data.name.to_numpy(dtype=str)
+    @name.setter
+    def name(self, new_values: np.ndarray):
+        self._data.loc[:, "name"] = new_values
+    @property
+    def class_id(self) -> np.ndarray:
+        """Corresponding data UID."""
+        return self._data.class_id.to_numpy(dtype=int)
+    @class_id.setter
+    def class_id(self, new_values: np.ndarray):
+        self._data.loc[:, "class_id"] = new_values
+    @property
+    def category(self) -> np.ndarray:
+        """Category of data points."""
+        return self._data.category.to_numpy(dtype=str)
+    @category.setter
+    def category(self, new_values: np.ndarray):
+        self._data.loc[:, "category"] = new_values
+    @property
+    def shots(self) -> np.ndarray:
+        """Shot number used to acquire data points."""
+        return self._data.shots.to_numpy(dtype=int)
+    @shots.setter
+    def shots(self, new_values: np.ndarray):
+        self._data.loc[:, "shots"] = new_values
+    @property
+    def analysis(self) -> np.ndarray:
+        """Corresponding analysis name."""
+        return self._data.analysis.to_numpy(dtype=str)
+    @analysis.setter
+    def analysis(self, new_values: np.ndarray):
+        self._data.loc[:, "analysis"] = new_values
+    def filter(
+        self,
+        kind: int | str | None = None,
+        category: str | None = None,
+        analysis: str | None = None,
+    ) -> ScatterTable:
+        """Filter data by class, category, and/or analysis name.
+        Args:
+            kind: Identifier of the data, either data UID or name.
+            category: Name of data category.
+            analysis: Name of analysis.
+        Returns:
+            New ScatterTable object with filtered data.
+        """
+        filt_data = self._data
+        if kind is not None:
+            if isinstance(kind, int):
+                index = self._data.class_id == kind
+            elif isinstance(kind, str):
+                index = self._data.name == kind
+            else:
+                raise ValueError(f"Invalid kind type {type(kind)}. This must be integer or string.")
+            filt_data = filt_data.loc[index, :]
+        if category is not None:
+            index = self._data.category == category
+            filt_data = filt_data.loc[index, :]
+        if analysis is not None:
+            index = self._data.analysis == analysis
+            filt_data = filt_data.loc[index, :]
+        return ScatterTable.from_dataframe(filt_data)
+    def iter_by_class(self) -> Iterator[tuple[int, "ScatterTable"]]:
+        """Iterate over subset of data sorted by the data UID.
+        Yields:
+            Tuple of data UID and subset of ScatterTable.
+        """
+        ids = self._data.class_id.dropna().sort_values().unique()
+        for mid in ids:
+            index = self._data.class_id == mid
+            yield mid, ScatterTable.from_dataframe(self._data.loc[index, :])
+    def iter_groups(
+        self,
+        *group_by: str,
+    ) -> Iterator[tuple[tuple[Any, ...], "ScatterTable"]]:
+        """Iterate over the subset sorted by multiple column values.
+        Args:
+            group_by: Name of column to group by.
+        Yields:
+            Tuple of keys and subset of ScatterTable.
+        """
+        try:
+            sort_by = itemgetter(*[self.DEFAULT_COLUMNS.index(c) for c in group_by])
+        except ValueError as ex:
+            raise ValueError(
+                f"Specified columns don't exist: {group_by} are not subset of {self.DEFAULT_COLUMNS}."
+            ) from ex
+        # Use python native groupby method on dataframe ndarray when sorting by multiple columns.
+        # This is more performant than pandas groupby implementation.
+        for vals, sub_data in groupby(sorted(self._data.values, key=sort_by), key=sort_by):
+            tmp_df = pd.DataFrame(list(sub_data), columns=self.DEFAULT_COLUMNS)
+            yield vals, ScatterTable.from_dataframe(tmp_df)
+    def add_row(
+        self,
+        name: str | pd.NA,
+        class_id: int | pd.NA,
+        category: str | pd.NA,
+        x: float | pd.NA,
+        y: float | pd.NA,
+        y_err: float | pd.NA,
+        shots: float | pd.NA,
+        analysis: str | pd.NA,
+    ):
+        """Add new data group to the table.
+        Data must be the same length.
+        Args:
+            x: X value.
+            y: Y value.
+            y_err: Standard deviation of y value.
+            shots: Shot number used to acquire this data point.
+            name: Name of this data if available.
+            class_id: Data UID of if available.
+            category: Data category if available.
+            analysis: Analysis name if available.
+        """
+        self._lazy_add_rows.append([x, y, y_err, name, class_id, category, shots, analysis])
-        additional_msg="Curve data uses dataframe representation. Call .model_id.to_numpy() instead.",
+        additional_msg="Curve data uses dataframe representation. Call .model_id instead.",
     def data_allocation(self) -> np.ndarray:
         """Index of corresponding fit model."""
-        # pylint: disable=no-member
-        return self.class_id.to_numpy()
+        return self.class_id
-        additional_msg="Curve data uses dataframe representation. Labels are a part of table.",
+        additional_msg="No alternative is provided. Use .name with set operation.",
-    def labels(self) -> List[str]:
+    def labels(self) -> list[str]:
         """List of model names."""
         # Order sensitive
-        name_id_tups = self.groupby(["name", "class_id"]).groups.keys()
+        name_id_tups = self._data.groupby(["name", "class_id"]).groups.keys()
         return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])]
-    def append_list_values(
-        self,
-        other: Sequence,
-    ) -> "ScatterTable":
-        """Add another list of dataframe values to this dataframe.
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Use filter method instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+    )
+    def get_subset_of(self, index: str | int) -> "ScatterTable":
+        """Filter data by series name or index.
-            other: List of dataframe values to be added.
+            index: Series index of name.
-            New scatter table instance including both self and added data.
+            A subset of data corresponding to a particular series.
-        return ScatterTable(data=[*self.values, *other], columns=self.columns)
+        return self.filter(kind=index)
-    def __json_encode__(self) -> Dict[str, Any]:
+    def __len__(self):
+        return len(self._data)
+    def __json_encode__(self) -> dict[str, Any]:
         return {
             "class": "ScatterTable",
-            "data": self.to_dict(orient="index"),
+            "data": self._data.to_dict(orient="index"),
-    def __json_decode__(cls, value: Dict[str, Any]) -> "ScatterTable":
+    def __json_decode__(cls, value: dict[str, Any]) -> "ScatterTable":
         if not value.get("class", None) == "ScatterTable":
             raise ValueError("JSON decoded value for ScatterTable is not valid class type.")
-        instance = cls.from_dict(
-            data=value.get("data", {}),
-            orient="index",
-        ).replace({np.nan: None})
-        return instance
-    @property
-    def _constructor(self):
-        #
-        return ScatterTable
+        tmp_df = (
+            pd.DataFrame.from_dict(
+                value.get("data", {}),
+                orient="index",
+            )
+            .replace(np.nan, pd.NA)
+            .astype(
+                dict(zip(cls.DEFAULT_COLUMNS, cls.DEFAULT_DTYPES)),
+                copy=False,
+            )
+        )
+        return ScatterTable.from_dataframe(tmp_df)

From 3bc95257800c72b24b0710e84876ba8c649465c4 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 18 Jan 2024 16:59:13 +0900
Subject: [PATCH 03/29] Removed unused mixin

 qiskit_experiments/framework/table_mixin.py | 109 --------------------
 1 file changed, 109 deletions(-)
 delete mode 100644 qiskit_experiments/framework/table_mixin.py

diff --git a/qiskit_experiments/framework/table_mixin.py b/qiskit_experiments/framework/table_mixin.py
deleted file mode 100644
index fc59745199..0000000000
--- a/qiskit_experiments/framework/table_mixin.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# This code is part of Qiskit.
-# (C) Copyright IBM 2023.
-# This code is licensed under the Apache License, Version 2.0. You may
-# obtain a copy of this license in the LICENSE.txt file in the root directory
-# of this source tree or at
-# Any modifications or derivative works of this code must retain this
-# copyright notice, and modified files need to carry a notice indicating
-# that they have been altered from the originals.
-# pylint: disable=no-member
-"""MinIn class for pandas dataframe."""
-from typing import List, Callable
-from functools import wraps
-import pandas as pd
-class DefaultColumnsMixIn:
-    """A mixin that sets a default data columns to a dataframe subclass.
-    Subclass must define _default_columns class method to provide default column names.
-    This name list is order sensitive and the first element will show up at the
-    most left column of the dataframe table.
-    .. note::
-        This Mix-in class is designed for use with the pandas DataFrame.
-        Implementation of this class may change in the future without notification
-        when we switch to other data container.
-    """
-    _default_columns: Callable
-    def __init_subclass__(cls, **kwargs):
-        # To make sure the mixin constructor is called.
-        super().__init_subclass__(**kwargs)
-        @wraps(cls.__init__, assigned=("__annotations__",))
-        def _call_init_and_set_columns(self, *init_args, **init_kwargs):
-            super(cls, self).__init__(*init_args, **init_kwargs)
-            if len(self.columns) == 0:
-                self.add_columns(*cls._default_columns())
-        # Monkey patch the mixed class constructor to make sure default columns are added
-        cls.__init__ = _call_init_and_set_columns
-    def add_columns(
-        self: pd.DataFrame,
-        *new_columns: str,
-    ):
-        """Add new columns to the table.
-        This operation mutates the current container.
-        Args:
-            new_columns: Name of columns to add.
-        """
-        # Order sensitive
-        new_columns = [c for c in new_columns if c not in self.columns]
-        if len(new_columns) == 0:
-            return
-        # Update columns
-        for new_column in new_columns:
-            loc = len(self.columns)
-            self.insert(loc, new_column, value=None)
-    def add_entry(
-        self: pd.DataFrame,
-        index: str,
-        **kwargs,
-    ):
-        """Add new entry to the dataframe.
-        Args:
-            index: Name of this entry. Must be unique in this table.
-            kwargs: Description of new entry to register.
-        Returns:
-            Pandas Series of added entry. This doesn't mutate the table.
-        """
-        if not isinstance(index, str):
-            index = str(index)
-        if kwargs.keys() - set(self.columns):
-            self.add_columns(*kwargs.keys())
-        # A hack to avoid unwanted dtype update. Appending new row with .loc indexer
-        # performs enlargement and implicitly changes dtype. This often induces a confusion of
-        # NaN (numeric container) and None (object container) for missing values.
-        # Filling a row with None values before assigning actual values can keep column dtype,
-        # but this behavior might change in future pandas version.
-        #
-        # Also see test.framework.test_data_table.TestBaseTable.test_type_*
-        self.loc[index] = [None] * len(self.columns)
-        template = dict.fromkeys(self.columns, None)
-        template.update(kwargs)
-        self.loc[index] = pd.array(list(template.values()), dtype=object)
-    def extra_columns(
-        self: pd.DataFrame,
-    ) -> List[str]:
-        """Return a list of columns added by a user."""
-        return [c for c in self.columns if c not in self._default_columns()]

From 68013a81ab4d94d559dee26adbf2446dfd3ddd25 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Wed, 31 Jan 2024 18:14:04 +0900
Subject: [PATCH 04/29] Fix index mismatch issue after JSON serialization

 qiskit_experiments/curve_analysis/scatter_table.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qiskit_experiments/curve_analysis/scatter_table.py b/qiskit_experiments/curve_analysis/scatter_table.py
index 8f7c401e2b..3cd848c427 100644
--- a/qiskit_experiments/curve_analysis/scatter_table.py
+++ b/qiskit_experiments/curve_analysis/scatter_table.py
@@ -319,6 +319,9 @@ def get_subset_of(self, index: str | int) -> "ScatterTable":
     def __len__(self):
         return len(self._data)
+    def __eq__(self, other):
+        return self.dataframe.equals(other.dataframe)
     def __json_encode__(self) -> dict[str, Any]:
         return {
             "class": "ScatterTable",
@@ -339,5 +342,6 @@ def __json_decode__(cls, value: dict[str, Any]) -> "ScatterTable":
                 dict(zip(cls.DEFAULT_COLUMNS, cls.DEFAULT_DTYPES)),
+            .reset_index(drop=True)
         return ScatterTable.from_dataframe(tmp_df)

From 5032e86b2b743c4970c27d97ae67393af9892f0b Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 1 Feb 2024 01:54:00 +0900
Subject: [PATCH 05/29] Add more tests

 .../curve_analysis/           |  61 +++--
 test/curve_analysis/     | 222 ++++++++++++++++++
 2 files changed, 250 insertions(+), 33 deletions(-)
 create mode 100644 test/curve_analysis/

diff --git a/qiskit_experiments/curve_analysis/scatter_table.py b/qiskit_experiments/curve_analysis/scatter_table.py
index 3cd848c427..8702644ede 100644
--- a/qiskit_experiments/curve_analysis/scatter_table.py
+++ b/qiskit_experiments/curve_analysis/scatter_table.py
@@ -72,6 +72,7 @@ def _data(self) -> pd.DataFrame:
             # Add data when table element is called.
             # Adding rows in loop is extremely slow in pandas.
             tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.DEFAULT_COLUMNS)
+            tmp_df = self._format_table(tmp_df)
             if len(self._dump) == 0:
                 self._dump = tmp_df
@@ -93,21 +94,18 @@ def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
             raise ValueError("Input dataframe columns don't match with the ScatterTable spec.")
         instance = object.__new__(ScatterTable)
         instance._lazy_add_rows = []
-        instance._dump = data
+        instance._dump = cls._format_table(data)
         return instance
     def dataframe(self):
         """Dataframe object of data points."""
-        return self._data.replace(np.nan, pd.NA).astype(
-            dict(zip(self.DEFAULT_COLUMNS, self.DEFAULT_DTYPES)),
-            copy=True,
-        )
+        return self._data
     def x(self) -> np.ndarray:
         """X values."""
-        return self._data.xval.to_numpy(dtype=float)
+        return self._data.xval.to_numpy(dtype=float, na_value=np.nan)
     def x(self, new_values):
@@ -116,7 +114,7 @@ def x(self, new_values):
     def y(self) -> np.ndarray:
         """Y values."""
-        return self._data.yval.to_numpy(dtype=float)
+        return self._data.yval.to_numpy(dtype=float, na_value=np.nan)
     def y(self, new_values: np.ndarray):
@@ -125,7 +123,7 @@ def y(self, new_values: np.ndarray):
     def y_err(self) -> np.ndarray:
         """Standard deviation of y values."""
-        return self._data.yerr.to_numpy(dtype=float)
+        return self._data.yerr.to_numpy(dtype=float, na_value=np.nan)
     def y_err(self, new_values: np.ndarray):
@@ -134,7 +132,7 @@ def y_err(self, new_values: np.ndarray):
     def name(self) -> np.ndarray:
         """Corresponding data name."""
-        return self._data.name.to_numpy(dtype=str)
+        return self._data.name.to_numpy(dtype=object, na_value=None)
     def name(self, new_values: np.ndarray):
@@ -143,7 +141,7 @@ def name(self, new_values: np.ndarray):
     def class_id(self) -> np.ndarray:
         """Corresponding data UID."""
-        return self._data.class_id.to_numpy(dtype=int)
+        return self._data.class_id.to_numpy(dtype=object, na_value=None)
     def class_id(self, new_values: np.ndarray):
@@ -152,7 +150,7 @@ def class_id(self, new_values: np.ndarray):
     def category(self) -> np.ndarray:
         """Category of data points."""
-        return self._data.category.to_numpy(dtype=str)
+        return self._data.category.to_numpy(dtype=object, na_value=None)
     def category(self, new_values: np.ndarray):
@@ -161,7 +159,7 @@ def category(self, new_values: np.ndarray):
     def shots(self) -> np.ndarray:
         """Shot number used to acquire data points."""
-        return self._data.shots.to_numpy(dtype=int)
+        return self._data.shots.to_numpy(dtype=object, na_value=None)
     def shots(self, new_values: np.ndarray):
@@ -170,7 +168,7 @@ def shots(self, new_values: np.ndarray):
     def analysis(self) -> np.ndarray:
         """Corresponding analysis name."""
-        return self._data.analysis.to_numpy(dtype=str)
+        return self._data.analysis.to_numpy(dtype=object, na_value=None)
     def analysis(self, new_values: np.ndarray):
@@ -248,14 +246,14 @@ def iter_groups(
     def add_row(
-        name: str | pd.NA,
-        class_id: int | pd.NA,
-        category: str | pd.NA,
-        x: float | pd.NA,
-        y: float | pd.NA,
-        y_err: float | pd.NA,
-        shots: float | pd.NA,
-        analysis: str | pd.NA,
+        name: str | pd.NA = pd.NA,
+        class_id: int | pd.NA = pd.NA,
+        category: str | pd.NA = pd.NA,
+        x: float | pd.NA = pd.NA,
+        y: float | pd.NA = pd.NA,
+        y_err: float | pd.NA = pd.NA,
+        shots: float | pd.NA = pd.NA,
+        analysis: str | pd.NA = pd.NA,
         """Add new data group to the table.
@@ -273,6 +271,14 @@ def add_row(
         self._lazy_add_rows.append([x, y, y_err, name, class_id, category, shots, analysis])
+    @classmethod
+    def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
+        return (
+            data.replace(np.nan, pd.NA)
+            .astype(dict(zip(cls.DEFAULT_COLUMNS, cls.DEFAULT_DTYPES)))
+            .reset_index(drop=True)
+        )
@@ -332,16 +338,5 @@ def __json_encode__(self) -> dict[str, Any]:
     def __json_decode__(cls, value: dict[str, Any]) -> "ScatterTable":
         if not value.get("class", None) == "ScatterTable":
             raise ValueError("JSON decoded value for ScatterTable is not valid class type.")
-        tmp_df = (
-            pd.DataFrame.from_dict(
-                value.get("data", {}),
-                orient="index",
-            )
-            .replace(np.nan, pd.NA)
-            .astype(
-                dict(zip(cls.DEFAULT_COLUMNS, cls.DEFAULT_DTYPES)),
-                copy=False,
-            )
-            .reset_index(drop=True)
-        )
+        tmp_df = pd.DataFrame.from_dict(value.get("data", {}), orient="index")
         return ScatterTable.from_dataframe(tmp_df)
diff --git a/test/curve_analysis/ b/test/curve_analysis/
new file mode 100644
index 0000000000..8cc9a62981
--- /dev/null
+++ b/test/curve_analysis/
@@ -0,0 +1,222 @@
+# This code is part of Qiskit.
+# (C) Copyright IBM 2021.
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+"""Test scatter table."""
+from test.base import QiskitExperimentsTestCase
+import pandas as pd
+import numpy as np
+from qiskit_experiments.curve_analysis.scatter_table import ScatterTable
+class TestScatterTable(QiskitExperimentsTestCase):
+    """Test cases for curve analysis ScatterTable."""
+    def setUp(self):
+        super().setUp()
+        source = {
+            "xval": [0.100, 0.100, 0.200, 0.200, 0.100, 0.200, 0.100, 0.200, 0.100, 0.200],
+            "yval": [0.192, 0.784, 0.854, 0.672, 0.567, 0.488, 0.379, 0.671, 0.784, 0.672],
+            "yerr": [0.002, 0.091, 0.090, 0.027, 0.033, 0.038, 0.016, 0.048, 0.091, 0.027],
+            "name": [
+                "model1",
+                "model2",
+                "model1",
+                "model2",
+                "model1",
+                "model1",
+                "model1",
+                "model1",
+                "model2",
+                "model2",
+            ],
+            "class_id": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1],
+            "category": [
+                "raw",
+                "raw",
+                "raw",
+                "raw",
+                "raw",
+                "raw",
+                "formatted",
+                "formatted",
+                "formatted",
+                "formatted",
+            ],
+            "shots": [
+                1000,
+                1000,
+                1000,
+                1000,
+                1000,
+                1000,
+                2000,
+                2000,
+                1000,
+                1000,
+            ],
+            "analysis": [
+                "Fit1",
+                "Fit1",
+                "Fit1",
+                "Fit1",
+                "Fit2",
+                "Fit2",
+                "Fit1",
+                "Fit1",
+                "Fit1",
+                "Fit1",
+            ],
+        }
+        self.reference = pd.DataFrame.from_dict(source)
+    def test_create_table_from_dataframe(self):
+        """Test creating table from dataframe and output dataframe."""
+        # ScatterTable automatically converts dtype.
+        # For pure dataframe equality check pre-format the source.
+        formatted_ref = ScatterTable._format_table(self.reference)
+        obj = ScatterTable.from_dataframe(formatted_ref)
+        self.assertTrue(obj.dataframe.equals(formatted_ref))
+    def test_add_row(self):
+        """Test adding single row to the table without and with missing data."""
+        obj = ScatterTable()
+        obj.add_row(
+            name="model1",
+            class_id=0,
+            category="raw",
+            x=0.1,
+            y=2.3,
+            y_err=0.4,
+            shots=1000,
+            analysis="Test",
+        )
+        obj.add_row(
+            category="raw",
+            x=0.2,
+            y=3.4,
+        )
+        self.assertEqual(len(obj), 2)
+        np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2]))
+        np.testing.assert_array_equal(obj.y, np.array([2.3, 3.4]))
+        np.testing.assert_array_equal(obj.y_err, np.array([0.4, np.nan]))
+        np.testing.assert_array_equal(obj.name, np.array(["model1", None]))
+        np.testing.assert_array_equal(obj.class_id, np.array([0, None]))
+        np.testing.assert_array_equal(obj.category, np.array(["raw", "raw"]))
+        np.testing.assert_array_equal(obj.shots, np.array([1000, None], dtype=object))
+        np.testing.assert_array_equal(obj.analysis, np.array(["Test", None]))
+    def test_filter_data_by_class_id(self):
+        """Test filter table data with data UID."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        filtered = obj.filter(kind=0)
+        self.assertEqual(len(filtered), 6)
+        np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
+        np.testing.assert_array_equal(filtered.class_id, np.array([0, 0, 0, 0, 0, 0]))
+    def test_filter_data_by_model_name(self):
+        """Test filter table data with data name."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        filtered = obj.filter(kind="model1")
+        self.assertEqual(len(filtered), 6)
+        np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
+        np.testing.assert_array_equal(
+            filtered.name, np.array(["model1", "model1", "model1", "model1", "model1", "model1"])
+        )
+    def test_filter_data_by_category(self):
+        """Test filter table data with data category."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        filtered = obj.filter(category="formatted")
+        self.assertEqual(len(filtered), 4)
+        np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2]))
+        np.testing.assert_array_equal(
+            filtered.category, np.array(["formatted", "formatted", "formatted", "formatted"])
+        )
+    def test_filter_data_by_analysis(self):
+        """Test filter table data with associated analysis class."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        filtered = obj.filter(analysis="Fit2")
+        self.assertEqual(len(filtered), 2)
+        np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2]))
+        np.testing.assert_array_equal(filtered.analysis, np.array(["Fit2", "Fit2"]))
+    def test_filter_multiple(self):
+        """Test filter table data with multiple attributes."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        filtered = obj.filter(kind=0, category="raw", analysis="Fit1")
+        self.assertEqual(len(filtered), 2)
+        np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2]))
+        np.testing.assert_array_equal(filtered.class_id, np.array([0, 0]))
+        np.testing.assert_array_equal(filtered.category, np.array(["raw", "raw"]))
+        np.testing.assert_array_equal(filtered.analysis, np.array(["Fit1", "Fit1"]))
+    def test_iter_class(self):
+        """Test iterating over mini tables associated with different data UID."""
+        obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
+        class_iter = obj.iter_by_class()
+        index, table_cls0 = next(class_iter)
+        ref_table_cls0 = obj.filter(kind=0)
+        self.assertEqual(index, 0)
+        self.assertEqual(table_cls0, ref_table_cls0)
+        index, table_cls1 = next(class_iter)
+        ref_table_cls1 = obj.filter(kind=1)
+        self.assertEqual(index, 1)
+        self.assertEqual(table_cls1, ref_table_cls1)
+    def test_iter_groups(self):
+        """Test iterating over mini tables associated with multiple attributes."""
+        obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
+        class_iter = obj.iter_groups("class_id", "xval")
+        (index, xval), table0 = next(class_iter)
+        self.assertEqual(index, 0)
+        self.assertEqual(xval, 0.1)
+        self.assertEqual(len(table0), 2)
+        np.testing.assert_array_equal(table0.y, [0.192, 0.567])
+        (index, xval), table1 = next(class_iter)
+        self.assertEqual(index, 0)
+        self.assertEqual(xval, 0.2)
+        self.assertEqual(len(table1), 2)
+        np.testing.assert_array_equal(table1.y, [0.854, 0.488])
+        (index, xval), table2 = next(class_iter)
+        self.assertEqual(index, 1)
+        self.assertEqual(xval, 0.1)
+        self.assertEqual(len(table2), 1)
+        np.testing.assert_array_equal(table2.y, [0.784])
+        (index, xval), table3 = next(class_iter)
+        self.assertEqual(index, 1)
+        self.assertEqual(xval, 0.2)
+        self.assertEqual(len(table3), 1)
+        np.testing.assert_array_equal(table3.y, [0.672])
+    def test_roundtrip_table(self):
+        """Test ScatterTable is JSON serializable."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        self.assertRoundTripSerializable(obj)

From 7736f194609b0b62b3efb8f3fd8278396eda1a71 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 1 Feb 2024 03:17:08 +0900
Subject: [PATCH 06/29] Bug fixes

 .../               |  5 ++-
 .../curve_analysis/          |  1 -
 .../curve_analysis/           |  6 +--
 qiskit_experiments/curve_analysis/    |  6 +--
 .../framework/        |  8 ++--
 .../characterization/analysis/  |  4 +-
 test/curve_analysis/     | 37 +++++++++++++++++++
 test/framework/ |  3 +-
 8 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 83d0053190..5a220e38d1 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -404,7 +404,6 @@ def _run_analysis(
-                            shots=pd.NA,
@@ -423,7 +422,9 @@ def _run_analysis(
-        combined_curve_data = pd.concat(curve_data_set)
+        combined_curve_data = ScatterTable.from_dataframe(
+            pd.concat([d.dataframe for d in curve_data_set])
+        )
         total_quality = self._evaluate_quality(fit_dataset)
         # After the quality is determined, plot can become a boolean flag for whether
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 0c84c03750..1a62fa5819 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -525,7 +525,6 @@ def _run_analysis(
-                        shots=pd.NA,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 8702644ede..288c564088 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -109,7 +109,7 @@ def x(self) -> np.ndarray:
     def x(self, new_values):
-        self._data.loc[:, "x"] = new_values
+        self._data.loc[:, "xval"] = new_values
     def y(self) -> np.ndarray:
@@ -118,7 +118,7 @@ def y(self) -> np.ndarray:
     def y(self, new_values: np.ndarray):
-        self._data.loc[:, "y"] = new_values
+        self._data.loc[:, "yval"] = new_values
     def y_err(self) -> np.ndarray:
@@ -127,7 +127,7 @@ def y_err(self) -> np.ndarray:
     def y_err(self, new_values: np.ndarray):
-        self._data.loc[:, "y_err"] = new_values
+        self._data.loc[:, "yerr"] = new_values
     def name(self) -> np.ndarray:
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 3cd8496849..cc6bbb3a70 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -248,7 +248,7 @@ def shot_weighted_average(
         # Shot number is unknown
         return np.mean(yvals), np.nan, pd.NA
-    total_shots = np.sum(shots)
+    total_shots = np.sum(shots.astype(float))
     weights = shots / total_shots
     avg_yval = np.sum(weights * yvals)
@@ -277,7 +277,7 @@ def inverse_weighted_variance(
     if len(yvals) == 1:
         return yvals[0], yerrs[0], shots[0]
-    total_shots = np.sum(shots)
+    total_shots = np.sum(shots.astype(float))
     weights = 1 / yerrs**2
     yvar = 1 / np.sum(weights)
@@ -308,7 +308,7 @@ def sample_average(
     if len(yvals) == 1:
         return yvals[0], 0.0, shots[0]
-    total_shots = np.sum(shots)
+    total_shots = np.sum(shots.astype(float))
     avg_yval = np.mean(yvals)
     avg_yerr = np.sqrt(np.mean((avg_yval - yvals) ** 2) / len(yvals))
diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index 4b03f4ad45..4f33fa1737 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -22,6 +22,8 @@
 import numpy as np
 import pandas as pd
+from qiskit_experiments.database_service.exceptions import ExperimentEntryNotFound
 class AnalysisResultTable:
     """A table-like dataset for analysis results.
@@ -270,12 +272,12 @@ def _resolve_key(self, key: int | slice | str) -> list[str]:
         with self._lock:
             if isinstance(key, int):
                 if key >= len(self):
-                    raise KeyError(f"Analysis result {key} not found.")
+                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                 return [self._data.index[key]]
             if isinstance(key, slice):
                 keys = list(self._data.index)[key]
                 if len(keys) == 0:
-                    raise KeyError(f"Analysis result {key} not found.")
+                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                 return keys
             if isinstance(key, str):
                 if key in self._data.index:
@@ -283,7 +285,7 @@ def _resolve_key(self, key: int | slice | str) -> list[str]:
                 # This key is name of entry
                 loc = self._data["name"] == key
                 if not any(loc):
-                    raise KeyError(f"Analysis result {key} not found.")
+                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                 return list(self._data.index[loc])
         raise TypeError(f"Invalid key type {type(key)}. The key must be either int, slice, or str.")
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 4bbbabb542..1d040c4bc3 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -134,10 +134,10 @@ def _format_data(
             New scatter table instance including fit data.
         # check if the SVD decomposition categorized 0 as 1 by calculating the average slope
-        diff_y = np.diff(curve_data.yval)
+        diff_y = np.diff(curve_data.y)
         avg_slope = sum(diff_y) / len(diff_y)
         if avg_slope > 0:
-            curve_data.yval = 1 - curve_data.yval
+            curve_data.y = 1 - curve_data.y
         return super()._format_data(curve_data)
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index 8cc9a62981..2fbe887d8d 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -119,6 +119,43 @@ def test_add_row(self):
         np.testing.assert_array_equal(obj.shots, np.array([1000, None], dtype=object))
         np.testing.assert_array_equal(obj.analysis, np.array(["Test", None]))
+    def test_set_values(self):
+        """Test setting new column values through setter."""
+        obj = ScatterTable()
+        # add three empty rows
+        obj.add_row()
+        obj.add_row()
+        obj.add_row()
+        # Set sequence
+        obj.x = [0.1, 0.2, 0.3]
+        obj.y = [1.3, 1.4, 1.5]
+        obj.y_err = [0.3, 0.5, 0.7]
+        # Broadcast single value
+        obj.class_id = 0
+ = "model0"
+        np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2, 0.3]))
+        np.testing.assert_array_equal(obj.y, np.array([1.3, 1.4, 1.5]))
+        np.testing.assert_array_equal(obj.y_err, np.array([0.3, 0.5, 0.7]))
+        np.testing.assert_array_equal(obj.class_id, np.array([0, 0, 0]))
+        np.testing.assert_array_equal(, np.array(["model0", "model0", "model0"]))
+    def test_set_y(self):
+        """Test setting new values to y column."""
+        obj = ScatterTable()
+        obj.add_row(x=0.1, y=2.0, y_err=0.3)
+        obj.y = [0.5]
+        np.testing.assert_array_equal(obj.y, np.array([0.5]))
+    def test_set_y_err(self):
+        """Test setting new values to y_err column."""
+        obj = ScatterTable()
+        obj.add_row(x=0.1, y=2.0, y_err=0.3)
+        obj.y_err = [0.5]
+        np.testing.assert_array_equal(obj.y_err, np.array([0.5]))
     def test_filter_data_by_class_id(self):
         """Test filter table data with data UID."""
         obj = ScatterTable.from_dataframe(self.reference)
diff --git a/test/framework/ b/test/framework/
index a6dae47f7a..dea7e66c3d 100644
--- a/test/framework/
+++ b/test/framework/
@@ -16,6 +16,7 @@
 import uuid
 from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
+from qiskit_experiments.database_service.exceptions import ExperimentEntryNotFound
 class TestAnalysisTable(QiskitExperimentsTestCase):
@@ -38,7 +39,7 @@ def test_drop_entry(self):
     def test_drop_non_existing_entry(self):
         """Test dropping non-existing entry raises ValueError."""
         table = AnalysisResultTable()
-        with self.assertRaises(KeyError):
+        with self.assertRaises(ExperimentEntryNotFound):
     def test_raises_adding_duplicated_index(self):

From fc9273eb6fdadffc802a1217dc9823cd029ed422 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 1 Feb 2024 03:24:01 +0900
Subject: [PATCH 07/29] Unpin pandas 2.2

 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 2bda6ad888..54ea5ea51c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ matplotlib>=3.4

From 0cae1163edb272dcd800ae94ff31ae11bbe8b08b Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 1 Feb 2024 04:28:36 +0900
Subject: [PATCH 08/29] Update old pattern

 .../curve_analysis/          |  2 +-
 .../curve_analysis/           |  2 +-
 .../standard_analysis/     |  6 +-
 qiskit_experiments/curve_analysis/    |  6 +-
 .../analysis/                 |  4 +-
 .../analysis/            |  4 +-
 .../analysis/            |  4 +-
 .../driven_freq_tuning/        |  1 +
 .../               | 86 ++++++++++---------
 .../                |  4 +-
 test/curve_analysis/     |  8 +-
 11 files changed, 68 insertions(+), 59 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 1a62fa5819..657787557d 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -257,7 +257,7 @@ def _format_data(
                 uid = model_names.index(name)
-            except IndexError:
+            except ValueError:
                 uid = pd.NA
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 288c564088..b5c6ec253b 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -159,7 +159,7 @@ def category(self, new_values: np.ndarray):
     def shots(self) -> np.ndarray:
         """Shot number used to acquire data points."""
-        return self._data.shots.to_numpy(dtype=object, na_value=None)
+        return self._data.shots.to_numpy(dtype=object, na_value=np.nan)
     def shots(self, new_values: np.ndarray):
diff --git a/qiskit_experiments/curve_analysis/standard_analysis/ b/qiskit_experiments/curve_analysis/standard_analysis/
index a155eebe58..30931d026a 100644
--- a/qiskit_experiments/curve_analysis/standard_analysis/
+++ b/qiskit_experiments/curve_analysis/standard_analysis/
@@ -170,9 +170,9 @@ def _generate_fit_guesses(
         user_opt.bounds.set_if_empty(t_off=(0, np.inf), b=(-1, 1))
-        x_data = curve_data.get_subset_of("x")
-        y_data = curve_data.get_subset_of("y")
-        z_data = curve_data.get_subset_of("z")
+        x_data = curve_data.filter(kind="x")
+        y_data = curve_data.filter(kind="y")
+        z_data = curve_data.filter(kind="z")
         omega_xyz = []
         for data in (x_data, y_data, z_data):
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index cc6bbb3a70..3cd8496849 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -248,7 +248,7 @@ def shot_weighted_average(
         # Shot number is unknown
         return np.mean(yvals), np.nan, pd.NA
-    total_shots = np.sum(shots.astype(float))
+    total_shots = np.sum(shots)
     weights = shots / total_shots
     avg_yval = np.sum(weights * yvals)
@@ -277,7 +277,7 @@ def inverse_weighted_variance(
     if len(yvals) == 1:
         return yvals[0], yerrs[0], shots[0]
-    total_shots = np.sum(shots.astype(float))
+    total_shots = np.sum(shots)
     weights = 1 / yerrs**2
     yvar = 1 / np.sum(weights)
@@ -308,7 +308,7 @@ def sample_average(
     if len(yvals) == 1:
         return yvals[0], 0.0, shots[0]
-    total_shots = np.sum(shots.astype(float))
+    total_shots = np.sum(shots)
     avg_yval = np.mean(yvals)
     avg_yerr = np.sqrt(np.mean((avg_yval - yvals) ** 2) / len(yvals))
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index b6c9915a4f..ce2fb34fd4 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -116,9 +116,9 @@ def _generate_fit_guesses(
             List of fit options that are passed to the fitter function.
         # Use the highest-frequency curve to estimate the oscillation frequency.
-        max_rep_model_name = self._models[-1]._name
+        max_rep_model_name = self.model_names()[-1]
         max_rep = self.options.data_subfit_map[max_rep_model_name]["nrep"]
-        curve_data = curve_data.get_subset_of(max_rep_model_name)
+        curve_data = curve_data.filter(kind=max_rep_model_name)
         x_data = curve_data.x
         min_beta, max_beta = min(x_data), max(x_data)
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 27a588a550..ea7fc8fcb7 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -113,8 +113,8 @@ def _generate_fit_guesses(
             List of fit options that are passed to the fitter function.
-        ramx_data = curve_data.get_subset_of("X")
-        ramy_data = curve_data.get_subset_of("Y")
+        ramx_data = curve_data.filter(kind="X")
+        ramy_data = curve_data.filter(kind="Y")
         # At very low frequency, y value of X (Y) curve stay at P=1.0 (0.5) for all x values.
         # Computing y peak-to-peak with combined data gives fake amplitude of 0.25.
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 5f5c9770bc..b2fcb25482 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -142,8 +142,8 @@ def _generate_fit_guesses(
         y_ptp = y_max - y_min
         x_max = np.max(curve_data.x)
-        data_0 = curve_data.get_subset_of("0")
-        data_1 = curve_data.get_subset_of("1")
+        data_0 = curve_data.filter(kind="0")
+        data_1 = curve_data.filter(kind="1")
         def typical_step(arr):
             """Find the typical step size of an array"""
diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index 89dd840ed2..c487f87f0a 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -181,6 +181,7 @@ def __eq__(self, other):
                 self.neg_coef_o1 == other.neg_coef_o1,
                 self.neg_coef_o2 == other.neg_coef_o2,
                 self.neg_coef_o3 == other.neg_coef_o3,
+                self.offset == other.offset,
diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index 9ced48b07a..21b263b993 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -254,26 +254,21 @@ def _format_data(
     ) -> curve.ScatterTable:
         curve_data = super()._format_data(curve_data, category="ramsey_xy")
-        ramsey_xy = curve_data[curve_data.category == "ramsey_xy"]
+        ramsey_xy = curve_data.filter(category="ramsey_xy")
+        y_mean = ramsey_xy.y.mean()
         # Create phase data by arctan(Y/X)
-        columns = list(curve_data.columns)
-        phase_data = np.empty((0, len(columns)))
-        y_mean = ramsey_xy.yval.mean()
-        grouped = ramsey_xy.groupby("name")
         for m_id, direction in enumerate(("pos", "neg")):
-            x_quadrature = grouped.get_group(f"X{direction}")
-            y_quadrature = grouped.get_group(f"Y{direction}")
-            if not np.array_equal(x_quadrature.xval, y_quadrature.xval):
+            x_quadrature = ramsey_xy.filter(kind=f"X{direction}")
+            y_quadrature = ramsey_xy.filter(kind=f"Y{direction}")
+            if not np.array_equal(x_quadrature.x, y_quadrature.x):
                 raise ValueError(
                     "Amplitude values of X and Y quadrature are different. "
                     "Same values must be used."
-            x_uarray = unp.uarray(x_quadrature.yval, x_quadrature.yerr)
-            y_uarray = unp.uarray(y_quadrature.yval, y_quadrature.yerr)
-            amplitudes = x_quadrature.xval.to_numpy()
+            x_uarray = unp.uarray(x_quadrature.y, x_quadrature.y_err)
+            y_uarray = unp.uarray(y_quadrature.y, y_quadrature.y_err)
+            amplitudes = x_quadrature.x
             # pylint: disable=no-member
             phase = unp.arctan2(y_uarray - y_mean, x_uarray - y_mean)
@@ -288,17 +283,24 @@ def _format_data(
                 unwrapped_phase = unwrapped_phase + (phase_n[-1] - unwrapped_phase[-1])
             # Store new data
-            tmp = np.empty((len(amplitudes), len(columns)), dtype=object)
-            tmp[:, columns.index("xval")] = amplitudes
-            tmp[:, columns.index("yval")] = unwrapped_phase / self._freq_phase_coef()
-            tmp[:, columns.index("yerr")] = phase_s / self._freq_phase_coef()
-            tmp[:, columns.index("name")] = f"FREQ{direction}"
-            tmp[:, columns.index("class_id")] = m_id
-            tmp[:, columns.index("shots")] = x_quadrature.shots + y_quadrature.shots
-            tmp[:, columns.index("category")] = category
-            phase_data = np.r_[phase_data, tmp]
-        return curve_data.append_list_values(other=phase_data)
+            unwrapped_phase /= self._freq_phase_coef()
+            phase_s /= self._freq_phase_coef()
+            shot_sums = x_quadrature.shots + y_quadrature.shots
+            for new_x, new_y, new_y_err, shot in zip(
+                amplitudes, unwrapped_phase, phase_s, shot_sums
+            ):
+                curve_data.add_row(
+                    x=new_x,
+                    y=new_y,
+                    y_err=new_y_err,
+                    name=f"FREQ{direction}",
+                    class_id=m_id,
+                    shots=shot,
+                    category=category,
+          ,
+                )
+        return curve_data
     def _generate_fit_guesses(
@@ -355,39 +357,39 @@ def _create_figures(
     ) -> List["matplotlib.figure.Figure"]:
         # plot unwrapped phase on first axis
-        for d in ("pos", "neg"):
-            sub_data = curve_data[( == f"FREQ{d}") & (curve_data.category == "freq")]
+        for direction in ("pos", "neg"):
+            sub_data = curve_data.filter(kind=f"FREQ{direction}", category="freq")
-                series_name=f"F{d}",
-                x_formatted=sub_data.xval.to_numpy(),
-                y_formatted=sub_data.yval.to_numpy(),
-                y_formatted_err=sub_data.yerr.to_numpy(),
+                series_name=f"F{direction}",
+                x_formatted=sub_data.x,
+                y_formatted=sub_data.y,
+                y_formatted_err=sub_data.y_err,
         # plot raw RamseyXY plot on second axis
         for name in ("Xpos", "Ypos", "Xneg", "Yneg"):
-            sub_data = curve_data[( == name) & (curve_data.category == "ramsey_xy")]
+            sub_data = curve_data.filter(kind=name, category="ramsey_xy")
-                x_formatted=sub_data.xval.to_numpy(),
-                y_formatted=sub_data.yval.to_numpy(),
-                y_formatted_err=sub_data.yerr.to_numpy(),
+                x_formatted=sub_data.x,
+                y_formatted=sub_data.y,
+                y_formatted_err=sub_data.y_err,
         # find base and amplitude guess
-        ramsey_xy = curve_data[curve_data.category == "ramsey_xy"]
-        offset_guess = 0.5 * (ramsey_xy.yval.min() + ramsey_xy.yval.max())
-        amp_guess = 0.5 * np.ptp(ramsey_xy.yval)
+        ramsey_xy = curve_data.filter(category="ramsey_xy")
+        offset_guess = 0.5 * (np.min(ramsey_xy.y) + np.max(ramsey_xy.y))
+        amp_guess = 0.5 * np.ptp(ramsey_xy.y)
         # plot frequency and Ramsey fit lines
-        line_data = curve_data[curve_data.category == "fitted"]
+        line_data = curve_data.filter(category="fitted")
         for direction in ("pos", "neg"):
-            sub_data = line_data[ == f"FREQ{direction}"]
+            sub_data = line_data.filter(kind=f"FREQ{direction}")
             if len(sub_data) == 0:
-            xval = sub_data.xval.to_numpy()
-            yn = sub_data.yval.to_numpy()
-            ys = sub_data.yerr.to_numpy()
+            xval = sub_data.x
+            yn = sub_data.y
+            ys = sub_data.y_err
             yval = unp.uarray(yn, ys) * self._freq_phase_coef()
             # Ramsey fit lines are predicted from the phase fit line.
diff --git a/qiskit_experiments/library/randomized_benchmarking/ b/qiskit_experiments/library/randomized_benchmarking/
index 7864b20436..b918fd41fa 100644
--- a/qiskit_experiments/library/randomized_benchmarking/
+++ b/qiskit_experiments/library/randomized_benchmarking/
@@ -141,12 +141,12 @@ def _generate_fit_guesses(
         b_guess = 1 / 2**self._num_qubits
         # for standard RB curve
-        std_curve = curve_data.get_subset_of("standard")
+        std_curve = curve_data.filter(kind="standard")
         alpha_std = curve.guess.rb_decay(std_curve.x, std_curve.y, b=b_guess)
         a_std = (std_curve.y[0] - b_guess) / (alpha_std ** std_curve.x[0])
         # for interleaved RB curve
-        int_curve = curve_data.get_subset_of("interleaved")
+        int_curve = curve_data.filter(kind="interleaved")
         alpha_int = curve.guess.rb_decay(int_curve.x, int_curve.y, b=b_guess)
         a_int = (int_curve.y[0] - b_guess) / (alpha_int ** int_curve.x[0])
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index 2fbe887d8d..cfbc43a380 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -116,7 +116,13 @@ def test_add_row(self):
         np.testing.assert_array_equal(, np.array(["model1", None]))
         np.testing.assert_array_equal(obj.class_id, np.array([0, None]))
         np.testing.assert_array_equal(obj.category, np.array(["raw", "raw"]))
-        np.testing.assert_array_equal(obj.shots, np.array([1000, None], dtype=object))
+        np.testing.assert_array_equal(
+            # NumPy handles NaN strictly, but isnan only works for the float dtype.
+            # Original data is of object dtype because we want to keep the shot number an integer,
+            # and NumPy has no nullable integer dtype.
+            obj.shots.astype(float),
+            np.array([1000, np.nan], dtype=float),
+        )
         np.testing.assert_array_equal(obj.analysis, np.array(["Test", None]))
     def test_set_values(self):

From 2fb28dc548b254691c5c104d4b1d46c25b1eed78 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Thu, 1 Feb 2024 14:31:45 +0900
Subject: [PATCH 09/29] Fix cross-reference

 docs/howtos/rerun_analysis.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/howtos/rerun_analysis.rst b/docs/howtos/rerun_analysis.rst
index 532d968cf2..c5d8653fcf 100644
--- a/docs/howtos/rerun_analysis.rst
+++ b/docs/howtos/rerun_analysis.rst
@@ -17,7 +17,7 @@ Solution
     consult the `migration guide <>`_.\
 Once you recreate the exact experiment you ran and all of its parameters and options,
-you can call the :meth:`.add_jobs` method with a list of :class:`Job
+you can call the :meth:`.ExperimentData.add_jobs` method with a list of :class:`Job
 <qiskit.providers.JobV1>` objects to generate the new :class:`.ExperimentData` object.
 The following example retrieves jobs from a provider that has access to them via their
 job IDs:
@@ -47,7 +47,7 @@ job IDs:
 instead of overwriting the existing one.
 If you have the job data in the form of a :class:`~qiskit.result.Result` object, you can
-invoke the :meth:`.add_data` method instead of :meth:`.add_jobs`:
+invoke the :meth:`.ExperimentData.add_data` method instead of :meth:`.ExperimentData.add_jobs`:
 .. jupyter-input::

From ac972fd2d8d63e18e84eacbdc15e13c9ab9d95ab Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Fri, 2 Feb 2024 11:49:15 +0900
Subject: [PATCH 10/29] Update curve analysis tutorial

 docs/tutorials/curve_analysis.rst | 39 ++++++++++++++++---------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 8da46cbc75..77d394ad7d 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -273,19 +273,19 @@ This table may look like:
 .. code-block::
-        xval      yval      yerr name  class_id category  shots
-    0    0.1  0.153659  0.011258    A         0      raw   1024
-    1    0.1  0.590732  0.015351    B         1      raw   1024
-    2    0.1  0.315610  0.014510    A         0      raw   1024
-    3    0.1  0.376098  0.015123    B         1      raw   1024
-    4    0.2  0.937073  0.007581    A         0      raw   1024
-    5    0.2  0.323415  0.014604    B         1      raw   1024
-    6    0.2  0.538049  0.015565    A         0      raw   1024
-    7    0.2  0.530244  0.015581    B         1      raw   1024
-    8    0.3  0.143902  0.010958    A         0      raw   1024
-    9    0.3  0.261951  0.013727    B         1      raw   1024
-    10   0.3  0.830732  0.011707    A         0      raw   1024
-    11   0.3  0.874634  0.010338    B         1      raw   1024
+        xval      yval      yerr name  class_id category  shots     analysis
+    0    0.1  0.153659  0.011258    A         0      raw   1024   MyAnalysis
+    1    0.1  0.590732  0.015351    B         1      raw   1024   MyAnalysis
+    2    0.1  0.315610  0.014510    A         0      raw   1024   MyAnalysis
+    3    0.1  0.376098  0.015123    B         1      raw   1024   MyAnalysis
+    4    0.2  0.937073  0.007581    A         0      raw   1024   MyAnalysis
+    5    0.2  0.323415  0.014604    B         1      raw   1024   MyAnalysis
+    6    0.2  0.538049  0.015565    A         0      raw   1024   MyAnalysis
+    7    0.2  0.530244  0.015581    B         1      raw   1024   MyAnalysis
+    8    0.3  0.143902  0.010958    A         0      raw   1024   MyAnalysis
+    9    0.3  0.261951  0.013727    B         1      raw   1024   MyAnalysis
+    10   0.3  0.830732  0.011707    A         0      raw   1024   MyAnalysis
+    11   0.3  0.874634  0.010338    B         1      raw   1024   MyAnalysis
 where the experiment consists of two subset series A and B, and the experiment parameter (xval)
 is scanned from 0.1 to 0.3 in each subset. In this example, the experiment is run twice
@@ -298,6 +298,7 @@ for each condition. The role of each column is as follows:
 - ``class_id``: Numerical index corresponding to the result class. This number is automatically assigned.
 - ``category``: The attribute of data set. The "raw" category indicates an output from the data processing.
 - ``shots``: Number of measurement shots used to acquire this result.
+- ``analysis``: The name of the curve analysis instance that generated this data. In :class:`.CompositeCurveAnalysis`, the table is a composite of tables from all component analyses.
 3. Formatting
@@ -319,12 +320,12 @@ This may return new scatter table object with the addition of rows like the foll
 .. code-block::
-    12   0.1  0.234634  0.009183    A         0  formatted   2048
-    13   0.2  0.737561  0.008656    A         0  formatted   2048
-    14   0.3  0.487317  0.008018    A         0  formatted   2048
-    15   0.1  0.483415  0.010774    B         1  formatted   2048
-    16   0.2  0.426829  0.010678    B         1  formatted   2048
-    17   0.3  0.568293  0.008592    B         1  formatted   2048
+    12   0.1  0.234634  0.009183    A         0  formatted   2048   MyAnalysis
+    13   0.2  0.737561  0.008656    A         0  formatted   2048   MyAnalysis
+    14   0.3  0.487317  0.008018    A         0  formatted   2048   MyAnalysis
+    15   0.1  0.483415  0.010774    B         1  formatted   2048   MyAnalysis
+    16   0.2  0.426829  0.010678    B         1  formatted   2048   MyAnalysis
+    17   0.3  0.568293  0.008592    B         1  formatted   2048   MyAnalysis
 The default :meth:`_format_data` method adds its output data with the category "formatted".
 This category name must be also specified in the analysis option ``fit_category``.

From 01471bb66dab455d5fa5cfdb8beeece0745fc216 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Fri, 2 Feb 2024 11:58:03 +0900
Subject: [PATCH 11/29] Add shortcut methods

 .../curve_analysis/           | 62 ++++++++++++++++++-
 test/curve_analysis/     | 24 +++----
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index b5c6ec253b..8dd6a90fb7 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -111,6 +111,26 @@ def x(self) -> np.ndarray:
     def x(self, new_values):
         self._data.loc[:, "xval"] = new_values
+    def get_x(
+        self,
+        kind: int | str | None = None,
+        category: str | None = None,
+        analysis: str | None = None,
+    ) -> np.ndarray:
+        """Get subset of X values.
+        A convenient shortcut for getting X data with filtering.
+        Args:
+            kind: Identifier of the data, either data UID or name.
+            category: Name of data category.
+            analysis: Name of analysis.
+        Returns:
+            Numpy array of X values.
+        """
+        return self.filter(kind, category, analysis).x
     def y(self) -> np.ndarray:
         """Y values."""
@@ -120,15 +140,55 @@ def y(self) -> np.ndarray:
     def y(self, new_values: np.ndarray):
         self._data.loc[:, "yval"] = new_values
+    def get_y(
+        self,
+        kind: int | str | None = None,
+        category: str | None = None,
+        analysis: str | None = None,
+    ) -> np.ndarray:
+        """Get subset of Y values.
+        A convenient shortcut for getting Y data with filtering.
+        Args:
+            kind: Identifier of the data, either data UID or name.
+            category: Name of data category.
+            analysis: Name of analysis.
+        Returns:
+            Numpy array of Y values.
+        """
+        return self.filter(kind, category, analysis).y
     def y_err(self) -> np.ndarray:
-        """Standard deviation of y values."""
+        """Standard deviation of Y values."""
         return self._data.yerr.to_numpy(dtype=float, na_value=np.nan)
     def y_err(self, new_values: np.ndarray):
         self._data.loc[:, "yerr"] = new_values
+    def get_y_err(
+        self,
+        kind: int | str | None = None,
+        category: str | None = None,
+        analysis: str | None = None,
+    ) -> np.ndarray:
+        """Get subset of standard deviation of Y values.
+        A convenient shortcut for getting Y error data with filtering.
+        Args:
+            kind: Identifier of the data, either data UID or name.
+            category: Name of data category.
+            analysis: Name of analysis.
+        Returns:
+            Numpy array of Y error values.
+        """
+        return self.filter(kind, category, analysis).y_err
     def name(self) -> np.ndarray:
         """Corresponding data name."""
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index cfbc43a380..339fca1652 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -148,19 +148,19 @@ def test_set_values(self):
         np.testing.assert_array_equal(obj.class_id, np.array([0, 0, 0]))
         np.testing.assert_array_equal(, np.array(["model0", "model0", "model0"]))
-    def test_set_y(self):
-        """Test setting new values to y column."""
-        obj = ScatterTable()
-        obj.add_row(x=0.1, y=2.0, y_err=0.3)
-        obj.y = [0.5]
-        np.testing.assert_array_equal(obj.y, np.array([0.5]))
+    def test_get_subset_numbers(self):
+        """Test end-user shortcut for getting the subset of x, y, y_err data."""
+        obj = ScatterTable.from_dataframe(self.reference)
-    def test_set_y_err(self):
-        """Test setting new values to y_err column."""
-        obj = ScatterTable()
-        obj.add_row(x=0.1, y=2.0, y_err=0.3)
-        obj.y_err = [0.5]
-        np.testing.assert_array_equal(obj.y_err, np.array([0.5]))
+        np.testing.assert_array_equal(
+            obj.get_x("model1", "raw"), np.array([0.100, 0.200, 0.100, 0.200])
+        )
+        np.testing.assert_array_equal(
+            obj.get_y("model1", "raw"), np.array([0.192, 0.854, 0.567, 0.488])
+        )
+        np.testing.assert_array_equal(
+            obj.get_y_err("model1", "raw"), np.array([0.002, 0.090, 0.033, 0.038])
+        )
     def test_filter_data_by_class_id(self):
         """Test filter table data with data UID."""

From 8dc6c4fb74fef35f036070b5b7ba2eb7481aeaa6 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Fri, 2 Feb 2024 12:25:27 +0900
Subject: [PATCH 12/29] Bugfix autosave

 qiskit_experiments/framework/ | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index ecc1c6f3e2..1e29a940c2 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -1435,7 +1435,7 @@ def add_analysis_results(
             if self.auto_save:
                 service_result = _series_to_service_result(
-                    series=self._analysis_results.get_data(uid),
+                    series=self._analysis_results.get_data(uid, columns="all").iloc[0],

From 144127a5ed2046b5421eeb509c7365d02470c71b Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Fri, 2 Feb 2024 15:03:59 +0900
Subject: [PATCH 13/29] Raise user warning when numbers contain multiple series

 .../curve_analysis/           | 46 +++++++++++++++++--
 test/curve_analysis/     | 22 +++++----
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 8dd6a90fb7..d5227a6a26 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -14,6 +14,7 @@
 from __future__ import annotations
 import logging
+import warnings
 from collections.abc import Iterator
 from typing import Any
 from itertools import groupby
@@ -116,6 +117,7 @@ def get_x(
         kind: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
+        check_unique: bool = True,
     ) -> np.ndarray:
         """Get subset of X values.
@@ -125,11 +127,16 @@ def get_x(
             kind: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
+            check_unique: Set True to check if multiple series are contained.
+                When multiple series are contained, it raises a user warning.
             Numpy array of X values.
-        return self.filter(kind, category, analysis).x
+        sub_table = self.filter(kind, category, analysis)
+        if check_unique:
+            self._warn_composite_data(sub_table)
+        return sub_table.x
     def y(self) -> np.ndarray:
@@ -145,6 +152,7 @@ def get_y(
         kind: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
+        check_unique: bool = True,
     ) -> np.ndarray:
         """Get subset of Y values.
@@ -154,11 +162,16 @@ def get_y(
             kind: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
+            check_unique: Set True to check if multiple series are contained.
+                When multiple series are contained, it raises a user warning.
             Numpy array of Y values.
-        return self.filter(kind, category, analysis).y
+        sub_table = self.filter(kind, category, analysis)
+        if check_unique:
+            self._warn_composite_data(sub_table)
+        return sub_table.y
     def y_err(self) -> np.ndarray:
@@ -174,6 +187,7 @@ def get_y_err(
         kind: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
+        check_unique: bool = True,
     ) -> np.ndarray:
         """Get subset of standard deviation of Y values.
@@ -183,11 +197,16 @@ def get_y_err(
             kind: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
+            check_unique: Set True to check if multiple series are contained.
+                When multiple series are contained, it raises a user warning.
             Numpy array of Y error values.
-        return self.filter(kind, category, analysis).y_err
+        sub_table = self.filter(kind, category, analysis)
+        if check_unique:
+            self._warn_composite_data(sub_table)
+        return sub_table.y_err
     def name(self) -> np.ndarray:
@@ -339,6 +358,27 @@ def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
+    @staticmethod
+    def _warn_composite_data(table: ScatterTable):
+        if len(table._data.name.unique()) > 1:
+            warnings.warn(
+                "Returned data contains multiple series. "
+                "You may want to filter the data by a specific kind identifier.",
+                UserWarning,
+            )
+        if len(table._data.category.unique()) > 1:
+            warnings.warn(
+                "Returned data contains multiple categories. "
+                "You may want to filter the data by a specific category name.",
+                UserWarning,
+            )
+        if len(table._data.analysis.unique()) > 1:
+            warnings.warn(
+                "Returned data contains multiple datasets from different component analyses. "
+                "You may want to filter the data by a specific analysis name.",
+                UserWarning,
+            )
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index 339fca1652..e8532cba60 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -13,7 +13,6 @@
 """Test scatter table."""
 from test.base import QiskitExperimentsTestCase
 import pandas as pd
 import numpy as np
@@ -152,16 +151,23 @@ def test_get_subset_numbers(self):
         """Test end-user shortcut for getting the subset of x, y, y_err data."""
         obj = ScatterTable.from_dataframe(self.reference)
+        np.testing.assert_array_equal(obj.get_x("model1", "raw", "Fit1"), np.array([0.100, 0.200]))
+        np.testing.assert_array_equal(obj.get_y("model1", "raw", "Fit1"), np.array([0.192, 0.854]))
-            obj.get_x("model1", "raw"), np.array([0.100, 0.200, 0.100, 0.200])
-        )
-        np.testing.assert_array_equal(
-            obj.get_y("model1", "raw"), np.array([0.192, 0.854, 0.567, 0.488])
-        )
-        np.testing.assert_array_equal(
-            obj.get_y_err("model1", "raw"), np.array([0.002, 0.090, 0.033, 0.038])
+            obj.get_y_err("model1", "raw", "Fit1"), np.array([0.002, 0.090])
+    def test_warn_composite_values(self):
+        """Test raise warning when returned x, y, y_err data contains multiple data series."""
+        obj = ScatterTable.from_dataframe(self.reference)
+        with self.assertWarns(UserWarning):
+            obj.get_x()
+        with self.assertWarns(UserWarning):
+            obj.get_y()
+        with self.assertWarns(UserWarning):
+            obj.get_y_err()
     def test_filter_data_by_class_id(self):
         """Test filter table data with data UID."""
         obj = ScatterTable.from_dataframe(self.reference)

From 7c0662cefdb039173549ab8833ffd4dc5b2c3ea1 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Fri, 2 Feb 2024 16:59:32 +0900
Subject: [PATCH 14/29] Bugfix: Missing circuit metadata in composite analysis

 .../framework/composite/             | 5 ++++-
 test/framework/                          | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/qiskit_experiments/framework/composite/ b/qiskit_experiments/framework/composite/
index 66f6b1642a..18c2c1f576 100644
--- a/qiskit_experiments/framework/composite/
+++ b/qiskit_experiments/framework/composite/
@@ -235,7 +235,10 @@ def _marginalized_component_data(self, composite_data: List[Dict]) -> List[List[
                 if index not in marginalized_data:
                     # Initialize data list for marginalized
                     marginalized_data[index] = []
-                sub_data = {"metadata": metadata["composite_metadata"][i]}
+                sub_data = {
+                    k: v for k, v in datum.items() if k not in ("metadata", "counts", "memory")
+                }
+                sub_data["metadata"] = metadata["composite_metadata"][i]
                 if "counts" in datum:
                     if composite_clbits is not None:
                         sub_data["counts"] = marginal_distribution(
diff --git a/test/framework/ b/test/framework/
index 791b7b9689..1ca1e5a5b6 100644
--- a/test/framework/
+++ b/test/framework/
@@ -719,6 +719,8 @@ def test_composite_count_memory_marginalization(self, memory):
                     "metadata": {"experiment_type": "FineXAmplitude", "qubits": [0]},
                     "counts": {"0": 6, "1": 4},
                     "memory": ["0", "0", "1", "0", "0", "1", "1", "0", "0", "1"],
+                    "shots": 10,
+                    "meas_level": 2,
@@ -726,6 +728,8 @@ def test_composite_count_memory_marginalization(self, memory):
                     "metadata": {"experiment_type": "FineXAmplitude", "qubits": [1]},
                     "counts": {"0": 5, "1": 5},
                     "memory": ["0", "1", "1", "0", "0", "0", "1", "0", "1", "1"],
+                    "shots": 10,
+                    "meas_level": 2,
@@ -775,6 +779,8 @@ def test_composite_single_kerneled_memory_marginalization(self):
                     [[idx + 0.3, idx + 0.3]],
                     [[idx + 0.4, idx + 0.4]],
+                "shots": 5,
+                "meas_level": 1,
             self.assertEqual(expected, sub_data[0])
@@ -813,6 +819,8 @@ def test_composite_avg_kerneled_memory_marginalization(self):
             expected = {
                 "metadata": {"experiment_type": "FineXAmplitude", "qubits": [idx]},
                 "memory": [[idx + 0.0, idx + 0.1]],
+                "shots": 5,
+                "meas_level": 1,
             self.assertEqual(expected, sub_data[0])

From 92cfc92f5e0be9556155e25a1152a1c9abfdecf5 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Mon, 5 Feb 2024 10:26:01 +0900
Subject: [PATCH 15/29] Replace class_id with data_uid

 docs/tutorials/curve_analysis.rst             |  6 +--
 .../               | 10 ++---
 .../curve_analysis/          | 24 +++++------
 .../curve_analysis/           | 43 ++++++++++---------
 .../               |  4 +-
 test/curve_analysis/     | 34 +++++++--------
 6 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 77d394ad7d..c10dd9cbfd 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -273,7 +273,7 @@ This table may look like:
 .. code-block::
-        xval      yval      yerr name  class_id category  shots     analysis
+        xval      yval      yerr name  data_uid category  shots     analysis
     0    0.1  0.153659  0.011258    A         0      raw   1024   MyAnalysis
     1    0.1  0.590732  0.015351    B         1      raw   1024   MyAnalysis
     2    0.1  0.315610  0.014510    A         0      raw   1024   MyAnalysis
@@ -295,7 +295,7 @@ for each condition. The role of each column is as follows:
 - ``yval``: Nominal part of the outcome. The outcome is something like expectation value, which is computed from the experiment result with the data processor.
 - ``yerr``: Standard error of the outcome, which is mainly due to sampling error.
 - ``name``: Unique identifier of the result class. This is defined by the ``data_subfit_map`` option.
-- ``class_id``: Numerical index corresponding to the result class. This number is automatically assigned.
+- ``data_uid``: Numerical index corresponding to the result class. This number is automatically assigned.
 - ``category``: The attribute of data set. The "raw" category indicates an output from the data processing.
 - ``shots``: Number of measurement shots used to acquire this result.
 - ``analysis``: The name of curve analysis instance that generated this data. In :class:`.CompositeCurveAnalysis`, the table is a composite of tables from all component analyses.
@@ -311,7 +311,7 @@ This allows the analysis to easily estimate the slope of the curves to
 create algorithmic initial guess of fit parameters.
 A developer can inject extra data processing, for example, filtering, smoothing,
 or elimination of outliers for better fitting.
-The new class_id is given here so that its value corresponds to the fit model object index
+The new data_uid is given here so that its value corresponds to the fit model object index
 in this analysis class. This index mapping is done based upon the correspondence of
 the data name and the fit model name.
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 5a220e38d1..ad7a04a38b 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -232,7 +232,7 @@ def _create_figures(
         for analysis in self.analyses():
             group_data = curve_data.filter(analysis=analysis.name)
             model_names = analysis.model_names()
-            for uid, sub_data in group_data.iter_by_class():
+            for uid, sub_data in group_data.iter_by_data():
                 full_name = f"{model_names[uid]}_{analysis.name}"
                 # Plot raw data scatters
                 if analysis.options.plot_raw_data:
@@ -378,7 +378,7 @@ def _run_analysis(
             if fit_data.success:
                 # Add fit data to curve data table
                 model_names = analysis.model_names()
-                for i, sub_data in formatted_subset.iter_by_class():
+                for data_id, sub_data in formatted_subset.iter_by_data():
                     xval = sub_data.x
                     if len(xval) == 0:
                         # If data is empty, skip drawing this model.
@@ -388,7 +388,7 @@ def _run_analysis(
                     xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
                     uval_arr_fit = eval_with_uncertainties(
-                        model=analysis.models[i],
+                        model=analysis.models[data_id],
                     yval_arr_fit = unp.nominal_values(uval_arr_fit)
@@ -398,8 +398,8 @@ def _run_analysis(
                         yerr_arr_fit = np.zeros_like(xval_arr_fit)
                     for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
-                            name=model_names[i],
-                            class_id=i,
+                            name=model_names[data_id],
+                            data_uid=data_id,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 657787557d..95ad2f2669 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -208,16 +208,16 @@ def _run_data_processing(
                 ) from ex
             # Assign entry name and class id
-            for uid, (name, spec) in enumerate(classifier.items()):
+            for data_id, (name, spec) in enumerate(classifier.items()):
                 if spec.items() <= metadata.items():
                 # This is unclassified data.
-                uid = pd.NA
+                data_id = pd.NA
                 name = pd.NA
-                class_id=uid,
+                data_uid=data_id,
@@ -256,12 +256,12 @@ def _format_data(
-                uid = model_names.index(name)
+                data_id = model_names.index(name)
             except ValueError:
-                uid = pd.NA
+                data_id = pd.NA
-                class_id=uid,
+                data_uid=data_id,
@@ -339,7 +339,7 @@ def _run_curve_fit(
         # Create convenient function to compute residual of the models.
         partial_residuals = []
         valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
-        for i, sub_data in curve_data.iter_by_class():
+        for i, sub_data in curve_data.iter_by_data():
             if valid_uncertainty:
                 nonzero_yerr = np.where(
                     np.isclose(sub_data.y_err, 0.0),
@@ -417,7 +417,7 @@ def _create_figures(
             A list of figures.
-        for i, sub_data in curve_data.iter_by_class():
+        for i, sub_data in curve_data.iter_by_data():
             name = self.model_names()[i]
             # Plot raw data scatters
             if self.options.plot_raw_data:
@@ -499,7 +499,7 @@ def _run_analysis(
         if fit_data.success:
             # Add fit data to curve data table
             model_names = self.model_names()
-            for i, sub_data in formatted_subset.iter_by_class():
+            for data_id, sub_data in formatted_subset.iter_by_data():
                 xval = sub_data.x
                 if len(xval) == 0:
                     # If data is empty, skip drawing this model.
@@ -509,7 +509,7 @@ def _run_analysis(
                 xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
                 uval_arr_fit = eval_with_uncertainties(
-                    model=self._models[i],
+                    model=self._models[data_id],
                 yval_arr_fit = unp.nominal_values(uval_arr_fit)
@@ -519,8 +519,8 @@ def _run_analysis(
                     yerr_arr_fit = np.zeros_like(xval_arr_fit)
                 for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
-                        name=model_names[i],
-                        class_id=i,
+                        name=model_names[data_id],
+                        data_uid=data_id,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index d5227a6a26..04d3f1538a 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -44,7 +44,7 @@ class ScatterTable:
-        "class_id",
+        "data_uid",
@@ -106,13 +106,14 @@ def dataframe(self):
     def x(self) -> np.ndarray:
         """X values."""
+        # For backward compatibility with CurveData.x
         return self._data.xval.to_numpy(dtype=float, na_value=np.nan)
     def x(self, new_values):
         self._data.loc[:, "xval"] = new_values
-    def get_x(
+    def xvals(
         kind: int | str | None = None,
         category: str | None = None,
@@ -141,13 +142,14 @@ def get_x(
     def y(self) -> np.ndarray:
         """Y values."""
+        # For backward compatibility with CurveData.y
         return self._data.yval.to_numpy(dtype=float, na_value=np.nan)
     def y(self, new_values: np.ndarray):
         self._data.loc[:, "yval"] = new_values
-    def get_y(
+    def yvals(
         kind: int | str | None = None,
         category: str | None = None,
@@ -176,13 +178,14 @@ def get_y(
     def y_err(self) -> np.ndarray:
         """Standard deviation of Y values."""
+        # For backward compatibility with CurveData.y_err
         return self._data.yerr.to_numpy(dtype=float, na_value=np.nan)
     def y_err(self, new_values: np.ndarray):
         self._data.loc[:, "yerr"] = new_values
-    def get_y_err(
+    def yerrs(
         kind: int | str | None = None,
         category: str | None = None,
@@ -218,13 +221,13 @@ def name(self, new_values: np.ndarray):
         self._data.loc[:, "name"] = new_values
-    def class_id(self) -> np.ndarray:
+    def data_uid(self) -> np.ndarray:
         """Corresponding data UID."""
-        return self._data.class_id.to_numpy(dtype=object, na_value=None)
+        return self._data.data_uid.to_numpy(dtype=object, na_value=None)
-    @class_id.setter
-    def class_id(self, new_values: np.ndarray):
-        self._data.loc[:, "class_id"] = new_values
+    @data_uid.setter
+    def data_uid(self, new_values: np.ndarray):
+        self._data.loc[:, "data_uid"] = new_values
     def category(self) -> np.ndarray:
@@ -273,7 +276,7 @@ def filter(
         if kind is not None:
             if isinstance(kind, int):
-                index = self._data.class_id == kind
+                index = self._data.data_uid == kind
             elif isinstance(kind, str):
                 index = self._data.name == kind
@@ -287,16 +290,16 @@ def filter(
             filt_data = filt_data.loc[index, :]
         return ScatterTable.from_dataframe(filt_data)
-    def iter_by_class(self) -> Iterator[tuple[int, "ScatterTable"]]:
+    def iter_by_data(self) -> Iterator[tuple[int, "ScatterTable"]]:
         """Iterate over subset of data sorted by the data UID.
             Tuple of data UID and subset of ScatterTable.
-        ids = self._data.class_id.dropna().sort_values().unique()
-        for mid in ids:
-            index = self._data.class_id == mid
-            yield mid, ScatterTable.from_dataframe(self._data.loc[index, :])
+        data_ids = self._data.data_uid.dropna().sort_values().unique()
+        for did in data_ids:
+            index = self._data.data_uid == did
+            yield did, ScatterTable.from_dataframe(self._data.loc[index, :])
     def iter_groups(
@@ -326,7 +329,7 @@ def iter_groups(
     def add_row(
         name: str | pd.NA = pd.NA,
-        class_id: int | pd.NA = pd.NA,
+        data_uid: int | pd.NA = pd.NA,
         category: str | pd.NA = pd.NA,
         x: float | pd.NA = pd.NA,
         y: float | pd.NA = pd.NA,
@@ -344,11 +347,11 @@ def add_row(
             y_err: Standard deviation of y value.
             shots: Shot number used to acquire this data point.
             name: Name of this data if available.
-            class_id: Data UID of if available.
+            data_uid: Data UID of if available.
             category: Data category if available.
             analysis: Analysis name if available.
-        self._lazy_add_rows.append([x, y, y_err, name, class_id, category, shots, analysis])
+        self._lazy_add_rows.append([x, y, y_err, name, data_uid, category, shots, analysis])
     def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
@@ -389,7 +392,7 @@ def _warn_composite_data(table: ScatterTable):
     def data_allocation(self) -> np.ndarray:
         """Index of corresponding fit model."""
-        return self.class_id
+        return self.data_uid
@@ -402,7 +405,7 @@ def data_allocation(self) -> np.ndarray:
     def labels(self) -> list[str]:
         """List of model names."""
         # Order sensitive
-        name_id_tups = self._data.groupby(["name", "class_id"]).groups.keys()
+        name_id_tups = self._data.groupby(["name", "data_uid"]).groups.keys()
         return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])]
diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index 21b263b993..d9a1e3dcfd 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -258,7 +258,7 @@ def _format_data(
         y_mean = ramsey_xy.y.mean()
         # Create phase data by arctan(Y/X)
-        for m_id, direction in enumerate(("pos", "neg")):
+        for data_id, direction in enumerate(("pos", "neg")):
             x_quadrature = ramsey_xy.filter(kind=f"X{direction}")
             y_quadrature = ramsey_xy.filter(kind=f"Y{direction}")
             if not np.array_equal(x_quadrature.x, y_quadrature.x):
@@ -294,7 +294,7 @@ def _format_data(
-                    class_id=m_id,
+                    data_uid=data_id,
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index e8532cba60..b197bd2399 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -41,7 +41,7 @@ def setUp(self):
-            "class_id": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1],
+            "data_uid": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1],
             "category": [
@@ -95,7 +95,7 @@ def test_add_row(self):
         obj = ScatterTable()
-            class_id=0,
+            data_uid=0,
@@ -113,7 +113,7 @@ def test_add_row(self):
         np.testing.assert_array_equal(obj.y, np.array([2.3, 3.4]))
         np.testing.assert_array_equal(obj.y_err, np.array([0.4, np.nan]))
         np.testing.assert_array_equal(obj.name, np.array(["model1", None]))
-        np.testing.assert_array_equal(obj.class_id, np.array([0, None]))
+        np.testing.assert_array_equal(obj.data_uid, np.array([0, None]))
         np.testing.assert_array_equal(obj.category, np.array(["raw", "raw"]))
             # Numpy tries to handle nan strictly, but isnan only works for float dtype.
@@ -138,44 +138,42 @@ def test_set_values(self):
         obj.y_err = [0.3, 0.5, 0.7]
         # Broadcast single value
-        obj.class_id = 0
+        obj.data_uid = 0
         obj.name = "model0"
         np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2, 0.3]))
         np.testing.assert_array_equal(obj.y, np.array([1.3, 1.4, 1.5]))
         np.testing.assert_array_equal(obj.y_err, np.array([0.3, 0.5, 0.7]))
-        np.testing.assert_array_equal(obj.class_id, np.array([0, 0, 0]))
+        np.testing.assert_array_equal(obj.data_uid, np.array([0, 0, 0]))
         np.testing.assert_array_equal(obj.name, np.array(["model0", "model0", "model0"]))
     def test_get_subset_numbers(self):
         """Test end-user shortcut for getting the subset of x, y, y_err data."""
         obj = ScatterTable.from_dataframe(self.reference)
-        np.testing.assert_array_equal(obj.get_x("model1", "raw", "Fit1"), np.array([0.100, 0.200]))
-        np.testing.assert_array_equal(obj.get_y("model1", "raw", "Fit1"), np.array([0.192, 0.854]))
-        np.testing.assert_array_equal(
-            obj.get_y_err("model1", "raw", "Fit1"), np.array([0.002, 0.090])
-        )
+        np.testing.assert_array_equal(obj.xvals("model1", "raw", "Fit1"), np.array([0.100, 0.200]))
+        np.testing.assert_array_equal(obj.yvals("model1", "raw", "Fit1"), np.array([0.192, 0.854]))
+        np.testing.assert_array_equal(obj.yerrs("model1", "raw", "Fit1"), np.array([0.002, 0.090]))
     def test_warn_composite_values(self):
         """Test raise warning when returned x, y, y_err data contains multiple data series."""
         obj = ScatterTable.from_dataframe(self.reference)
         with self.assertWarns(UserWarning):
-            obj.get_x()
+            obj.xvals()
         with self.assertWarns(UserWarning):
-            obj.get_y()
+            obj.yvals()
         with self.assertWarns(UserWarning):
-            obj.get_y_err()
+            obj.yerrs()
-    def test_filter_data_by_class_id(self):
+    def test_filter_data_by_data_uid(self):
         """Test filter table data with data UID."""
         obj = ScatterTable.from_dataframe(self.reference)
         filtered = obj.filter(kind=0)
         self.assertEqual(len(filtered), 6)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
-        np.testing.assert_array_equal(filtered.class_id, np.array([0, 0, 0, 0, 0, 0]))
+        np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0, 0, 0, 0, 0]))
     def test_filter_data_by_model_name(self):
         """Test filter table data with data name."""
@@ -215,7 +213,7 @@ def test_filter_multiple(self):
         filtered = obj.filter(kind=0, category="raw", analysis="Fit1")
         self.assertEqual(len(filtered), 2)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2]))
-        np.testing.assert_array_equal(filtered.class_id, np.array([0, 0]))
+        np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0]))
         np.testing.assert_array_equal(filtered.category, np.array(["raw", "raw"]))
         np.testing.assert_array_equal(filtered.analysis, np.array(["Fit1", "Fit1"]))
@@ -223,7 +221,7 @@ def test_iter_class(self):
         """Test iterating over mini tables associated with different data UID."""
         obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
-        class_iter = obj.iter_by_class()
+        class_iter = obj.iter_by_data()
         index, table_cls0 = next(class_iter)
         ref_table_cls0 = obj.filter(kind=0)
@@ -239,7 +237,7 @@ def test_iter_groups(self):
         """Test iterating over mini tables associated with multiple attributes."""
         obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
-        class_iter = obj.iter_groups("class_id", "xval")
+        class_iter = obj.iter_groups("data_uid", "xval")
         (index, xval), table0 = next(class_iter)
         self.assertEqual(index, 0)

From 346d23af58bee40fa005e030dadb7ca9f08a7470 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Mon, 5 Feb 2024 11:54:38 +0900
Subject: [PATCH 16/29] Add documentation for filtering triplet

 docs/tutorials/curve_analysis.rst             |  6 +-
 .../curve_analysis/           | 68 +++++++++++++++++--
 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index c10dd9cbfd..0b91306648 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -295,11 +295,13 @@ for each condition. The role of each column is as follows:
 - ``yval``: Nominal part of the outcome. The outcome is something like expectation value, which is computed from the experiment result with the data processor.
 - ``yerr``: Standard error of the outcome, which is mainly due to sampling error.
 - ``name``: Unique identifier of the result class. This is defined by the ``data_subfit_map`` option.
-- ``data_uid``: Numerical index corresponding to the result class. This number is automatically assigned.
-- ``category``: The attribute of data set. The "raw" category indicates an output from the data processing.
+- ``data_uid``: Integer number corresponding to the data unique index. This number is automatically assigned.
+- ``category``: The tag of data group. The "raw" category indicates an output from the data processing.
 - ``shots``: Number of measurement shots used to acquire this result.
 - ``analysis``: The name of curve analysis instance that generated this data. In :class:`.CompositeCurveAnalysis`, the table is a composite of tables from all component analyses.
+To find data points that belong to a particular dataset, you can follow :ref:`filter_scatter_table`.
 3. Formatting
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 04d3f1538a..9de43380d7 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -34,9 +34,69 @@ class ScatterTable:
     Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
     This table cannot be expanded with user-provided column names.
-    See attribute documentation for what columns represent.
     This dataset is not thread safe. Do not use the same instance in multiple threads.
+    .. _filter_scatter_table:
+    Filtering ScatterTable
+    ----------------------
+    ScatterTable is a single source of the truth as the data used in the curve fit analysis.
+    Each data point in a 1-D curve fit may consist of the x value, y value, and
+    standard error of the y value.
+    In addition, such analysis may internally create several data subsets,
+    and data points can also take metadata triplet (`data_uid`, `category`, `analysis`)
+    to distinguish the subset.
+    * The `data_uid` is an integer key representing the class of the data.
+      When an analysis consists of multiple fit models and performs the multi-objective fit,
+      the created table may contain multiple datasets for each fit model.
+      Usually the index of data matches with the index of the fit model in the analysis.
+      The table also provides `name` column which is a human-friendly text notation of the data_uid.
+      The name and corresponding data_uid must refer to the identical data class,
+      and the name typically matches with the name of the fit model.
+      You can find a particular data subset by either data_uid or name.
+    * The `category` is a string key representing a tag of data groups.
+      The measured outcomes input as-is to the curve analysis are tagged with "raw".
+      In a standard :class:`.CurveAnalysis` subclass, the input data is pre-processed for
+      the fitting and the formatted data is also stored in the table with "formatted" tag.
+      After the fit is successfully conducted and the model parameters are identified,
+      data points in the interpolated fit curves are also stored with "fitted" tag
+      for visualization. The management of data group depends on the design of
+      the curve analysis protocol, and the convention of category naming might
+      be different in a particular analysis.
+    * The `analysis` is a string key representing a name of
+      the analysis instance that generated the data point.
+      This allows a user to combine multiple tables from the different analyses
+      without collapsing the data points.
+      In the :class:`.CompositeCurveAnalysis`, the instance consists of statistically
+      independent fit models represented in a form of nested component analysis instances.
+      Such component has unique analysis name, and datasets generated from each instance
+      are merged into a single table stored in the outermost composite analysis.
+    User must be aware of this triplet to extract data points that belong to a
+    particular data subset. For example,
+    .. code-block:: python
+        mini_table = table.filter(kind="model1", category="raw", analysis="Analysis_A")
+        mini_x = mini_table.x
+        mini_y = mini_table.y
+    this operation is equivalent to
+    .. code-block:: python
+        mini_x = table.xvals(kind="model1", category="raw", analysis="Analysis_A")
+        mini_y = table.yvals(kind="model1", category="raw", analysis="Analysis_A")
+    When an analysis only has a single model and the table is created from a single
+    analysis instance, the data_uid and analysis are trivial, and you only need to
+    specify the category to get subset data of interest.
@@ -62,7 +122,6 @@ class ScatterTable:
     def __init__(self):
-        """Create new dataset."""
         self._lazy_add_rows = []
         self._dump = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
@@ -365,8 +424,8 @@ def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
     def _warn_composite_data(table: ScatterTable):
         if len( > 1:
-                "Returned data contains multiple series. "
-                "You may want to filter the data by a specific kind identifier.",
+                "Returned data contains multiple data kinds. "
+                "You may want to filter the data by a specific data_uid or name.",
         if len(table._data.category.unique()) > 1:
@@ -426,6 +485,7 @@ def get_subset_of(self, index: str | int) -> "ScatterTable":
         return self.filter(kind=index)
     def __len__(self):
+        """Return the number of data points stored in the table."""
         return len(self._data)
     def __eq__(self, other):

From ee0316148645f25acf57e7ba164da9dfd7ef8012 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 00:08:30 +0900
Subject: [PATCH 17/29] Apply review comments

 docs/tutorials/curve_analysis.rst             |  1 +
 .../               |  4 +-
 .../curve_analysis/          |  8 +--
 .../curve_analysis/           | 51 ++++++++++---------
 .../standard_analysis/     |  6 +--
 .../analysis/                 |  2 +-
 .../analysis/            |  4 +-
 .../analysis/            |  4 +-
 .../               | 10 ++--
 .../                |  4 +-
 test/curve_analysis/     | 12 ++---
 11 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 0b91306648..5c2211d200 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -322,6 +322,7 @@ This may return new scatter table object with the addition of rows like the foll
 .. code-block::
+        xval      yval      yerr name  data_uid   category  shots     analysis
     12   0.1  0.234634  0.009183    A         0  formatted   2048   MyAnalysis
     13   0.2  0.737561  0.008656    A         0  formatted   2048   MyAnalysis
     14   0.3  0.487317  0.008018    A         0  formatted   2048   MyAnalysis
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index ad7a04a38b..06f4ae8a89 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -232,7 +232,7 @@ def _create_figures(
         for analysis in self.analyses():
             group_data = curve_data.filter(
             model_names = analysis.model_names()
-            for uid, sub_data in group_data.iter_by_data():
+            for uid, sub_data in group_data.iter_by_data_uid():
                 full_name = f"{model_names[uid]}_{}"
                 # Plot raw data scatters
                 if analysis.options.plot_raw_data:
@@ -378,7 +378,7 @@ def _run_analysis(
             if fit_data.success:
                 # Add fit data to curve data table
                 model_names = analysis.model_names()
-                for data_id, sub_data in formatted_subset.iter_by_data():
+                for data_id, sub_data in formatted_subset.iter_by_data_uid():
                     xval = sub_data.x
                     if len(xval) == 0:
                         # If data is empty, skip drawing this model.
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 95ad2f2669..9d2089d647 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -339,7 +339,7 @@ def _run_curve_fit(
         # Create convenient function to compute residual of the models.
         partial_residuals = []
         valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
-        for i, sub_data in curve_data.iter_by_data():
+        for uid, sub_data in curve_data.iter_by_data_uid():
             if valid_uncertainty:
                 nonzero_yerr = np.where(
                     np.isclose(sub_data.y_err, 0.0),
@@ -356,7 +356,7 @@ def _run_curve_fit(
                 weights = None
             model_residual = partial(
-                self._models[i]._residual,
+                self._models[uid]._residual,
@@ -417,7 +417,7 @@ def _create_figures(
             A list of figures.
-        for i, sub_data in curve_data.iter_by_data():
+        for i, sub_data in curve_data.iter_by_data_uid():
             name = self.model_names()[i]
             # Plot raw data scatters
             if self.options.plot_raw_data:
@@ -499,7 +499,7 @@ def _run_analysis(
         if fit_data.success:
             # Add fit data to curve data table
             model_names = self.model_names()
-            for data_id, sub_data in formatted_subset.iter_by_data():
+            for data_id, sub_data in formatted_subset.iter_by_data_uid():
                 xval = sub_data.x
                 if len(xval) == 0:
                     # If data is empty, skip drawing this model.
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 9de43380d7..bac11f5ac3 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -82,7 +82,7 @@ class ScatterTable:
     .. code-block:: python
-        mini_table = table.filter(kind="model1", category="raw", analysis="Analysis_A")
+        mini_table = table.filter(data_uid="model1", category="raw", analysis="Analysis_A")
         mini_x = mini_table.x
         mini_y = mini_table.y
@@ -90,8 +90,8 @@ class ScatterTable:
     .. code-block:: python
-        mini_x = table.xvals(kind="model1", category="raw", analysis="Analysis_A")
-        mini_y = table.yvals(kind="model1", category="raw", analysis="Analysis_A")
+        mini_x = table.xvals(data_uid="model1", category="raw", analysis="Analysis_A")
+        mini_y = table.yvals(data_uid="model1", category="raw", analysis="Analysis_A")
     When an analysis only has a single model and the table is created from a single
     analysis instance, the data_uid and analysis are trivial, and you only need to
@@ -122,7 +122,6 @@ class ScatterTable:
     def __init__(self):
-        super().__init__()
         self._lazy_add_rows = []
         self._dump = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
@@ -174,7 +173,7 @@ def x(self, new_values):
     def xvals(
-        kind: int | str | None = None,
+        data_uid: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -184,7 +183,7 @@ def xvals(
         A convenient shortcut of getting X data with filtering.
-            kind: Identifier of the data, either data UID or name.
+            data_uid: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -193,7 +192,7 @@ def xvals(
             Numpy array of X values.
-        sub_table = self.filter(kind, category, analysis)
+        sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
         return sub_table.x
@@ -210,7 +209,7 @@ def y(self, new_values: np.ndarray):
     def yvals(
-        kind: int | str | None = None,
+        data_uid: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -220,7 +219,7 @@ def yvals(
         A convenient shortcut of getting Y data with filtering.
-            kind: Identifier of the data, either data UID or name.
+            data_uid: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -229,7 +228,7 @@ def yvals(
             Numpy array of Y values.
-        sub_table = self.filter(kind, category, analysis)
+        sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
         return sub_table.y
@@ -246,7 +245,7 @@ def y_err(self, new_values: np.ndarray):
     def yerrs(
-        kind: int | str | None = None,
+        data_uid: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -256,7 +255,7 @@ def yerrs(
         A convenient shortcut of getting Y error data with filtering.
-            kind: Identifier of the data, either data UID or name.
+            data_uid: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -265,7 +264,7 @@ def yerrs(
             Numpy array of Y error values.
-        sub_table = self.filter(kind, category, analysis)
+        sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
         return sub_table.y_err
@@ -317,14 +316,14 @@ def analysis(self, new_values: np.ndarray):
     def filter(
-        kind: int | str | None = None,
+        data_uid: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
     ) -> ScatterTable:
         """Filter data by class, category, and/or analysis name.
-            kind: Identifier of the data, either data UID or name.
+            data_uid: Identifier of the data, either data UID or name.
             category: Name of data category.
             analysis: Name of analysis.
@@ -333,13 +332,15 @@ def filter(
         filt_data = self._data
-        if kind is not None:
-            if isinstance(kind, int):
-                index = self._data.data_uid == kind
-            elif isinstance(kind, str):
-                index = == kind
+        if data_uid is not None:
+            if isinstance(data_uid, int):
+                index = self._data.data_uid == data_uid
+            elif isinstance(data_uid, str):
+                index = == data_uid
-                raise ValueError(f"Invalid kind type {type(kind)}. This must be integer or string.")
+                raise ValueError(
+                    f"Invalid data_uid {type(data_uid)}. This must be integer or string."
+                )
             filt_data = filt_data.loc[index, :]
         if category is not None:
             index = self._data.category == category
@@ -349,7 +350,7 @@ def filter(
             filt_data = filt_data.loc[index, :]
         return ScatterTable.from_dataframe(filt_data)
-    def iter_by_data(self) -> Iterator[tuple[int, "ScatterTable"]]:
+    def iter_by_data_uid(self) -> Iterator[tuple[int, "ScatterTable"]]:
         """Iterate over subset of data sorted by the data UID.
@@ -424,8 +425,8 @@ def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
     def _warn_composite_data(table: ScatterTable):
         if len( > 1:
-                "Returned data contains multiple data kinds. "
-                "You may want to filter the data by a specific data_uid or name.",
+                "Returned data contains multiple data UIDs. "
+                "You may want to filter the data by a specific data_uid integer or name string.",
         if len(table._data.category.unique()) > 1:
@@ -482,7 +483,7 @@ def get_subset_of(self, index: str | int) -> "ScatterTable":
             A subset of data corresponding to a particular series.
-        return self.filter(kind=index)
+        return self.filter(data_uid=index)
     def __len__(self):
         """Return the number of data points stored in the table."""
diff --git a/qiskit_experiments/curve_analysis/standard_analysis/ b/qiskit_experiments/curve_analysis/standard_analysis/
index 30931d026a..96585dd0b0 100644
--- a/qiskit_experiments/curve_analysis/standard_analysis/
+++ b/qiskit_experiments/curve_analysis/standard_analysis/
@@ -170,9 +170,9 @@ def _generate_fit_guesses(
         user_opt.bounds.set_if_empty(t_off=(0, np.inf), b=(-1, 1))
-        x_data = curve_data.filter(kind="x")
-        y_data = curve_data.filter(kind="y")
-        z_data = curve_data.filter(kind="z")
+        x_data = curve_data.filter(data_uid="x")
+        y_data = curve_data.filter(data_uid="y")
+        z_data = curve_data.filter(data_uid="z")
         omega_xyz = []
         for data in (x_data, y_data, z_data):
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index ce2fb34fd4..988bb70a76 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -118,7 +118,7 @@ def _generate_fit_guesses(
         # Use the highest-frequency curve to estimate the oscillation frequency.
         max_rep_model_name = self.model_names()[-1]
         max_rep = self.options.data_subfit_map[max_rep_model_name]["nrep"]
-        curve_data = curve_data.filter(kind=max_rep_model_name)
+        curve_data = curve_data.filter(data_uid=max_rep_model_name)
         x_data = curve_data.x
         min_beta, max_beta = min(x_data), max(x_data)
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index ea7fc8fcb7..7d2ee51511 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -113,8 +113,8 @@ def _generate_fit_guesses(
             List of fit options that are passed to the fitter function.
-        ramx_data = curve_data.filter(kind="X")
-        ramy_data = curve_data.filter(kind="Y")
+        ramx_data = curve_data.filter(data_uid="X")
+        ramy_data = curve_data.filter(data_uid="Y")
         # At very low frequency, y value of X (Y) curve stay at P=1.0 (0.5) for all x values.
         # Computing y peak-to-peak with combined data gives fake amplitude of 0.25.
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index b2fcb25482..30e875d9d3 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -142,8 +142,8 @@ def _generate_fit_guesses(
         y_ptp = y_max - y_min
         x_max = np.max(curve_data.x)
-        data_0 = curve_data.filter(kind="0")
-        data_1 = curve_data.filter(kind="1")
+        data_0 = curve_data.filter(data_uid="0")
+        data_1 = curve_data.filter(data_uid="1")
         def typical_step(arr):
             """Find the typical step size of an array"""
diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index d9a1e3dcfd..ee01650721 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -259,8 +259,8 @@ def _format_data(
         # Create phase data by arctan(Y/X)
         for data_id, direction in enumerate(("pos", "neg")):
-            x_quadrature = ramsey_xy.filter(kind=f"X{direction}")
-            y_quadrature = ramsey_xy.filter(kind=f"Y{direction}")
+            x_quadrature = ramsey_xy.filter(data_uid=f"X{direction}")
+            y_quadrature = ramsey_xy.filter(data_uid=f"Y{direction}")
             if not np.array_equal(x_quadrature.x, y_quadrature.x):
                 raise ValueError(
                     "Amplitude values of X and Y quadrature are different. "
@@ -358,7 +358,7 @@ def _create_figures(
         # plot unwrapped phase on first axis
         for direction in ("pos", "neg"):
-            sub_data = curve_data.filter(kind=f"FREQ{direction}", category="freq")
+            sub_data = curve_data.filter(data_uid=f"FREQ{direction}", category="freq")
@@ -368,7 +368,7 @@ def _create_figures(
         # plot raw RamseyXY plot on second axis
         for name in ("Xpos", "Ypos", "Xneg", "Yneg"):
-            sub_data = curve_data.filter(kind=name, category="ramsey_xy")
+            sub_data = curve_data.filter(data_uid=name, category="ramsey_xy")
@@ -384,7 +384,7 @@ def _create_figures(
         # plot frequency and Ramsey fit lines
         line_data = curve_data.filter(category="fitted")
         for direction in ("pos", "neg"):
-            sub_data = line_data.filter(kind=f"FREQ{direction}")
+            sub_data = line_data.filter(data_uid=f"FREQ{direction}")
             if len(sub_data) == 0:
             xval = sub_data.x
diff --git a/qiskit_experiments/library/randomized_benchmarking/ b/qiskit_experiments/library/randomized_benchmarking/
index b918fd41fa..4e10cd7c0d 100644
--- a/qiskit_experiments/library/randomized_benchmarking/
+++ b/qiskit_experiments/library/randomized_benchmarking/
@@ -141,12 +141,12 @@ def _generate_fit_guesses(
         b_guess = 1 / 2**self._num_qubits
         # for standard RB curve
-        std_curve = curve_data.filter(kind="standard")
+        std_curve = curve_data.filter(data_uid="standard")
         alpha_std = curve.guess.rb_decay(std_curve.x, std_curve.y, b=b_guess)
         a_std = (std_curve.y[0] - b_guess) / (alpha_std ** std_curve.x[0])
         # for interleaved RB curve
-        int_curve = curve_data.filter(kind="interleaved")
+        int_curve = curve_data.filter(data_uid="interleaved")
         alpha_int = curve.guess.rb_decay(int_curve.x, int_curve.y, b=b_guess)
         a_int = (int_curve.y[0] - b_guess) / (alpha_int ** int_curve.x[0])
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index b197bd2399..9777cd300d 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -170,7 +170,7 @@ def test_filter_data_by_data_uid(self):
         """Test filter table data with data UID."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(kind=0)
+        filtered = obj.filter(data_uid=0)
         self.assertEqual(len(filtered), 6)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
         np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0, 0, 0, 0, 0]))
@@ -179,7 +179,7 @@ def test_filter_data_by_model_name(self):
         """Test filter table data with data name."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(kind="model1")
+        filtered = obj.filter(data_uid="model1")
         self.assertEqual(len(filtered), 6)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
@@ -210,7 +210,7 @@ def test_filter_multiple(self):
         """Test filter table data with multiple attributes."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(kind=0, category="raw", analysis="Fit1")
+        filtered = obj.filter(data_uid=0, category="raw", analysis="Fit1")
         self.assertEqual(len(filtered), 2)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2]))
         np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0]))
@@ -221,15 +221,15 @@ def test_iter_class(self):
         """Test iterating over mini tables associated with different data UID."""
         obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
-        class_iter = obj.iter_by_data()
+        class_iter = obj.iter_by_data_uid()
         index, table_cls0 = next(class_iter)
-        ref_table_cls0 = obj.filter(kind=0)
+        ref_table_cls0 = obj.filter(data_uid=0)
         self.assertEqual(index, 0)
         self.assertEqual(table_cls0, ref_table_cls0)
         index, table_cls1 = next(class_iter)
-        ref_table_cls1 = obj.filter(kind=1)
+        ref_table_cls1 = obj.filter(data_uid=1)
         self.assertEqual(index, 1)
         self.assertEqual(table_cls1, ref_table_cls1)

From ee5b34d5da672db3b6daeb83fa7924779d31b82d Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 10:23:52 +0900
Subject: [PATCH 18/29] Wording suggestions

Co-authored-by: Will Shanks <>
 docs/tutorials/curve_analysis.rst             | 13 ++--
 .../curve_analysis/           | 72 ++++++++++---------
 .../framework/        | 13 ++--
 3 files changed, 51 insertions(+), 47 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 5c2211d200..8a1e7ba04e 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -292,13 +292,16 @@ is scanned from 0.1 to 0.3 in each subset. In this example, the experiment is ru
 for each condition. The role of each column is as follows:
 - ``xval``: Parameter scanned in the experiment. This value must be defined in the circuit metadata.
-- ``yval``: Nominal part of the outcome. The outcome is something like expectation value, which is computed from the experiment result with the data processor.
+- ``yval``: Nominal part of the outcome. The outcome is something like expectation value,
+  which is computed from the experiment result with the data processor.
 - ``yerr``: Standard error of the outcome, which is mainly due to sampling error.
 - ``name``: Unique identifier of the result class. This is defined by the ``data_subfit_map`` option.
-- ``data_uid``: Integer number corresponding to the data unique index. This number is automatically assigned.
-- ``category``: The tag of data group. The "raw" category indicates an output from the data processing.
+- ``data_uid``: Integer corresponding to a data unique index. This number is automatically assigned.
+- ``category``: A tag for the data group. The "raw" category indicates an output from the data processing.
 - ``shots``: Number of measurement shots used to acquire this result.
-- ``analysis``: The name of curve analysis instance that generated this data. In :class:`.CompositeCurveAnalysis`, the table is a composite of tables from all component analyses.
+- ``analysis``: The name of the curve analysis instance that generated this data.
+  For a simple analysis class, all rows will have the same value, but :class:`.CompositeCurveAnalysis`
+  combines the tables from all component analyses, leading to more than one unique entry.
 To find data points that belong to a particular dataset, you can follow :ref:`filter_scatter_table`.
@@ -313,7 +316,7 @@ This allows the analysis to easily estimate the slope of the curves to
 create algorithmic initial guess of fit parameters.
 A developer can inject extra data processing, for example, filtering, smoothing,
 or elimination of outliers for better fitting.
-The new data_uid is given here so that its value corresponds to the fit model object index
+The new ``data_uid`` is given here so that its value corresponds to the fit model object index
 in this analysis class. This index mapping is done based upon the correspondence of
 the data name and the fit model name.
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index bac11f5ac3..b89baf1904 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -30,51 +30,53 @@
 class ScatterTable:
-    """A table-like dataset for curve fitting intermediate data.
+    """A table-like dataset for the intermediate data used for curve fitting.
     Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
     This table cannot be expanded with user-provided column names.
-    This dataset is not thread safe. Do not use the same instance in multiple threads.
+    .. note::
+        This dataset is not thread safe. Do not use the same instance in multiple threads.
     .. _filter_scatter_table:
     Filtering ScatterTable
-    ScatterTable is a single source of the truth as the data used in the curve fit analysis.
+    ScatterTable is the single source of truth for the data used in the curve fit analysis.
     Each data point in a 1-D curve fit may consist of the x value, y value, and
     standard error of the y value.
-    In addition, such analysis may internally create several data subsets,
-    and data points can also take metadata triplet (`data_uid`, `category`, `analysis`)
+    In addition, such analysis may internally create several data subsets.
+    Each data point is given a metadata triplet (`data_uid`, `category`, `analysis`)
     to distinguish the subset.
     * The `data_uid` is an integer key representing the class of the data.
-      When an analysis consists of multiple fit models and performs the multi-objective fit,
+      When an analysis consists of multiple fit models and performs a multi-objective fit,
       the created table may contain multiple datasets for each fit model.
       Usually the index of data matches with the index of the fit model in the analysis.
-      The table also provides `name` column which is a human-friendly text notation of the data_uid.
-      The name and corresponding data_uid must refer to the identical data class,
-      and the name typically matches with the name of the fit model.
-      You can find a particular data subset by either data_uid or name.
-    * The `category` is a string key representing a tag of data groups.
-      The measured outcomes input as-is to the curve analysis are tagged with "raw".
-      In a standard :class:`.CurveAnalysis` subclass, the input data is pre-processed for
-      the fitting and the formatted data is also stored in the table with "formatted" tag.
+      The table also provides a `name` column which is a human-friendly text notation of the data_uid.
+      The `name` and corresponding `data_uid` must refer to the identical data class,
+      and the `name` typically matches with the name of the fit model.
+      You can find a particular data subset by either `data_uid` or `name`.
+    * The `category` is a string tag categorizing a group of data points.
+      The measured outcomes input as-is to the curve analysis are categorized as "raw".
+      In a standard :class:`.CurveAnalysis` subclass, the input data is formatted for
+      the fitting and the formatted data is also stored in the table with the "formatted" category.
       After the fit is successfully conducted and the model parameters are identified,
-      data points in the interpolated fit curves are also stored with "fitted" tag
-      for visualization. The management of data group depends on the design of
+      data points in the interpolated fit curves are stored with the "fitted" category
+      for visualization. The management of the data groups depends on the design of
       the curve analysis protocol, and the convention of category naming might
       be different in a particular analysis.
     * The `analysis` is a string key representing a name of
       the analysis instance that generated the data point.
-      This allows a user to combine multiple tables from the different analyses
+      This allows a user to combine multiple tables from different analyses
       without collapsing the data points.
-      In the :class:`.CompositeCurveAnalysis`, the instance consists of statistically
-      independent fit models represented in a form of nested component analysis instances.
-      Such component has unique analysis name, and datasets generated from each instance
+      A :class:`.CompositeCurveAnalysis` instance consists of
+      nested component analysis instances containing statistically independent fit models.
+      Each component is given a unique analysis name, and datasets generated from each instance
       are merged into a single table stored in the outermost composite analysis.
     User must be aware of this triplet to extract data points that belong to a
@@ -86,7 +88,7 @@ class ScatterTable:
         mini_x = mini_table.x
         mini_y = mini_table.y
-    this operation is equivalent to
+    This operation is equivalent to
     .. code-block:: python
@@ -180,7 +182,7 @@ def xvals(
     ) -> np.ndarray:
         """Get subset of X values.
-        A convenient shortcut of getting X data with filtering.
+        A convenient shortcut for getting X data with filtering.
             data_uid: Identifier of the data, either data UID or name.
@@ -216,7 +218,7 @@ def yvals(
     ) -> np.ndarray:
         """Get subset of Y values.
-        A convenient shortcut of getting Y data with filtering.
+        A convenient shortcut for getting Y data with filtering.
             data_uid: Identifier of the data, either data UID or name.
@@ -252,7 +254,7 @@ def yerrs(
     ) -> np.ndarray:
         """Get subset of standard deviation of Y values.
-        A convenient shortcut of getting Y error data with filtering.
+        A convenient shortcut for getting Y error data with filtering.
             data_uid: Identifier of the data, either data UID or name.
@@ -271,7 +273,7 @@ def yerrs(
     def name(self) -> np.ndarray:
-        """Corresponding data name."""
+        """Corresponding data name for each data point."""
         return, na_value=None)
@@ -280,7 +282,7 @@ def name(self, new_values: np.ndarray):
     def data_uid(self) -> np.ndarray:
-        """Corresponding data UID."""
+        """Corresponding data UID for each data point."""
         return self._data.data_uid.to_numpy(dtype=object, na_value=None)
@@ -289,7 +291,7 @@ def data_uid(self, new_values: np.ndarray):
     def category(self) -> np.ndarray:
-        """Category of data points."""
+        """Array of categories of the data points."""
         return self._data.category.to_numpy(dtype=object, na_value=None)
@@ -298,7 +300,7 @@ def category(self, new_values: np.ndarray):
     def shots(self) -> np.ndarray:
-        """Shot number used to acquire data points."""
+        """Shot number used to acquire each data point."""
         return self._data.shots.to_numpy(dtype=object, na_value=np.nan)
@@ -307,7 +309,7 @@ def shots(self, new_values: np.ndarray):
     def analysis(self) -> np.ndarray:
-        """Corresponding analysis name."""
+        """Corresponding analysis name for each data point."""
         return self._data.analysis.to_numpy(dtype=object, na_value=None)
@@ -368,16 +370,16 @@ def iter_groups(
         """Iterate over the subset sorted by multiple column values.
-            group_by: Name of column to group by.
+            group_by: Names of columns to group by.
-            Tuple of keys and subset of ScatterTable.
+            Tuple of values for the grouped columns and the corresponding subset of the scatter table.
             sort_by = itemgetter(*[self.DEFAULT_COLUMNS.index(c) for c in group_by])
         except ValueError as ex:
             raise ValueError(
-                f"Specified columns don't exist: {group_by} are not subset of {self.DEFAULT_COLUMNS}."
+                f"Specified columns don't exist: {group_by} is not a subset of {self.DEFAULT_COLUMNS}."
             ) from ex
         # Use python native groupby method on dataframe ndarray when sorting by multiple columns.
@@ -397,7 +399,7 @@ def add_row(
         shots: float | pd.NA = pd.NA,
         analysis: str | pd.NA = pd.NA,
-        """Add new data group to the table.
+        """Add a new data point to the table.
         Data must be the same length.
@@ -445,7 +447,7 @@ def _warn_composite_data(table: ScatterTable):
-        additional_msg="Curve data uses dataframe representation. Call .model_id instead.",
+        additional_msg="Curve data uses dataframe representation. Call .data_uid instead.",
diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index 4f33fa1737..eaa30cddf0 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -29,11 +29,11 @@ class AnalysisResultTable:
     """A table-like dataset for analysis results.
     Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
-    The table is automatically expanded when extra key is included in the
-    input dictionary data. Missing columns in the input data are filled with null value.
+    The table is automatically expanded when an extra key is included in the
+    input dictionary data. Missing columns in the input data are filled with a null value.
     Table row index (i.e. entry ID) is created by truncating the result_id string which
-    is basically UUID-4 string. A random unique ID is generated when the result_id
+    is basically a UUID-4 string. A random unique ID is generated when the result_id
     is missing in the input data.
     Any operation on the table value via the instance methods guarantees thread safety.
@@ -57,7 +57,6 @@ class AnalysisResultTable:
     def __init__(self):
         """Create new dataset."""
-        super().__init__()
         self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
         self._lock = threading.RLock()
@@ -102,10 +101,10 @@ def add_data(
             key: Identifier of this entry. This must be UUID-4 format.
-                The result_id string in the input data is used if nothing provided.
-                Random unique ID is prepared if result_id is also missing.
+                The ``result_id`` string in the input data is used if nothing is provided.
+                A random unique ID is prepared if ``result_id`` is also missing.
             data: Arbitrary key-value pairs representing a single data entry.
-                Missing values for default columns are filled with None.
+                Missing values for default columns are filled with ``None``.
             Assigned analysis result ID.

From 38abdffc7be77165fcc438d8018df7bb0815981d Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 10:25:49 +0900
Subject: [PATCH 19/29] Remove DEFAULT_

 .../curve_analysis/            | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index b89baf1904..7413eb0c23 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -101,7 +101,7 @@ class ScatterTable:
+    COLUMNS = [
@@ -112,7 +112,7 @@ class ScatterTable:
+    DTYPES = [
@@ -125,14 +125,14 @@ class ScatterTable:
     def __init__(self):
         self._lazy_add_rows = []
-        self._dump = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
+        self._dump = pd.DataFrame(columns=self.COLUMNS)
     def _data(self) -> pd.DataFrame:
         if self._lazy_add_rows:
             # Add data when table element is called.
             # Adding rows in loop is extremely slow in pandas.
-            tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.DEFAULT_COLUMNS)
+            tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.COLUMNS)
             tmp_df = self._format_table(tmp_df)
             if len(self._dump) == 0:
                 self._dump = tmp_df
@@ -151,7 +151,7 @@ def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
             A new ScatterTable instance.
-        if list(data.columns) != cls.DEFAULT_COLUMNS:
+        if list(data.columns) != cls.COLUMNS:
             raise ValueError("Input dataframe columns don't match with the ScatterTable spec.")
         instance = object.__new__(ScatterTable)
         instance._lazy_add_rows = []
@@ -376,16 +376,16 @@ def iter_groups(
             Tuple of values for the grouped columns and the corresponding subset of the scatter table.
-            sort_by = itemgetter(*[self.DEFAULT_COLUMNS.index(c) for c in group_by])
+            sort_by = itemgetter(*[self.COLUMNS.index(c) for c in group_by])
         except ValueError as ex:
             raise ValueError(
-                f"Specified columns don't exist: {group_by} is not a subset of {self.DEFAULT_COLUMNS}."
+                f"Specified columns don't exist: {group_by} is not a subset of {self.COLUMNS}."
             ) from ex
         # Use python native groupby method on dataframe ndarray when sorting by multiple columns.
         # This is more performant than pandas groupby implementation.
         for vals, sub_data in groupby(sorted(self._data.values, key=sort_by), key=sort_by):
-            tmp_df = pd.DataFrame(list(sub_data), columns=self.DEFAULT_COLUMNS)
+            tmp_df = pd.DataFrame(list(sub_data), columns=self.COLUMNS)
             yield vals, ScatterTable.from_dataframe(tmp_df)
     def add_row(
@@ -419,7 +419,7 @@ def add_row(
     def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
         return (
             data.replace(np.nan, pd.NA)
-            .astype(dict(zip(cls.DEFAULT_COLUMNS, cls.DEFAULT_DTYPES)))
+            .astype(dict(zip(cls.COLUMNS, cls.DTYPES)))

From 9e27f16eb3d0f376cc0bf81494319199d67f4cb1 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 12:02:58 +0900
Subject: [PATCH 20/29] Reorganize the doc

 docs/tutorials/curve_analysis.rst             | 136 +++++++++++++-----
 .../curve_analysis/           |  70 ++-------
 2 files changed, 111 insertions(+), 95 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 8a1e7ba04e..7c94478a84 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -240,6 +240,85 @@ generate initial guesses for parameters, from the ``AnalysisA`` class in the fir
 On the other hand, in the latter case, you need to manually copy and paste
 every logic defined in ``AnalysisA``.
+.. _data_management_with_scatter_table:
+Managing intermediate data
+:class:`.ScatterTable` is the single source of truth for the data used in the curve fit analysis.
+Each data point in a 1-D curve fit may consist of the x value, y value, and
+standard error of the y value.
+In addition, such analysis may internally create several data subsets.
+Each data point is given a metadata triplet (`data_uid`, `category`, `analysis`)
+to distinguish the subset.
+* The `data_uid` is an integer key representing a label of the data which may be classified by fit models.
+  When an analysis consists of multiple fit models and performs a multi-objective fit,
+  the created table may contain multiple datasets for each fit model.
+  Usually the index of data matches with the index of the fit model in the analysis.
+  The table also provides a `name` column which is a human-friendly text notation of the `data_uid`.
+  The `name` and corresponding `data_uid` must refer to the identical group,
+  and the `name` typically matches with the name of the fit model.
+  You can find a particular data subset by either `data_uid` or `name`.
+* The `category` is a string tag categorizing a group of data points.
+  The measured outcomes input as-is to the curve analysis are categorized by "raw".
+  In a standard :class:`.CurveAnalysis` subclass, the input data is formatted for
+  the fitting and the formatted data is also stored in the table with the "formatted" category.
+  You can filter the formatted data to run curve fitting with your custom program.
+  After the fit is successfully conducted and the model parameters are identified,
+  data points in the interpolated fit curves are stored with the "fitted" category
+  for visualization. The management of the data groups depends on the design of
+  the curve analysis protocol, and the convention of category naming might
+  be different in a particular analysis.
+* The `analysis` is a string key representing a name of
+  the analysis instance that generated the data point.
+  This allows a user to combine multiple tables from different analyses without collapsing the data points.
+  For a simple analysis class, all rows will have the same value,
+  but a :class:`.CompositeCurveAnalysis` instance consists of
+  nested component analysis instances containing statistically independent fit models.
+  Each component is given a unique analysis name, and datasets generated from each instance
+  are merged into a single table stored in the outermost composite analysis.
+Users must be aware of this triplet to extract data points that belong to a
+particular data subset. For example,
+.. code-block:: python
+    mini_table = table.filter(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_x = mini_table.x
+    mini_y = mini_table.y
+This operation is equivalent to
+.. code-block:: python
+    mini_x = table.xvals(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_y = table.yvals(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
+When an analysis only has a single model and the table is created from a single
+analysis instance, the data_uid and analysis are trivial, and you only need to
+specify the category to get subset data of interest.
+The full description of the :class:`.ScatterTable` columns is as follows:
+- `xval`: Parameter scanned in the experiment. This value must be defined in the circuit metadata.
+- `yval`: Nominal part of the outcome. The outcome is something like an expectation value,
+  which is computed from the experiment result with the data processor.
+- `yerr`: Standard error of the outcome, which is mainly due to sampling error.
+- `name`: Unique identifier of the data group. This is defined by the ``data_subfit_map`` option in the :class:`.CurveAnalysis`.
+- `data_uid`: Integer corresponding to a data unique index. This number is automatically assigned.
+- `category`: A tag for the data group. This is defined by a developer of the curve analysis.
+- `shots`: Number of measurement shots used to acquire a data point. This value can be defined in the circuit metadata.
+- `analysis`: The name of the curve analysis instance that generated a data point.
+This object helps an analysis developer with writing a custom analysis class
+without an overhead of complex data management, as well as end-users with
+retrieving and reusing the intermediate data for their custom fitting workflow
+outside our curve fitting framework.
+Note that a :class:`.ScatterTable` instance may be saved in the :class:`.ExperimentData` as an artifact.
 .. _curve_analysis_workflow:
 Curve Analysis workflow
@@ -273,37 +352,24 @@ This table may look like:
 .. code-block::
-        xval      yval      yerr name  data_uid category  shots     analysis
-    0    0.1  0.153659  0.011258    A         0      raw   1024   MyAnalysis
-    1    0.1  0.590732  0.015351    B         1      raw   1024   MyAnalysis
-    2    0.1  0.315610  0.014510    A         0      raw   1024   MyAnalysis
-    3    0.1  0.376098  0.015123    B         1      raw   1024   MyAnalysis
-    4    0.2  0.937073  0.007581    A         0      raw   1024   MyAnalysis
-    5    0.2  0.323415  0.014604    B         1      raw   1024   MyAnalysis
-    6    0.2  0.538049  0.015565    A         0      raw   1024   MyAnalysis
-    7    0.2  0.530244  0.015581    B         1      raw   1024   MyAnalysis
-    8    0.3  0.143902  0.010958    A         0      raw   1024   MyAnalysis
-    9    0.3  0.261951  0.013727    B         1      raw   1024   MyAnalysis
-    10   0.3  0.830732  0.011707    A         0      raw   1024   MyAnalysis
-    11   0.3  0.874634  0.010338    B         1      raw   1024   MyAnalysis
+        xval      yval      yerr  name  data_uid category  shots     analysis
+    0    0.1  0.153659  0.011258     A         0      raw   1024   MyAnalysis
+    1    0.1  0.590732  0.015351     B         1      raw   1024   MyAnalysis
+    2    0.1  0.315610  0.014510     A         0      raw   1024   MyAnalysis
+    3    0.1  0.376098  0.015123     B         1      raw   1024   MyAnalysis
+    4    0.2  0.937073  0.007581     A         0      raw   1024   MyAnalysis
+    5    0.2  0.323415  0.014604     B         1      raw   1024   MyAnalysis
+    6    0.2  0.538049  0.015565     A         0      raw   1024   MyAnalysis
+    7    0.2  0.530244  0.015581     B         1      raw   1024   MyAnalysis
+    8    0.3  0.143902  0.010958     A         0      raw   1024   MyAnalysis
+    9    0.3  0.261951  0.013727     B         1      raw   1024   MyAnalysis
+    10   0.3  0.830732  0.011707     A         0      raw   1024   MyAnalysis
+    11   0.3  0.874634  0.010338     B         1      raw   1024   MyAnalysis
 where the experiment consists of two subset series A and B, and the experiment parameter (xval)
 is scanned from 0.1 to 0.3 in each subset. In this example, the experiment is run twice
-for each condition. The role of each column is as follows:
-- ``xval``: Parameter scanned in the experiment. This value must be defined in the circuit metadata.
-- ``yval``: Nominal part of the outcome. The outcome is something like expectation value,
-  which is computed from the experiment result with the data processor.
-- ``yerr``: Standard error of the outcome, which is mainly due to sampling error.
-- ``name``: Unique identifier of the result class. This is defined by the ``data_subfit_map`` option.
-- ``data_uid``: Integer corresponding to a data unique index. This number is automatically assigned.
-- ``category``: A tag for the data group. The "raw" category indicates an output from the data processing.
-- ``shots``: Number of measurement shots used to acquire this result.
-- ``analysis``: The name of the curve analysis instance that generated this data.
-  For a simple analysis class, all rows will have the same value, but :class:`.CompositeCurveAnalysis`
-  combines the tables from all component analyses leading to more than one unique entry.
-To find data points that belong to a particular dataset, you can follow :ref:`filter_scatter_table`.
+for each condition.
+See :ref:`data_management_with_scatter_table` for the details of columns.
 3. Formatting
@@ -325,13 +391,13 @@ This may return new scatter table object with the addition of rows like the foll
 .. code-block::
-        xval      yval      yerr name  data_uid   category  shots     analysis
-    12   0.1  0.234634  0.009183    A         0  formatted   2048   MyAnalysis
-    13   0.2  0.737561  0.008656    A         0  formatted   2048   MyAnalysis
-    14   0.3  0.487317  0.008018    A         0  formatted   2048   MyAnalysis
-    15   0.1  0.483415  0.010774    B         1  formatted   2048   MyAnalysis
-    16   0.2  0.426829  0.010678    B         1  formatted   2048   MyAnalysis
-    17   0.3  0.568293  0.008592    B         1  formatted   2048   MyAnalysis
+        xval      yval      yerr  name  data_uid   category  shots     analysis
+    12   0.1  0.234634  0.009183     A         0  formatted   2048   MyAnalysis
+    13   0.2  0.737561  0.008656     A         0  formatted   2048   MyAnalysis
+    14   0.3  0.487317  0.008018     A         0  formatted   2048   MyAnalysis
+    15   0.1  0.483415  0.010774     B         1  formatted   2048   MyAnalysis
+    16   0.2  0.426829  0.010678     B         1  formatted   2048   MyAnalysis
+    17   0.3  0.568293  0.008592     B         1  formatted   2048   MyAnalysis
 The default :meth:`_format_data` method adds its output data with the category "formatted".
 This category name must be also specified in the analysis option ``fit_category``.
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 7413eb0c23..61470193eb 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -32,72 +32,22 @@
 class ScatterTable:
     """A table-like dataset for the intermediate data used for curve fitting.
-    Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
+    Default table columns are defined in the class attribute :attr:`.COLUMNS`.
     This table cannot be expanded with user-provided column names.
+    In a standard :class:`.CurveAnalysis` subclass, a ScatterTable instance may be
+    stored in the :class:`.ExperimentData` as an artifact.
+    Users can retrieve the table data at a later time to rerun a fitting with a homemade program
+    or with different fit options, or to visualize the curves in a preferred format.
+    This table dataset is designed to seamlessly provide such information
+    that an experimentalist may want to reuse for a custom workflow.
     .. note::
         This dataset is not thread safe. Do not use the same instance in multiple threads.
-    .. _filter_scatter_table:
-    Filtering ScatterTable
-    ----------------------
-    ScatterTable is the single source of truth for the data used in the curve fit analysis.
-    Each data point in a 1-D curve fit may consist of the x value, y value, and
-    standard error of the y value.
-    In addition, such analysis may internally create several data subsets.
-    Each data point is given a metadata triplet (`data_uid`, `category`, `analysis`)
-    to distinguish the subset.
-    * The `data_uid` is an integer key representing the class of the data.
-      When an analysis consists of multiple fit models and performs a multi-objective fit,
-      the created table may contain multiple datasets for each fit model.
-      Usually the index of data matches with the index of the fit model in the analysis.
-      The table also provides a `name` column which is a human-friendly text notation of the data_uid.
-      The `name` and corresponding `data_uid` must refer to the identical data class,
-      and the `name` typically matches with the name of the fit model.
-      You can find a particular data subset by either `data_uid` or `name`.
-    * The `category` is a string tag categorizing a group of data points.
-      The measured outcomes input as-is to the curve analysis are categorized by "raw".
-      In a standard :class:`.CurveAnalysis` subclass, the input data is formatted for
-      the fitting and the formatted data is also stored in the table with the "formatted" category.
-      After the fit is successfully conducted and the model parameters are identified,
-      data points in the interpolated fit curves are stored with the "fitted" category
-      for visualization. The management of the data groups depends on the design of
-      the curve analysis protocol, and the convention of category naming might
-      be different in a particular analysis.
-    * The `analysis` is a string key representing a name of
-      the analysis instance that generated the data point.
-      This allows a user to combine multiple tables from different analyses
-      without collapsing the data points.
-      A :class:`.CompositeCurveAnalysis` instance consists of
-      nested component analysis instances containing statistically independent fit models.
-      Each component is given a unique analysis name, and datasets generated from each instance
-      are merged into a single table stored in the outermost composite analysis.
-    User must be aware of this triplet to extract data points that belong to a
-    particular data subset. For example,
-    .. code-block:: python
-        mini_table = table.filter(data_uid="model1", category="raw", analysis="Analysis_A")
-        mini_x = mini_table.x
-        mini_y = mini_table.y
-    This operation is equivalent to
-    .. code-block:: python
-        mini_x = table.xvals(data_uid="model1", category="raw", analysis="Analysis_A")
-        mini_y = table.yvals(data_uid="model1", category="raw", analysis="Analysis_A")
-    When an analysis only has a single model and the table is created from a single
-    analysis instance, the data_uid and analysis are trivial, and you only need to
-    specify the category to get subset data of interest.
+    See the tutorial of :ref:`data_management_with_scatter_table` for the
+    role of each table column and how values are typically provided.

From b870be33607b502dc5e961a40b2e8c1785fec05c Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 12:31:47 +0900
Subject: [PATCH 21/29] Remove _data

 .../curve_analysis/           | 103 +++++++++---------
 1 file changed, 49 insertions(+), 54 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 61470193eb..db3d6ec82c 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -77,20 +77,6 @@ def __init__(self):
         self._lazy_add_rows = []
         self._dump = pd.DataFrame(columns=self.COLUMNS)
-    @property
-    def _data(self) -> pd.DataFrame:
-        if self._lazy_add_rows:
-            # Add data when table element is called.
-            # Adding rows in loop is extremely slow in pandas.
-            tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.COLUMNS)
-            tmp_df = self._format_table(tmp_df)
-            if len(self._dump) == 0:
-                self._dump = tmp_df
-            else:
-                self._dump = pd.concat([self._dump, tmp_df], ignore_index=True)
-            self._lazy_add_rows.clear()
-        return self._dump
     def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
         """Create new dataset with existing dataframe.
@@ -111,17 +97,27 @@ def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
     def dataframe(self):
         """Dataframe object of data points."""
-        return self._data
+        if self._lazy_add_rows:
+            # Add data when table element is called.
+            # Adding rows in loop is extremely slow in pandas.
+            tmp_df = pd.DataFrame(self._lazy_add_rows, columns=self.COLUMNS)
+            tmp_df = self._format_table(tmp_df)
+            if len(self._dump) == 0:
+                self._dump = tmp_df
+            else:
+                self._dump = pd.concat([self._dump, tmp_df], ignore_index=True)
+            self._lazy_add_rows.clear()
+        return self._dump
     def x(self) -> np.ndarray:
         """X values."""
         # For backward compatibility with CurveData.x
-        return self._data.xval.to_numpy(dtype=float, na_value=np.nan)
+        return self.dataframe.xval.to_numpy(dtype=float, na_value=np.nan)
     def x(self, new_values):
-        self._data.loc[:, "xval"] = new_values
+        self.dataframe.loc[:, "xval"] = new_values
     def xvals(
@@ -146,18 +142,18 @@ def xvals(
         sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
-            self._warn_composite_data(sub_table)
+            sub_table._warn_composite_data()
         return sub_table.x
     def y(self) -> np.ndarray:
         """Y values."""
         # For backward compatibility with CurveData.y
-        return self._data.yval.to_numpy(dtype=float, na_value=np.nan)
+        return self.dataframe.yval.to_numpy(dtype=float, na_value=np.nan)
     def y(self, new_values: np.ndarray):
-        self._data.loc[:, "yval"] = new_values
+        self.dataframe.loc[:, "yval"] = new_values
     def yvals(
@@ -182,18 +178,18 @@ def yvals(
         sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
-            self._warn_composite_data(sub_table)
+            sub_table._warn_composite_data()
         return sub_table.y
     def y_err(self) -> np.ndarray:
         """Standard deviation of Y values."""
         # For backward compatibility with CurveData.y_err
-        return self._data.yerr.to_numpy(dtype=float, na_value=np.nan)
+        return self.dataframe.yerr.to_numpy(dtype=float, na_value=np.nan)
     def y_err(self, new_values: np.ndarray):
-        self._data.loc[:, "yerr"] = new_values
+        self.dataframe.loc[:, "yerr"] = new_values
     def yerrs(
@@ -218,53 +214,53 @@ def yerrs(
         sub_table = self.filter(data_uid, category, analysis)
         if check_unique:
-            self._warn_composite_data(sub_table)
+            sub_table._warn_composite_data()
         return sub_table.y_err
     def name(self) -> np.ndarray:
         """Corresponding data name for each data point."""
-        return self._data.name.to_numpy(dtype=object, na_value=None)
+        return self.dataframe.name.to_numpy(dtype=object, na_value=None)
     def name(self, new_values: np.ndarray):
-        self._data.loc[:, "name"] = new_values
+        self.dataframe.loc[:, "name"] = new_values
     def data_uid(self) -> np.ndarray:
         """Corresponding data UID for each data point."""
-        return self._data.data_uid.to_numpy(dtype=object, na_value=None)
+        return self.dataframe.data_uid.to_numpy(dtype=object, na_value=None)
     def data_uid(self, new_values: np.ndarray):
-        self._data.loc[:, "data_uid"] = new_values
+        self.dataframe.loc[:, "data_uid"] = new_values
     def category(self) -> np.ndarray:
         """Array of categories of the data points."""
-        return self._data.category.to_numpy(dtype=object, na_value=None)
+        return self.dataframe.category.to_numpy(dtype=object, na_value=None)
     def category(self, new_values: np.ndarray):
-        self._data.loc[:, "category"] = new_values
+        self.dataframe.loc[:, "category"] = new_values
     def shots(self) -> np.ndarray:
         """Shot number used to acquire each data point."""
-        return self._data.shots.to_numpy(dtype=object, na_value=np.nan)
+        return self.dataframe.shots.to_numpy(dtype=object, na_value=np.nan)
     def shots(self, new_values: np.ndarray):
-        self._data.loc[:, "shots"] = new_values
+        self.dataframe.loc[:, "shots"] = new_values
     def analysis(self) -> np.ndarray:
         """Corresponding analysis name for each data point."""
-        return self._data.analysis.to_numpy(dtype=object, na_value=None)
+        return self.dataframe.analysis.to_numpy(dtype=object, na_value=None)
     def analysis(self, new_values: np.ndarray):
-        self._data.loc[:, "analysis"] = new_values
+        self.dataframe.loc[:, "analysis"] = new_values
     def filter(
@@ -282,23 +278,23 @@ def filter(
             New ScatterTable object with filtered data.
-        filt_data = self._data
+        filt_data = self.dataframe
         if data_uid is not None:
             if isinstance(data_uid, int):
-                index = self._data.data_uid == data_uid
+                index = filt_data.data_uid == data_uid
             elif isinstance(data_uid, str):
-                index = self._data.name == data_uid
+                index = filt_data.name == data_uid
                 raise ValueError(
                     f"Invalid data_uid {type(data_uid)}. This must be integer or string."
             filt_data = filt_data.loc[index, :]
         if category is not None:
-            index = self._data.category == category
+            index = filt_data.category == category
             filt_data = filt_data.loc[index, :]
         if analysis is not None:
-            index = self._data.analysis == analysis
+            index = filt_data.analysis == analysis
             filt_data = filt_data.loc[index, :]
         return ScatterTable.from_dataframe(filt_data)
@@ -308,10 +304,10 @@ def iter_by_data_uid(self) -> Iterator[tuple[int, "ScatterTable"]]:
             Tuple of data UID and subset of ScatterTable.
-        data_ids = self._data.data_uid.dropna().sort_values().unique()
+        data_ids = self.dataframe.data_uid.dropna().sort_values().unique()
+        id_cols = self.dataframe.data_uid
         for did in data_ids:
-            index = self._data.data_uid == did
-            yield did, ScatterTable.from_dataframe(self._data.loc[index, :])
+            yield did, ScatterTable.from_dataframe(self.dataframe.loc[id_cols == did, :])
     def iter_groups(
@@ -334,7 +330,7 @@ def iter_groups(
         # Use python native groupby method on dataframe ndarray when sorting by multiple columns.
         # This is more performant than pandas groupby implementation.
-        for vals, sub_data in groupby(sorted(self._data.values, key=sort_by), key=sort_by):
+        for vals, sub_data in groupby(sorted(self.dataframe.values, key=sort_by), key=sort_by):
             tmp_df = pd.DataFrame(list(sub_data), columns=self.COLUMNS)
             yield vals, ScatterTable.from_dataframe(tmp_df)
@@ -373,23 +369,22 @@ def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
-    @staticmethod
-    def _warn_composite_data(table: ScatterTable):
-        if len(table._data.name.unique()) > 1:
+    def _warn_composite_data(self):
+        if len(self.dataframe.name.unique()) > 1:
-                "Returned data contains multiple data UIDs. "
+                "Table data contains multiple data UIDs. "
                 "You may want to filter the data by a specific data_uid integer or name string.",
-        if len(table._data.category.unique()) > 1:
+        if len(self.dataframe.category.unique()) > 1:
-                "Returned data contains multiple categories. "
+                "Table data contains multiple categories. "
                 "You may want to filter the data by a specific category name.",
-        if len(table._data.analysis.unique()) > 1:
+        if len(self.dataframe.analysis.unique()) > 1:
-                "Returned data contains multiple datasets from different component analyses. "
+                "Table data contains multiple datasets from different component analyses. "
                 "You may want to filter the data by a specific analysis name.",
@@ -417,7 +412,7 @@ def data_allocation(self) -> np.ndarray:
     def labels(self) -> list[str]:
         """List of model names."""
         # Order sensitive
-        name_id_tups = self._data.groupby(["name", "data_uid"]).groups.keys()
+        name_id_tups = self.dataframe.groupby(["name", "data_uid"]).groups.keys()
         return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])]
@@ -439,7 +434,7 @@ def get_subset_of(self, index: str | int) -> "ScatterTable":
     def __len__(self):
         """Return the number of data points stored in the table."""
-        return len(self._data)
+        return len(self.dataframe)
     def __eq__(self, other):
         return self.dataframe.equals(other.dataframe)
@@ -447,7 +442,7 @@ def __eq__(self, other):
     def __json_encode__(self) -> dict[str, Any]:
         return {
             "class": "ScatterTable",
-            "data": self._data.to_dict(orient="index"),
+            "data": self.dataframe.to_dict(orient="index"),

From cc905c689ac7f51ff0eae4c19afe81935956114f Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 13:21:41 +0900
Subject: [PATCH 22/29] Remove key from add_data

 .../framework/        | 26 ++++++++-----------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index eaa30cddf0..2c84a08e2a 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -94,40 +94,35 @@ def columns(self) -> list[str]:
     def add_data(
-        key: str | int | None = None,
+        *,
+        result_id: str | None = None,
     ) -> str:
         """Add new data to this dataset.
-            key: Identifier of this entry. This must be UUID-4 format.
-                The ``result_id`` string in the input data is used if nothing is provided.
-                A random unique ID is prepared if ``result_id`` is also missing.
+            result_id: A unique UUID-4 string for this data entry.
+                The full string is used to identify the data in the experiment service database,
+                and a short ID is created by truncating this string as a dataframe index.
             data: Arbitrary key-value pairs representing a single data entry.
                 Missing values for default columns are filled with ``None``.
             Assigned analysis result ID.
-        if not key:
-            if result_id := data.get("result_id"):
-                key = result_id
-            else:
-                key = self._create_unique_hash()
-        if data.get("result_id", None) is None:
-            data["result_id"] = key
-        if matched := re.match(self.VALID_ID_REGEX, key):
+        result_id = result_id or self._create_unique_hash()
+        if matched := re.match(self.VALID_ID_REGEX, result_id):
             # Short unique index is generated from result id.
             # Showing full result id unnecessary occupies horizontal space of the html table.
             # This mechanism is inspired by the github commit hash.
             index = matched.group("short_id")
-                f"Data key {key} is not valid result ID string. ",
+                f"Result ID of {result_id} is not a valid UUID-4 string. ",
-            index = key[:8]
+            index = result_id[:8]
         with self._lock:
             if index in self._data.index:
@@ -153,6 +148,7 @@ def add_data(
             # Also see test.framework.test_data_table.TestBaseTable.test_type_*
             self._data.loc[index, :] = [None] * len(self._data.columns)
             template = dict.fromkeys(self.columns, None)
+            template["result_id"] = result_id
             self._data.loc[index, :] = pd.array(list(template.values()), dtype=object)

From 0dc4eb299a6691ca0849f087893eb430d4a816f6 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 14:03:15 +0900
Subject: [PATCH 23/29] Remove type cast depending on the entry number

 .../framework/              | 48 +++++++++++++++----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/qiskit_experiments/framework/ b/qiskit_experiments/framework/
index 1e29a940c2..a7b8ce9620 100644
--- a/qiskit_experiments/framework/
+++ b/qiskit_experiments/framework/
@@ -1511,15 +1511,48 @@ def _retrieve_analysis_results(self, refresh: bool = False):
     def analysis_results(
-        index: Optional[Union[int, slice, str]] = None,
+        index: int | slice | str | None = None,
         refresh: bool = False,
         block: bool = True,
-        timeout: Optional[float] = None,
-        columns: Union[str, List[str]] = "default",
+        timeout: float | None = None,
+        columns: str | list[str] = "default",
         dataframe: bool = False,
-    ) -> Union[AnalysisResult, List[AnalysisResult], pd.DataFrame, pd.Series]:
+    ) -> AnalysisResult | list[AnalysisResult] | pd.DataFrame:
         """Return analysis results associated with this experiment.
+        When this method is called with ``dataframe=True``, the result entries
+        matching the ``index`` condition are returned in the dataframe format.
+        You can access the value of a particular entry by specifying its row,
+        either by row number or by short index string. For example,
+        .. jupyter-input::
+            results = exp_data.analysis_results("res1", dataframe=True)
+            print(results)
+        .. jupyter-output::
+                      name  experiment  components  value  quality  backend          run_time
+            7dd286f4  res1       MyExp    [Q0, Q1]      1     good    test1  2024-02-06 13:46
+            f62042a7  res1       MyExp    [Q2, Q3]      2     good    test1  2024-02-06 13:46
+        Getting the first result value with a row number (``iloc``).
+        .. code-block:: python
+            value = results.iloc[0].value
+        Getting the first result value with a short index (``loc``).
+        .. code-block:: python
+            value = results.loc["7dd286f4"].value
+        See the pandas `DataFrame`_ documentation for the tips about data handling.
+        .. _DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
             index: Index of the analysis result to be returned.
                 Several types are accepted for convenience:
@@ -1558,12 +1591,7 @@ def analysis_results(
         if dataframe:
-            df = self._analysis_results.get_data(index, columns=columns)
-            if len(df) == 1 and index is not None:
-                # For backward compatibility.
-                # One can directly access attributes with Series. e.g. out.value
-                return df.iloc[0]
-            return df
+            return self._analysis_results.get_data(index, columns=columns)
         # Convert back into List[AnalysisResult] which is payload for IBM experiment service.
         # This will be removed in future version.

From f8c1efef25be73aae3b8f20a84c23677e550a2a1 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 15:03:15 +0900
Subject: [PATCH 24/29] Minor docs formatting

 docs/tutorials/curve_analysis.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 7c94478a84..bbd45585ba 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -382,7 +382,7 @@ This allows the analysis to easily estimate the slope of the curves to
 create algorithmic initial guess of fit parameters.
 A developer can inject extra data processing, for example, filtering, smoothing,
 or elimination of outliers for better fitting.
-The new ``data_uid`` is given here so that its value corresponds to the fit model object index
+The new `data_uid` is given here so that its value corresponds to the fit model object index
 in this analysis class. This index mapping is done based upon the correspondence of
 the data name and the fit model name.

From ee92f1df501db02c34505b52581d558cee964bae Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 15:12:47 +0900
Subject: [PATCH 25/29] Add more tests for result table

 test/framework/ | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/framework/ b/test/framework/
index dea7e66c3d..ea8e566ab4 100644
--- a/test/framework/
+++ b/test/framework/
@@ -75,6 +75,23 @@ def test_extra_column_name_is_always_returned(self):
         all_columns = table.get_data(0, "all")
         self.assertTrue("extra" in all_columns.columns)
+    def test_get_custom_columns(self):
+        """Test getting entry with user-specified columns."""
+        table = AnalysisResultTable()
+        table.add_data(name="test", value=0)
+        cols = ["name", "value"]
+        custom_columns = table.get_data(0, cols)
+        self.assertListEqual(list(custom_columns.columns), cols)
+    def test_warning_non_existing_columns(self):
+        """Test raise user warning when attempt to get non-existing column."""
+        table = AnalysisResultTable()
+        table.add_data(name="test", value=0)
+        with self.assertWarns(UserWarning):
+            table.get_data(0, ["not_existing_column"])
     def test_listing_result_id(self):
         """Test returning result IDs of all stored entries."""
         table = AnalysisResultTable()

From 03aac67c30c5e74fa2b07047d888a6937292c65f Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 16:57:49 +0900
Subject: [PATCH 26/29] Performance optimization

 .../curve_analysis/           | 44 +++++++++++++------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index db3d6ec82c..3e5fd4f4ba 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -17,8 +17,8 @@
 import warnings
 from import Iterator
 from typing import Any
-from itertools import groupby
-from operator import itemgetter
+from functools import reduce
+from itertools import product
 import numpy as np
 import pandas as pd
@@ -78,7 +78,10 @@ def __init__(self):
         self._dump = pd.DataFrame(columns=self.COLUMNS)
-    def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
+    def from_dataframe(
+        cls,
+        data: pd.DataFrame,
+    ) -> "ScatterTable":
         """Create new dataset with existing dataframe.
@@ -89,9 +92,20 @@ def from_dataframe(cls, data: pd.DataFrame) -> "ScatterTable":
         if list(data.columns) != cls.COLUMNS:
             raise ValueError("Input dataframe columns don't match with the ScatterTable spec.")
-        instance = object.__new__(ScatterTable)
+        format_data = cls._format_table(data)
+        return cls._create_new_instance(format_data)
+    @classmethod
+    def _create_new_instance(
+        cls,
+        data: pd.DataFrame,
+    ) -> "ScatterTable":
+        # A shortcut for creating an instance.
+        # This bypasses data formatting and the column compatibility check.
+        # The caller of this method must guarantee the quality of the input data.
+        instance = object.__new__(cls)
         instance._lazy_add_rows = []
-        instance._dump = cls._format_table(data)
+        instance._dump = data
         return instance
@@ -296,7 +310,7 @@ def filter(
         if analysis is not None:
             index = filt_data.analysis == analysis
             filt_data = filt_data.loc[index, :]
-        return ScatterTable.from_dataframe(filt_data)
+        return ScatterTable._create_new_instance(filt_data)
     def iter_by_data_uid(self) -> Iterator[tuple[int, "ScatterTable"]]:
         """Iterate over subset of data sorted by the data UID.
@@ -307,7 +321,7 @@ def iter_by_data_uid(self) -> Iterator[tuple[int, "ScatterTable"]]:
         data_ids = self.dataframe.data_uid.dropna().sort_values().unique()
         id_cols = self.dataframe.data_uid
         for did in data_ids:
-            yield did, ScatterTable.from_dataframe(self.dataframe.loc[id_cols == did, :])
+            yield did, ScatterTable._create_new_instance(self.dataframe.loc[id_cols == did, :])
     def iter_groups(
@@ -321,18 +335,20 @@ def iter_groups(
             Tuple of values for the grouped columns and the corresponding subset of the scatter table.
+        out = self.dataframe
-            sort_by = itemgetter(*[self.COLUMNS.index(c) for c in group_by])
-        except ValueError as ex:
+            values_iter = product(*[out.get(col).unique() for col in group_by])
+        except AttributeError as ex:
             raise ValueError(
                 f"Specified columns don't exist: {group_by} is not a subset of {self.COLUMNS}."
             ) from ex
-        # Use python native groupby method on dataframe ndarray when sorting by multiple columns.
-        # This is more performant than pandas groupby implementation.
-        for vals, sub_data in groupby(sorted(self.dataframe.values, key=sort_by), key=sort_by):
-            tmp_df = pd.DataFrame(list(sub_data), columns=self.COLUMNS)
-            yield vals, ScatterTable.from_dataframe(tmp_df)
+        for values in sorted(values_iter):
+            each_matched = [out.get(c) == v for c, v in zip(group_by, values)]
+            all_matched = reduce(lambda x, y: x & y, each_matched)
+            if not any(all_matched):
+                continue
+            yield values, ScatterTable._create_new_instance(out.loc[all_matched, :])
     def add_row(

From ac5bdd86feeea0b2cff159ca4c2e764326a91eeb Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Tue, 6 Feb 2024 18:25:56 +0900
Subject: [PATCH 27/29] name, data_uid -> series_name, series_id

 docs/tutorials/curve_analysis.rst             | 96 ++++++++++--------
 .../               | 12 +--
 .../curve_analysis/          | 46 ++++-----
 .../curve_analysis/           | 99 +++++++++----------
 .../standard_analysis/     |  6 +-
 .../analysis/                 |  2 +-
 .../analysis/            |  4 +-
 .../analysis/            |  4 +-
 .../               | 14 +--
 .../                |  4 +-
 test/curve_analysis/     | 79 +++++++--------
 11 files changed, 188 insertions(+), 178 deletions(-)

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index bbd45585ba..4b243ef48d 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -249,17 +249,17 @@ Managing intermediate data
 Each data point in a 1-D curve fit may consist of the x value, y value, and
 standard error of the y value.
 In addition, such analysis may internally create several data subsets.
-Each data point is given a metadata triplet (`data_uid`, `category`, `analysis`)
+Each data point is given a metadata triplet (`series_id`, `category`, `analysis`)
 to distinguish the subset.
-* The `data_uid` is an integer key representing a label of the data which may be classified by fits models.
+* The `series_id` is an integer key representing a label of the data which may be classified by fit models.
   When an analysis consists of multiple fit models and performs a multi-objective fit,
   the created table may contain multiple datasets for each fit model.
-  Usually the index of data matches with the index of the fit model in the analysis.
-  The table also provides a `name` column which is a human-friendly text notation of the `data_uid`.
-  The `name` and corresponding `data_uid` must refer to the identical group,
-  and the `name` typically matches with the name of the fit model.
-  You can find a particular data subset by either `data_uid` or `name`.
+  Usually the index of the series matches the index of the fit model in the analysis.
+  The table also provides a `series_name` column which is a human-friendly text notation of the `series_id`.
+  The `series_name` and corresponding `series_id` must refer to the identical data subset,
+  and the `series_name` typically matches with the name of the fit model.
+  You can find a particular data subset by either `series_id` or `series_name`.
 * The `category` is a string tag categorizing a group of data points.
   The measured outcomes input as-is to the curve analysis are categorized by "raw".
@@ -286,7 +286,7 @@ particular data subset. For example,
 .. code-block:: python
-    mini_table = table.filter(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_table = table.filter(series="my_experiment1", category="raw", analysis="AnalysisA")
     mini_x = mini_table.x
     mini_y = mini_table.y
@@ -294,12 +294,12 @@ This operation is equivalent to
 .. code-block:: python
-    mini_x = table.xvals(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
-    mini_y = table.yvals(data_uid="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_x = table.xvals(series="my_experiment1", category="raw", analysis="AnalysisA")
+    mini_y = table.yvals(series="my_experiment1", category="raw", analysis="AnalysisA")
 When an analysis only has a single model and the table is created from a single
-analysis instance, the data_uid and analysis are trivial, and you only need to
-specify the category to get subset data of interest.
+analysis instance, the `series_id` and `analysis` are trivial, and you only need to
+specify the `category` to get subset data of interest.
 The full description of :class:`.ScatterTable` columns are following below:
@@ -307,8 +307,8 @@ The full description of :class:`.ScatterTable` columns are following below:
 - `yval`: Nominal part of the outcome. The outcome is something like expectation value,
   which is computed from the experiment result with the data processor.
 - `yerr`: Standard error of the outcome, which is mainly due to sampling error.
-- `name`: Unique identifier of the data group. This is defined by the ``data_subfit_map`` option in the :class:`.CurveAnalysis`.
-- `data_uid`: Integer corresponding to a data unique index. This number is automatically assigned.
+- `series_name`: Human-readable name of the data series. This is defined by the ``data_subfit_map`` option in the :class:`.CurveAnalysis`.
+- `series_id`: Integer corresponding to the name of the data series. This number is automatically assigned.
 - `category`: A tag for the data group. This is defined by a developer of the curve analysis.
 - `shots`: Number of measurement shots used to acquire a data point. This value can be defined in the circuit metadata.
 - `analysis`: The name of the curve analysis instance that generated a data point.
@@ -350,21 +350,26 @@ the data processor in the analysis option is internally called.
 This consumes input experiment results and creates the :class:`.ScatterTable` dataframe.
 This table may look like:
-.. code-block::
-        xval      yval      yerr  name  data_uid category  shots     analysis
-    0    0.1  0.153659  0.011258     A         0      raw   1024   MyAnalysis
-    1    0.1  0.590732  0.015351     B         1      raw   1024   MyAnalysis
-    2    0.1  0.315610  0.014510     A         0      raw   1024   MyAnalysis
-    3    0.1  0.376098  0.015123     B         1      raw   1024   MyAnalysis
-    4    0.2  0.937073  0.007581     A         0      raw   1024   MyAnalysis
-    5    0.2  0.323415  0.014604     B         1      raw   1024   MyAnalysis
-    6    0.2  0.538049  0.015565     A         0      raw   1024   MyAnalysis
-    7    0.2  0.530244  0.015581     B         1      raw   1024   MyAnalysis
-    8    0.3  0.143902  0.010958     A         0      raw   1024   MyAnalysis
-    9    0.3  0.261951  0.013727     B         1      raw   1024   MyAnalysis
-    10   0.3  0.830732  0.011707     A         0      raw   1024   MyAnalysis
-    11   0.3  0.874634  0.010338     B         1      raw   1024   MyAnalysis
+.. jupyter-input::
+    table = analysis._run_data_processing(experiment_data.data())
+    print(table)
+.. jupyter-output::
+        xval      yval      yerr  series_name  series_id  category  shots     analysis
+    0    0.1  0.153659  0.011258            A          0      raw    1024   MyAnalysis
+    1    0.1  0.590732  0.015351            B          1      raw    1024   MyAnalysis
+    2    0.1  0.315610  0.014510            A          0      raw    1024   MyAnalysis
+    3    0.1  0.376098  0.015123            B          1      raw    1024   MyAnalysis
+    4    0.2  0.937073  0.007581            A          0      raw    1024   MyAnalysis
+    5    0.2  0.323415  0.014604            B          1      raw    1024   MyAnalysis
+    6    0.2  0.538049  0.015565            A          0      raw    1024   MyAnalysis
+    7    0.2  0.530244  0.015581            B          1      raw    1024   MyAnalysis
+    8    0.3  0.143902  0.010958            A          0      raw    1024   MyAnalysis
+    9    0.3  0.261951  0.013727            B          1      raw    1024   MyAnalysis
+    10   0.3  0.830732  0.011707            A          0      raw    1024   MyAnalysis
+    11   0.3  0.874634  0.010338            B          1      raw    1024   MyAnalysis
 where the experiment consists of two subset series A and B, and the experiment parameter (xval)
 is scanned from 0.1 to 0.3 in each subset. In this example, the experiment is run twice
@@ -374,37 +379,42 @@ See :ref:`data_management_with_scatter_table` for the details of columns.
 3. Formatting
-Next, the processed dataset is converted into another format suited for the fitting and
-every valid result is assigned a class corresponding to a fit model.
+Next, the processed dataset is converted into another format suited for the fitting.
 By default, the formatter takes average of the outcomes in the processed dataset
 over the same x values, followed by the sorting in the ascending order of x values.
 This allows the analysis to easily estimate the slope of the curves to
 create algorithmic initial guess of fit parameters.
 A developer can inject extra data processing, for example, filtering, smoothing,
 or elimination of outliers for better fitting.
-The new `data_uid` is given here so that its value corresponds to the fit model object index
-in this analysis class. This index mapping is done based upon the correspondence of
-the data name and the fit model name.
+The new `series_id` is given here so that its value corresponds to the fit model index
+defined in this analysis class. This index mapping is done based upon the correspondence of
+the `series_name` and the fit model name.
 This is done by calling :meth:`_format_data` method.
 This may return new scatter table object with the addition of rows like the following below.
-.. code-block::
+.. jupyter-input::
+    table = analysis._format_data(table)
+    print(table)
+.. jupyter-output::
-        xval      yval      yerr  name  data_uid   category  shots     analysis
-    12   0.1  0.234634  0.009183     A         0  formatted   2048   MyAnalysis
-    13   0.2  0.737561  0.008656     A         0  formatted   2048   MyAnalysis
-    14   0.3  0.487317  0.008018     A         0  formatted   2048   MyAnalysis
-    15   0.1  0.483415  0.010774     B         1  formatted   2048   MyAnalysis
-    16   0.2  0.426829  0.010678     B         1  formatted   2048   MyAnalysis
-    17   0.3  0.568293  0.008592     B         1  formatted   2048   MyAnalysis
+        xval      yval      yerr  series_name  series_id   category  shots     analysis
+    ...
+    12   0.1  0.234634  0.009183            A          0  formatted   2048   MyAnalysis
+    13   0.2  0.737561  0.008656            A          0  formatted   2048   MyAnalysis
+    14   0.3  0.487317  0.008018            A          0  formatted   2048   MyAnalysis
+    15   0.1  0.483415  0.010774            B          1  formatted   2048   MyAnalysis
+    16   0.2  0.426829  0.010678            B          1  formatted   2048   MyAnalysis
+    17   0.3  0.568293  0.008592            B          1  formatted   2048   MyAnalysis
 The default :meth:`_format_data` method adds its output data with the category "formatted".
 This category name must be also specified in the analysis option ``fit_category``.
 If overriding this method to do additional processing after the default formatting,
 the ``fit_category`` analysis option can be set to choose a different category name to use to
 select the data to pass to the fitting routine.
-The (x, y) value in each row is passed to the corresponding fit model object
+The (xval, yval) value in each row is passed to the corresponding fit model object
 to compute residual values for the least square optimization.
 3. Fitting
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 06f4ae8a89..6b3029c6a6 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -232,8 +232,8 @@ def _create_figures(
         for analysis in self.analyses():
             group_data = curve_data.filter(
             model_names = analysis.model_names()
-            for uid, sub_data in group_data.iter_by_data_uid():
-                full_name = f"{model_names[uid]}_{}"
+            for series_id, sub_data in group_data.iter_by_series_id():
+                full_name = f"{model_names[series_id]}_{}"
                 # Plot raw data scatters
                 if analysis.options.plot_raw_data:
                     raw_data = sub_data.filter(category="raw")
@@ -378,7 +378,7 @@ def _run_analysis(
             if fit_data.success:
                 # Add fit data to curve data table
                 model_names = analysis.model_names()
-                for data_id, sub_data in formatted_subset.iter_by_data_uid():
+                for series_id, sub_data in formatted_subset.iter_by_series_id():
                     xval = sub_data.x
                     if len(xval) == 0:
                         # If data is empty, skip drawing this model.
@@ -388,7 +388,7 @@ def _run_analysis(
                     xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
                     uval_arr_fit = eval_with_uncertainties(
-                        model=analysis.models[data_id],
+                        model=analysis.models[series_id],
                     yval_arr_fit = unp.nominal_values(uval_arr_fit)
@@ -398,8 +398,8 @@ def _run_analysis(
                         yerr_arr_fit = np.zeros_like(xval_arr_fit)
                     for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
-                            name=model_names[data_id],
-                            data_uid=data_id,
+                            series_name=model_names[series_id],
+                            series_id=series_id,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 9d2089d647..368f1b2a83 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -207,17 +207,17 @@ def _run_data_processing(
                     f"X value key {opt.x_key} is not defined in the circuit metadata."
                 ) from ex
-            # Assign entry name and class id
-            for data_id, (name, spec) in enumerate(classifier.items()):
+            # Assign series name and series id
+            for series_id, (series_name, spec) in enumerate(classifier.items()):
                 if spec.items() <= metadata.items():
                 # This is unclassified data.
-                data_id = pd.NA
-                name = pd.NA
+                series_name = pd.NA
+                series_id = pd.NA
-                name=name,
-                data_uid=data_id,
+                series_name=series_name,
+                series_id=series_id,
@@ -249,19 +249,19 @@ def _format_data(
         average = averaging_methods[self.options.average_method]
         model_names = self.model_names()
-        for (name, xval), sub_data in curve_data.iter_groups("name", "xval"):
+        for (series_name, xval), sub_data in curve_data.iter_groups("series_name", "xval"):
             avg_yval, avg_yerr, shots = average(
-                data_id = model_names.index(name)
+                series_id = model_names.index(series_name)
             except ValueError:
-                data_id = pd.NA
+                series_id = pd.NA
-                name=name,
-                data_uid=data_id,
+                series_name=series_name,
+                series_id=series_id,
@@ -339,7 +339,7 @@ def _run_curve_fit(
         # Create convenient function to compute residual of the models.
         partial_residuals = []
         valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
-        for uid, sub_data in curve_data.iter_by_data_uid():
+        for idx, sub_data in curve_data.iter_by_series_id():
             if valid_uncertainty:
                 nonzero_yerr = np.where(
                     np.isclose(sub_data.y_err, 0.0),
@@ -356,7 +356,7 @@ def _run_curve_fit(
                 weights = None
             model_residual = partial(
-                self._models[uid]._residual,
+                self._models[idx]._residual,
@@ -417,20 +417,20 @@ def _create_figures(
             A list of figures.
-        for i, sub_data in curve_data.iter_by_data_uid():
-            name = self.model_names()[i]
+        for series_id, sub_data in curve_data.iter_by_series_id():
+            model_name = self.model_names()[series_id]
             # Plot raw data scatters
             if self.options.plot_raw_data:
                 raw_data = sub_data.filter(category="raw")
-                    series_name=name,
+                    series_name=model_name,
             # Plot formatted data scatters
             formatted_data = sub_data.filter(category=self.options.fit_category)
-                series_name=name,
+                series_name=model_name,
@@ -440,14 +440,14 @@ def _create_figures(
             if len(line_data) == 0:
-                series_name=name,
+                series_name=model_name,
             fit_stdev = line_data.y_err
             if np.isfinite(fit_stdev).all():
-                    series_name=name,
+                    series_name=model_name,
@@ -499,7 +499,7 @@ def _run_analysis(
         if fit_data.success:
             # Add fit data to curve data table
             model_names = self.model_names()
-            for data_id, sub_data in formatted_subset.iter_by_data_uid():
+            for series_id, sub_data in formatted_subset.iter_by_series_id():
                 xval = sub_data.x
                 if len(xval) == 0:
                     # If data is empty, skip drawing this model.
@@ -509,7 +509,7 @@ def _run_analysis(
                 xval_arr_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float)
                 uval_arr_fit = eval_with_uncertainties(
-                    model=self._models[data_id],
+                    model=self._models[series_id],
                 yval_arr_fit = unp.nominal_values(uval_arr_fit)
@@ -519,8 +519,8 @@ def _run_analysis(
                     yerr_arr_fit = np.zeros_like(xval_arr_fit)
                 for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
-                        name=model_names[data_id],
-                        data_uid=data_id,
+                        series_name=model_names[series_id],
+                        series_id=series_id,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 3e5fd4f4ba..249ede87e9 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -55,8 +55,8 @@ class ScatterTable:
-        "name",
-        "data_uid",
+        "series_name",
+        "series_id",
@@ -135,7 +135,7 @@ def x(self, new_values):
     def xvals(
-        data_uid: int | str | None = None,
+        series: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -145,7 +145,7 @@ def xvals(
         A convenient shortcut for getting X data with filtering.
-            data_uid: Identifier of the data, either data UID or name.
+            series: Identifier of the data series, either integer series index or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -154,7 +154,7 @@ def xvals(
             Numpy array of X values.
-        sub_table = self.filter(data_uid, category, analysis)
+        sub_table = self.filter(series, category, analysis)
         if check_unique:
         return sub_table.x
@@ -171,7 +171,7 @@ def y(self, new_values: np.ndarray):
     def yvals(
-        data_uid: int | str | None = None,
+        series: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -181,7 +181,7 @@ def yvals(
         A convenient shortcut for getting Y data with filtering.
-            data_uid: Identifier of the data, either data UID or name.
+            series: Identifier of the data series, either integer series index or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -190,7 +190,7 @@ def yvals(
             Numpy array of Y values.
-        sub_table = self.filter(data_uid, category, analysis)
+        sub_table = self.filter(series, category, analysis)
         if check_unique:
         return sub_table.y
@@ -207,7 +207,7 @@ def y_err(self, new_values: np.ndarray):
     def yerrs(
-        data_uid: int | str | None = None,
+        series: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
         check_unique: bool = True,
@@ -217,7 +217,7 @@ def yerrs(
         A convenient shortcut for getting Y error data with filtering.
-            data_uid: Identifier of the data, either data UID or name.
+            series: Identifier of the data series, either integer series index or name.
             category: Name of data category.
             analysis: Name of analysis.
             check_unique: Set True to check if multiple series are contained.
@@ -226,28 +226,28 @@ def yerrs(
             Numpy array of Y error values.
-        sub_table = self.filter(data_uid, category, analysis)
+        sub_table = self.filter(series, category, analysis)
         if check_unique:
         return sub_table.y_err
-    def name(self) -> np.ndarray:
+    def series_name(self) -> np.ndarray:
         """Corresponding data name for each data point."""
-        return, na_value=None)
+        return self.dataframe.series_name.to_numpy(dtype=object, na_value=None)
-    @name.setter
-    def name(self, new_values: np.ndarray):
-        self.dataframe.loc[:, "name"] = new_values
+    @series_name.setter
+    def series_name(self, new_values: np.ndarray):
+        self.dataframe.loc[:, "series_name"] = new_values
-    def data_uid(self) -> np.ndarray:
+    def series_id(self) -> np.ndarray:
         """Corresponding data UID for each data point."""
-        return self.dataframe.data_uid.to_numpy(dtype=object, na_value=None)
+        return self.dataframe.series_id.to_numpy(dtype=object, na_value=None)
-    @data_uid.setter
-    def data_uid(self, new_values: np.ndarray):
-        self.dataframe.loc[:, "data_uid"] = new_values
+    @series_id.setter
+    def series_id(self, new_values: np.ndarray):
+        self.dataframe.loc[:, "series_id"] = new_values
     def category(self) -> np.ndarray:
@@ -278,14 +278,14 @@ def analysis(self, new_values: np.ndarray):
     def filter(
-        data_uid: int | str | None = None,
+        series: int | str | None = None,
         category: str | None = None,
         analysis: str | None = None,
     ) -> ScatterTable:
-        """Filter data by class, category, and/or analysis name.
+        """Filter data by series, category, and/or analysis name.
-            data_uid: Identifier of the data, either data UID or name.
+            series: Identifier of the data series, either integer series index or name.
             category: Name of data category.
             analysis: Name of analysis.
@@ -294,14 +294,14 @@ def filter(
         filt_data = self.dataframe
-        if data_uid is not None:
-            if isinstance(data_uid, int):
-                index = filt_data.data_uid == data_uid
-            elif isinstance(data_uid, str):
-                index = == data_uid
+        if series is not None:
+            if isinstance(series, int):
+                index = filt_data.series_id == series
+            elif isinstance(series, str):
+                index = filt_data.series_name == series
                 raise ValueError(
-                    f"Invalid data_uid {type(data_uid)}. This must be integer or string."
+                    f"Invalid series identifier {series}. This must be integer or string."
             filt_data = filt_data.loc[index, :]
         if category is not None:
@@ -312,16 +312,15 @@ def filter(
             filt_data = filt_data.loc[index, :]
         return ScatterTable._create_new_instance(filt_data)
-    def iter_by_data_uid(self) -> Iterator[tuple[int, "ScatterTable"]]:
-        """Iterate over subset of data sorted by the data UID.
+    def iter_by_series_id(self) -> Iterator[tuple[int, "ScatterTable"]]:
+        """Iterate over subset of data sorted by the data series index.
-            Tuple of data UID and subset of ScatterTable.
+            Tuple of data series index and subset of ScatterTable.
-        data_ids = self.dataframe.data_uid.dropna().sort_values().unique()
-        id_cols = self.dataframe.data_uid
-        for did in data_ids:
-            yield did, ScatterTable._create_new_instance(self.dataframe.loc[id_cols == did, :])
+        id_values = self.dataframe.series_id
+        for did in id_values.dropna().sort_values().unique():
+            yield did, ScatterTable._create_new_instance(self.dataframe.loc[id_values == did, :])
     def iter_groups(
@@ -352,8 +351,8 @@ def iter_groups(
     def add_row(
-        name: str | pd.NA = pd.NA,
-        data_uid: int | pd.NA = pd.NA,
+        series_name: str | pd.NA = pd.NA,
+        series_id: int | pd.NA = pd.NA,
         category: str | pd.NA = pd.NA,
         x: float | pd.NA = pd.NA,
         y: float | pd.NA = pd.NA,
@@ -370,12 +369,12 @@ def add_row(
             y: Y value.
             y_err: Standard deviation of y value.
             shots: Shot number used to acquire this data point.
-            name: Name of this data if available.
-            data_uid: Data UID of if available.
+            series_name: Name of this data series if available.
+            series_id: Index of this data series if available.
             category: Data category if available.
             analysis: Analysis name if available.
-        self._lazy_add_rows.append([x, y, y_err, name, data_uid, category, shots, analysis])
+        self._lazy_add_rows.append([x, y, y_err, series_name, series_id, category, shots, analysis])
     def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
@@ -386,10 +385,10 @@ def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
     def _warn_composite_data(self):
-        if len( > 1:
+        if len(self.dataframe.series_name.unique()) > 1:
-                "Table data contains multiple data UIDs. "
-                "You may want to filter the data by a specific data_uid integer or name string.",
+                "Table data contains multiple data series. "
+                "You may want to filter the data by a specific series_id or series_name.",
         if len(self.dataframe.category.unique()) > 1:
@@ -408,19 +407,19 @@ def _warn_composite_data(self):
-        additional_msg="Curve data uses dataframe representation. Call .data_uid instead.",
+        additional_msg="Curve data uses dataframe representation. Call .series_id instead.",
     def data_allocation(self) -> np.ndarray:
         """Index of corresponding fit model."""
-        return self.data_uid
+        return self.series_id
-        additional_msg="No alternative is provided. Use .name with set operation.",
+        additional_msg="No alternative is provided. Use .series_name with set operation.",
@@ -428,7 +427,7 @@ def data_allocation(self) -> np.ndarray:
     def labels(self) -> list[str]:
         """List of model names."""
         # Order sensitive
-        name_id_tups = self.dataframe.groupby(["name", "data_uid"]).groups.keys()
+        name_id_tups = self.dataframe.groupby(["series_name", "series_id"]).groups.keys()
         return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])]
@@ -446,7 +445,7 @@ def get_subset_of(self, index: str | int) -> "ScatterTable":
             A subset of data corresponding to a particular series.
-        return self.filter(data_uid=index)
+        return self.filter(series=index)
     def __len__(self):
         """Return the number of data points stored in the table."""
diff --git a/qiskit_experiments/curve_analysis/standard_analysis/ b/qiskit_experiments/curve_analysis/standard_analysis/
index 96585dd0b0..5219ff662f 100644
--- a/qiskit_experiments/curve_analysis/standard_analysis/
+++ b/qiskit_experiments/curve_analysis/standard_analysis/
@@ -170,9 +170,9 @@ def _generate_fit_guesses(
         user_opt.bounds.set_if_empty(t_off=(0, np.inf), b=(-1, 1))
-        x_data = curve_data.filter(data_uid="x")
-        y_data = curve_data.filter(data_uid="y")
-        z_data = curve_data.filter(data_uid="z")
+        x_data = curve_data.filter(series="x")
+        y_data = curve_data.filter(series="y")
+        z_data = curve_data.filter(series="z")
         omega_xyz = []
         for data in (x_data, y_data, z_data):
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 988bb70a76..351349ec08 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -118,7 +118,7 @@ def _generate_fit_guesses(
         # Use the highest-frequency curve to estimate the oscillation frequency.
         max_rep_model_name = self.model_names()[-1]
         max_rep = self.options.data_subfit_map[max_rep_model_name]["nrep"]
-        curve_data = curve_data.filter(data_uid=max_rep_model_name)
+        curve_data = curve_data.filter(series=max_rep_model_name)
         x_data = curve_data.x
         min_beta, max_beta = min(x_data), max(x_data)
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 7d2ee51511..77c87ef17c 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -113,8 +113,8 @@ def _generate_fit_guesses(
             List of fit options that are passed to the fitter function.
-        ramx_data = curve_data.filter(data_uid="X")
-        ramy_data = curve_data.filter(data_uid="Y")
+        ramx_data = curve_data.filter(series="X")
+        ramy_data = curve_data.filter(series="Y")
         # At very low frequency, y value of X (Y) curve stay at P=1.0 (0.5) for all x values.
         # Computing y peak-to-peak with combined data gives fake amplitude of 0.25.
diff --git a/qiskit_experiments/library/characterization/analysis/ b/qiskit_experiments/library/characterization/analysis/
index 30e875d9d3..c657f21596 100644
--- a/qiskit_experiments/library/characterization/analysis/
+++ b/qiskit_experiments/library/characterization/analysis/
@@ -142,8 +142,8 @@ def _generate_fit_guesses(
         y_ptp = y_max - y_min
         x_max = np.max(curve_data.x)
-        data_0 = curve_data.filter(data_uid="0")
-        data_1 = curve_data.filter(data_uid="1")
+        data_0 = curve_data.filter(series="0")
+        data_1 = curve_data.filter(series="1")
         def typical_step(arr):
             """Find the typical step size of an array"""
diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index ee01650721..ecbc1e2114 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -259,8 +259,8 @@ def _format_data(
         # Create phase data by arctan(Y/X)
         for data_id, direction in enumerate(("pos", "neg")):
-            x_quadrature = ramsey_xy.filter(data_uid=f"X{direction}")
-            y_quadrature = ramsey_xy.filter(data_uid=f"Y{direction}")
+            x_quadrature = ramsey_xy.filter(series=f"X{direction}")
+            y_quadrature = ramsey_xy.filter(series=f"Y{direction}")
             if not np.array_equal(x_quadrature.x, y_quadrature.x):
                 raise ValueError(
                     "Amplitude values of X and Y quadrature are different. "
@@ -293,8 +293,8 @@ def _format_data(
-                    name=f"FREQ{direction}",
-                    data_uid=data_id,
+                    series_name=f"FREQ{direction}",
+                    series_id=data_id,
@@ -358,7 +358,7 @@ def _create_figures(
         # plot unwrapped phase on first axis
         for direction in ("pos", "neg"):
-            sub_data = curve_data.filter(data_uid=f"FREQ{direction}", category="freq")
+            sub_data = curve_data.filter(series=f"FREQ{direction}", category="freq")
@@ -368,7 +368,7 @@ def _create_figures(
         # plot raw RamseyXY plot on second axis
         for name in ("Xpos", "Ypos", "Xneg", "Yneg"):
-            sub_data = curve_data.filter(data_uid=name, category="ramsey_xy")
+            sub_data = curve_data.filter(series=name, category="ramsey_xy")
@@ -384,7 +384,7 @@ def _create_figures(
         # plot frequency and Ramsey fit lines
         line_data = curve_data.filter(category="fitted")
         for direction in ("pos", "neg"):
-            sub_data = line_data.filter(data_uid=f"FREQ{direction}")
+            sub_data = line_data.filter(series=f"FREQ{direction}")
             if len(sub_data) == 0:
             xval = sub_data.x
diff --git a/qiskit_experiments/library/randomized_benchmarking/ b/qiskit_experiments/library/randomized_benchmarking/
index 4e10cd7c0d..b389cbd74d 100644
--- a/qiskit_experiments/library/randomized_benchmarking/
+++ b/qiskit_experiments/library/randomized_benchmarking/
@@ -141,12 +141,12 @@ def _generate_fit_guesses(
         b_guess = 1 / 2**self._num_qubits
         # for standard RB curve
-        std_curve = curve_data.filter(data_uid="standard")
+        std_curve = curve_data.filter(series="standard")
         alpha_std = curve.guess.rb_decay(std_curve.x, std_curve.y, b=b_guess)
         a_std = (std_curve.y[0] - b_guess) / (alpha_std ** std_curve.x[0])
         # for interleaved RB curve
-        int_curve = curve_data.filter(data_uid="interleaved")
+        int_curve = curve_data.filter(series="interleaved")
         alpha_int = curve.guess.rb_decay(int_curve.x, int_curve.y, b=b_guess)
         a_int = (int_curve.y[0] - b_guess) / (alpha_int ** int_curve.x[0])
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index 9777cd300d..0f78755738 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -29,7 +29,7 @@ def setUp(self):
             "xval": [0.100, 0.100, 0.200, 0.200, 0.100, 0.200, 0.100, 0.200, 0.100, 0.200],
             "yval": [0.192, 0.784, 0.854, 0.672, 0.567, 0.488, 0.379, 0.671, 0.784, 0.672],
             "yerr": [0.002, 0.091, 0.090, 0.027, 0.033, 0.038, 0.016, 0.048, 0.091, 0.027],
-            "name": [
+            "series_name": [
@@ -41,7 +41,7 @@ def setUp(self):
-            "data_uid": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1],
+            "series_id": [0, 1, 0, 1, 0, 0, 0, 0, 1, 1],
             "category": [
@@ -94,8 +94,8 @@ def test_add_row(self):
         """Test adding single row to the table without and with missing data."""
         obj = ScatterTable()
-            name="model1",
-            data_uid=0,
+            series_name="model1",
+            series_id=0,
@@ -112,8 +112,8 @@ def test_add_row(self):
         np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2]))
         np.testing.assert_array_equal(obj.y, np.array([2.3, 3.4]))
         np.testing.assert_array_equal(obj.y_err, np.array([0.4, np.nan]))
-        np.testing.assert_array_equal(, np.array(["model1", None]))
-        np.testing.assert_array_equal(obj.data_uid, np.array([0, None]))
+        np.testing.assert_array_equal(obj.series_name, np.array(["model1", None]))
+        np.testing.assert_array_equal(obj.series_id, np.array([0, None]))
         np.testing.assert_array_equal(obj.category, np.array(["raw", "raw"]))
             # Numpy tries to handle nan strictly, but isnan only works for float dtype.
@@ -138,14 +138,14 @@ def test_set_values(self):
         obj.y_err = [0.3, 0.5, 0.7]
         # Broadcast single value
-        obj.data_uid = 0
- = "model0"
+        obj.series_id = 0
+        obj.series_name = "model0"
         np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2, 0.3]))
         np.testing.assert_array_equal(obj.y, np.array([1.3, 1.4, 1.5]))
         np.testing.assert_array_equal(obj.y_err, np.array([0.3, 0.5, 0.7]))
-        np.testing.assert_array_equal(obj.data_uid, np.array([0, 0, 0]))
-        np.testing.assert_array_equal(, np.array(["model0", "model0", "model0"]))
+        np.testing.assert_array_equal(obj.series_id, np.array([0, 0, 0]))
+        np.testing.assert_array_equal(obj.series_name, np.array(["model0", "model0", "model0"]))
     def test_get_subset_numbers(self):
         """Test end-user shortcut for getting the subset of x, y, y_err data."""
@@ -166,24 +166,25 @@ def test_warn_composite_values(self):
         with self.assertWarns(UserWarning):
-    def test_filter_data_by_data_uid(self):
-        """Test filter table data with data UID."""
+    def test_filter_data_by_series_id(self):
+        """Test filter table data with series index."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(data_uid=0)
+        filtered = obj.filter(series=0)
         self.assertEqual(len(filtered), 6)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
-        np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0, 0, 0, 0, 0]))
+        np.testing.assert_array_equal(filtered.series_id, np.array([0, 0, 0, 0, 0, 0]))
-    def test_filter_data_by_model_name(self):
-        """Test filter table data with data name."""
+    def test_filter_data_by_series_name(self):
+        """Test filter table data with series name."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(data_uid="model1")
+        filtered = obj.filter(series="model1")
         self.assertEqual(len(filtered), 6)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2, 0.1, 0.2, 0.1, 0.2]))
-  , np.array(["model1", "model1", "model1", "model1", "model1", "model1"])
+            filtered.series_name,
+            np.array(["model1", "model1", "model1", "model1", "model1", "model1"]),
     def test_filter_data_by_category(self):
@@ -210,55 +211,55 @@ def test_filter_multiple(self):
         """Test filter table data with multiple attributes."""
         obj = ScatterTable.from_dataframe(self.reference)
-        filtered = obj.filter(data_uid=0, category="raw", analysis="Fit1")
+        filtered = obj.filter(series=0, category="raw", analysis="Fit1")
         self.assertEqual(len(filtered), 2)
         np.testing.assert_array_equal(filtered.x, np.array([0.1, 0.2]))
-        np.testing.assert_array_equal(filtered.data_uid, np.array([0, 0]))
+        np.testing.assert_array_equal(filtered.series_id, np.array([0, 0]))
         np.testing.assert_array_equal(filtered.category, np.array(["raw", "raw"]))
         np.testing.assert_array_equal(filtered.analysis, np.array(["Fit1", "Fit1"]))
     def test_iter_class(self):
-        """Test iterating over mini tables associated with different data UID."""
+        """Test iterating over mini tables associated with different series indices."""
         obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
-        class_iter = obj.iter_by_data_uid()
+        class_iter = obj.iter_by_series_id()
-        index, table_cls0 = next(class_iter)
-        ref_table_cls0 = obj.filter(data_uid=0)
-        self.assertEqual(index, 0)
-        self.assertEqual(table_cls0, ref_table_cls0)
+        series_id, table0 = next(class_iter)
+        ref_table_cls0 = obj.filter(series=0)
+        self.assertEqual(series_id, 0)
+        self.assertEqual(table0, ref_table_cls0)
-        index, table_cls1 = next(class_iter)
-        ref_table_cls1 = obj.filter(data_uid=1)
-        self.assertEqual(index, 1)
-        self.assertEqual(table_cls1, ref_table_cls1)
+        series_id, table1 = next(class_iter)
+        ref_table_cls1 = obj.filter(series=1)
+        self.assertEqual(series_id, 1)
+        self.assertEqual(table1, ref_table_cls1)
     def test_iter_groups(self):
         """Test iterating over mini tables associated with multiple attributes."""
         obj = ScatterTable.from_dataframe(self.reference).filter(category="raw")
-        class_iter = obj.iter_groups("data_uid", "xval")
+        class_iter = obj.iter_groups("series_id", "xval")
-        (index, xval), table0 = next(class_iter)
-        self.assertEqual(index, 0)
+        (series_id, xval), table0 = next(class_iter)
+        self.assertEqual(series_id, 0)
         self.assertEqual(xval, 0.1)
         self.assertEqual(len(table0), 2)
         np.testing.assert_array_equal(table0.y, [0.192, 0.567])
-        (index, xval), table1 = next(class_iter)
-        self.assertEqual(index, 0)
+        (series_id, xval), table1 = next(class_iter)
+        self.assertEqual(series_id, 0)
         self.assertEqual(xval, 0.2)
         self.assertEqual(len(table1), 2)
         np.testing.assert_array_equal(table1.y, [0.854, 0.488])
-        (index, xval), table2 = next(class_iter)
-        self.assertEqual(index, 1)
+        (series_id, xval), table2 = next(class_iter)
+        self.assertEqual(series_id, 1)
         self.assertEqual(xval, 0.1)
         self.assertEqual(len(table2), 1)
         np.testing.assert_array_equal(table2.y, [0.784])
-        (index, xval), table3 = next(class_iter)
-        self.assertEqual(index, 1)
+        (series_id, xval), table3 = next(class_iter)
+        self.assertEqual(series_id, 1)
         self.assertEqual(xval, 0.2)
         self.assertEqual(len(table3), 1)
         np.testing.assert_array_equal(table3.y, [0.672])

From 58671ebe40ca4afaa339e44a091f5faaf11fbbcb Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Wed, 7 Feb 2024 01:42:18 +0900
Subject: [PATCH 28/29] Add more tests for construction

 .../               |  6 ++---
 .../curve_analysis/          | 18 ++++++-------
 .../curve_analysis/           | 18 +++++++------
 test/curve_analysis/     | 25 +++++++++++++++----
 4 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 6b3029c6a6..2a116f24f9 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -398,12 +398,12 @@ def _run_analysis(
                         yerr_arr_fit = np.zeros_like(xval_arr_fit)
                     for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
+                            xval=xval,
+                            yval=yval,
+                            yerr=yerr,
-                            x=xval,
-                            y=yval,
-                            y_err=yerr,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 368f1b2a83..b08366ec0d 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -216,12 +216,12 @@ def _run_data_processing(
                 series_name = pd.NA
                 series_id = pd.NA
+                xval=xval,
+                yval=yval,
+                yerr=yerr,
-                x=xval,
-                y=yval,
-                y_err=yerr,
                 shots=datum.get("shots", pd.NA),
@@ -260,12 +260,12 @@ def _format_data(
             except ValueError:
                 series_id = pd.NA
+                xval=xval,
+                yval=avg_yval,
+                yerr=avg_yerr,
-                x=xval,
-                y=avg_yval,
-                y_err=avg_yerr,
@@ -519,12 +519,12 @@ def _run_analysis(
                     yerr_arr_fit = np.zeros_like(xval_arr_fit)
                 for xval, yval, yerr in zip(xval_arr_fit, yval_arr_fit, yerr_arr_fit):
+                        xval=xval,
+                        yval=yval,
+                        yerr=yerr,
-                        x=xval,
-                        y=yval,
-                        y_err=yerr,
diff --git a/qiskit_experiments/curve_analysis/ b/qiskit_experiments/curve_analysis/
index 249ede87e9..bbb5f91bbd 100644
--- a/qiskit_experiments/curve_analysis/
+++ b/qiskit_experiments/curve_analysis/
@@ -351,12 +351,12 @@ def iter_groups(
     def add_row(
+        xval: float | pd.NA = pd.NA,
+        yval: float | pd.NA = pd.NA,
+        yerr: float | pd.NA = pd.NA,
         series_name: str | pd.NA = pd.NA,
         series_id: int | pd.NA = pd.NA,
         category: str | pd.NA = pd.NA,
-        x: float | pd.NA = pd.NA,
-        y: float | pd.NA = pd.NA,
-        y_err: float | pd.NA = pd.NA,
         shots: float | pd.NA = pd.NA,
         analysis: str | pd.NA = pd.NA,
@@ -365,16 +365,18 @@ def add_row(
         Data must be the same length.
-            x: X value.
-            y: Y value.
-            y_err: Standard deviation of y value.
-            shots: Shot number used to acquire this data point.
+            xval: X value.
+            yval: Y value.
+            yerr: Standard deviation of y value.
             series_name: Name of this data series if available.
             series_id: Index of this data series if available.
             category: Data category if available.
+            shots: Shot number used to acquire this data point.
             analysis: Analysis name if available.
-        self._lazy_add_rows.append([x, y, y_err, series_name, series_id, category, shots, analysis])
+        self._lazy_add_rows.append(
+            [xval, yval, yerr, series_name, series_id, category, shots, analysis]
+        )
     def _format_table(cls, data: pd.DataFrame) -> pd.DataFrame:
diff --git a/test/curve_analysis/ b/test/curve_analysis/
index 0f78755738..7166b64a25 100644
--- a/test/curve_analysis/
+++ b/test/curve_analysis/
@@ -90,23 +90,38 @@ def test_create_table_from_dataframe(self):
         obj = ScatterTable.from_dataframe(formatted_ref)
+    def test_factory_method_check_all_members(self):
+        """Test to check the factory method populates all instance members."""
+        to_test = ScatterTable.from_dataframe(pd.DataFrame(columns=ScatterTable.COLUMNS))
+        ref = ScatterTable()
+        self.assertEqual(to_test.__dict__.keys(), ref.__dict__.keys())
+    def test_two_construction_method_identical(self):
+        """Check if two tables constructed differently from the same source are identical."""
+        new_table = ScatterTable()
+        for _, row_data in self.reference.iterrows():
+            new_table.add_row(**row_data)
+        ref_table = ScatterTable.from_dataframe(self.reference)
+        self.assertEqual(new_table, ref_table)
     def test_add_row(self):
         """Test adding single row to the table without and with missing data."""
         obj = ScatterTable()
+            xval=0.1,
+            yval=2.3,
+            yerr=0.4,
-            x=0.1,
-            y=2.3,
-            y_err=0.4,
-            x=0.2,
-            y=3.4,
+            xval=0.2,
+            yval=3.4,
         self.assertEqual(len(obj), 2)
         np.testing.assert_array_equal(obj.x, np.array([0.1, 0.2]))

From 7ff2c6aa3d5dd0e47cf0f3442c958cc444d8719d Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <>
Date: Wed, 7 Feb 2024 01:53:05 +0900
Subject: [PATCH 29/29] Update Ramsey analysis

 .../library/driven_freq_tuning/  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/qiskit_experiments/library/driven_freq_tuning/ b/qiskit_experiments/library/driven_freq_tuning/
index ecbc1e2114..bde5750c32 100644
--- a/qiskit_experiments/library/driven_freq_tuning/
+++ b/qiskit_experiments/library/driven_freq_tuning/
@@ -290,9 +290,9 @@ def _format_data(
                 amplitudes, unwrapped_phase, phase_s, shot_sums
-                    x=new_x,
-                    y=new_y,
-                    y_err=new_y_err,
+                    xval=new_x,
+                    yval=new_y,
+                    yerr=new_y_err,