Skip to content

Commit 1faa54a

Browse files
priyadarshini-niPriyadarshini Piramanayagam
and
Priyadarshini Piramanayagam
authored
feat: Export Results as Dataframe (#100)
Co-authored-by: Priyadarshini Piramanayagam <priydarshini.piramanayagam@emerson.com>
1 parent dc4c718 commit 1faa54a

File tree

5 files changed

+354
-0
lines changed

5 files changed

+354
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from ._dataframe_utilities import convert_results_to_dataframe
2+
3+
# flake8: noqa
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from typing import Any, Dict, List
2+
3+
import pandas as pd
4+
from nisystemlink.clients.testmonitor.models import Result
5+
from nisystemlink.clients.testmonitor.utilities.constants import DataFrameHeaders
6+
7+
8+
def convert_results_to_dataframe(
    results: List[Result], set_id_as_index: bool = True
) -> pd.DataFrame:
    """Build a Pandas DataFrame holding one row per result.

    Args:
        results: The results to place in the dataframe.
        set_id_as_index: If true (the default) and an ``id`` column exists,
            the result id is used as the dataframe index. If false, the
            index is left untouched.

    Returns:
        A Pandas DataFrame where every result field has its own column.
        Fields whose value is ``None`` are excluded before conversion.
        Nested fields are flattened using ``.`` as the separator:
            - status_type_summary: one column per entry, for example
              status_type_summary.LOOPING, status_type_summary.PASSED, etc.
            - properties: one column per property, for example
              properties.property1, properties.property2, etc.
        The status object is collapsed into a single ``status`` column that
        holds the status name for "CUSTOM" statuses and the status type
        value otherwise. Standard columns come first, followed by the
        status_type_summary columns and finally the properties columns.
    """
    raw_records = []
    for result in results:
        raw_records.append(result.dict(exclude_none=True))

    flattened = pd.json_normalize(
        __normalize_results_status(raw_records), sep="."
    )
    ordered = __format_results_columns(results_dataframe=flattened)

    # Without an "id" column (e.g. no results at all) there is nothing to index by.
    if not set_id_as_index or "id" not in ordered.columns:
        return ordered

    ordered.set_index("id", inplace=True)
    return ordered
40+
41+
42+
def __normalize_results_status(
    results_dict: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Collapse the nested status object of every result into a single value.

    The dictionaries are modified in place and the same list is returned.

    Args:
        results_dict: List of results, each already converted to a dictionary.

    Returns:
        The same list of result dictionaries. For each entry, if
        status.status_type is "CUSTOM" the "status" field takes the value of
        "status_name" (``None`` when no name is present), else the value of
        "status_type" is used. Entries that carry no status are left without
        a "status" key, and entries whose status is already a plain value are
        left unchanged.
    """
    for result in results_dict:
        status = result.get("status")

        if status is None or status == {}:
            # The status may be absent altogether (for example when it was
            # None and dropped during serialization). Indexing into it would
            # raise KeyError, so simply make sure no empty placeholder remains.
            result.pop("status", None)
            continue

        if not isinstance(status, dict):
            # Already a scalar (e.g. normalized earlier); nothing to collapse.
            continue

        status_type = status.get("status_type")
        # Enum members expose their underlying value through ``.value``;
        # a plain string is used as it is.
        type_value = getattr(status_type, "value", status_type)

        if type_value == "CUSTOM":
            result["status"] = status.get("status_name")
        else:
            result["status"] = type_value

    return results_dict
62+
63+
64+
def __format_results_columns(results_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Reorder the result columns so that properties come last.

    Args:
        results_dataframe: Dataframe of results.

    Returns:
        Dataframe of results with the standard columns first, followed by the
        status type summary columns and then the property columns. The
        relative order inside each group is kept.
    """
    standard_headers = []
    summary_headers = []
    property_headers = []

    # Single pass over the headers, dropping each one into its group(s).
    for header in results_dataframe.columns.to_list():
        if __is_standard_column_header(header):
            standard_headers.append(header)
        if __is_status_type_summary_header(header=header):
            summary_headers.append(header)
        if __is_property_header(header=header):
            property_headers.append(header)

    ordered_headers = [*standard_headers, *summary_headers, *property_headers]
    return results_dataframe.reindex(columns=ordered_headers, copy=False)
88+
89+
90+
def __is_standard_column_header(header: str) -> bool:
    """Check if column header is neither a status type summary nor a property.

    Args:
        header: column header for results dataframe.

    Returns:
        False if header starts with 'status_type_summary.' or 'properties.'.
        Else returns true.
    """
    if __is_status_type_summary_header(header=header):
        return False
    return not __is_property_header(header=header)
104+
105+
106+
def __is_status_type_summary_header(header: str) -> bool:
    """Check if column header is a status type summary column.

    Args:
        header: column header for results dataframe.

    Returns:
        True if header starts with the status type summary prefix
        ('status_type_summary.'). Else returns false.
    """
    summary_prefix = DataFrameHeaders.STATUS_TYPE_SUMMARY_HEADER_PREFIX
    return header.startswith(summary_prefix)
117+
118+
119+
def __is_property_header(header: str) -> bool:
    """Check if column header is a property column.

    Args:
        header: column header for results dataframe.

    Returns:
        True if header starts with the property prefix ('properties.').
        Else returns false.
    """
    property_prefix = DataFrameHeaders.PROPERTY_COLUMN_HEADER_PREFIX
    return header.startswith(property_prefix)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
class DataFrameHeaders:
    """Column header prefixes used when results are flattened into a dataframe."""

    # Prefix of the columns created from the status_type_summary entries.
    STATUS_TYPE_SUMMARY_HEADER_PREFIX = "status_type_summary."

    # Prefix of the columns created from the properties entries.
    PROPERTY_COLUMN_HEADER_PREFIX = "properties."

tests/testmonitor/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# flake8: noqa
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import datetime
2+
import uuid
3+
from typing import List
4+
5+
import pandas as pd
6+
import pytest
7+
from nisystemlink.clients.testmonitor.models._result import Result
8+
from nisystemlink.clients.testmonitor.models._status import Status, StatusType
9+
from nisystemlink.clients.testmonitor.utilities._dataframe_utilities import (
10+
convert_results_to_dataframe,
11+
)
12+
13+
14+
@pytest.fixture(scope="class")
def results() -> List[Result]:
    """Sample results for testing purposes.

    Three results are provided: a passed one with every field populated, a
    failed one with only part of the fields, and one with a custom status
    and a minimal set of fields.
    """
    timestamp = datetime.datetime(
        2018, 5, 7, 18, 58, 5, 219692, tzinfo=datetime.timezone.utc
    )

    def new_id() -> str:
        # Fresh unique hex string for id-like fields.
        return uuid.uuid1().hex

    fully_populated = Result(
        status=Status.PASSED(),
        started_at=timestamp,
        updated_at=timestamp,
        program_name="My Program Name",
        id=new_id(),
        system_id=new_id(),
        host_name="host name",
        part_number=new_id(),
        serial_number=new_id(),
        total_time_in_seconds=16.76845106446358,
        keywords=["keyword1", "keyword2"],
        properties={"property1": "value1", "property2": "value2"},
        operator="sample operator",
        file_ids=[new_id(), new_id()],
        data_table_ids=[new_id(), new_id()],
        status_type_summary={StatusType.PASSED: 1, StatusType.FAILED: 0},
        workspace=new_id(),
    )
    partially_populated = Result(
        status=Status.FAILED(),
        started_at=timestamp,
        updated_at=timestamp,
        program_name="My Program Name",
        id=new_id(),
        part_number=new_id(),
        total_time_in_seconds=16.76845106446358,
        keywords=[],
        properties={"property3": "value3"},
        file_ids=[new_id()],
        status_type_summary={StatusType.PASSED: 0, StatusType.FAILED: 1},
        workspace=new_id(),
    )
    custom_status = Result(
        status=Status(status_type=StatusType.CUSTOM, status_name="custom_status"),
        started_at=timestamp,
        updated_at=timestamp,
        program_name="My Program Name",
        id=new_id(),
        file_ids=[new_id()],
        status_type_summary={StatusType.PASSED: 0, StatusType.FAILED: 1},
        workspace=new_id(),
    )

    return [fully_populated, partially_populated, custom_status]
76+
77+
78+
@pytest.mark.enterprise
class TestTestmonitorDataframeUtilities:
    """Tests for converting test monitor results into a Pandas DataFrame."""

    def test__convert_results_with_all_fields_to_dataframe__returns_whole_results_dataframe(
        self, results
    ):
        """All sample results, without id index, yield the full set of columns."""
        expected = self.__get_expected_results_dataframe(results=results)

        actual = convert_results_to_dataframe(results=results, set_id_as_index=False)

        assert not actual.empty
        assert len(actual.columns.tolist()) == 20
        pd.testing.assert_frame_equal(actual, expected, check_dtype=True)
        self.__assert_column_types(
            actual, list_columns=("file_ids", "data_table_ids", "keywords")
        )

    def test__convert_results_with_specific_fields_to_dataframe__returns_results_dataframe_with_specific_fields(
        self, results
    ):
        """Results lacking some fields yield only the columns that have data."""
        partial_results = results[1:]
        expected = self.__get_expected_results_dataframe(results=partial_results)

        actual = convert_results_to_dataframe(
            results=partial_results, set_id_as_index=False
        )

        assert not actual.empty
        assert len(actual.columns.tolist()) == 13
        pd.testing.assert_frame_equal(actual, expected, check_dtype=True)
        self.__assert_column_types(actual, list_columns=("file_ids", "keywords"))

    def test__convert_results_to_dataframe_with_id_index__returns_results_dataframe_with_id_index(
        self, results
    ):
        """With default arguments the id column becomes the index."""
        expected = self.__get_expected_results_dataframe(results=results)
        expected = expected.set_index("id")

        actual = convert_results_to_dataframe(results=results)

        assert not actual.empty
        assert len(actual.columns.tolist()) == 19
        pd.testing.assert_frame_equal(actual, expected, check_dtype=True)
        self.__assert_column_types(
            actual, list_columns=("file_ids", "data_table_ids", "keywords")
        )

    def test__convert_results_to_dataframe_with_no_results__returns_empty_dataframe(
        self,
    ):
        """An empty list of results yields an empty dataframe."""
        actual = convert_results_to_dataframe(results=[])

        assert isinstance(actual, pd.DataFrame)
        assert actual.empty

    def __assert_column_types(self, dataframe: pd.DataFrame, list_columns) -> None:
        """Assert the types of the status, timestamp and list-valued columns.

        Args:
            dataframe: Dataframe returned by the conversion.
            list_columns: Names of the columns expected to hold lists, checked
                in the given order.
        """
        assert isinstance(dataframe["status"].iloc[0], str)
        for timestamp_column in ("started_at", "updated_at"):
            assert dataframe[timestamp_column].dtype == "datetime64[ns, UTC]"
        for list_column in list_columns:
            assert dataframe[list_column].dtype == "object"
            assert isinstance(dataframe[list_column].iloc[0], List)

    def __get_expected_results_dataframe(self, results: List[Result]):
        """Build the dataframe the conversion is expected to produce.

        Args:
            results: Results to build the expected dataframe from.

        Returns:
            Dataframe with the status column first, followed by the remaining
            result fields, the status type summary columns and the property
            columns. Columns that are empty for every result are dropped.
        """
        rows = []
        for result in results:
            row = {
                "started_at": result.started_at,
                "updated_at": result.updated_at,
                "program_name": result.program_name,
                "id": result.id,
                "system_id": result.system_id,
                "host_name": result.host_name,
                "part_number": result.part_number,
                "serial_number": result.serial_number,
                "total_time_in_seconds": result.total_time_in_seconds,
                "keywords": result.keywords,
                "operator": result.operator,
                "file_ids": result.file_ids,
                "data_table_ids": result.data_table_ids,
                "workspace": result.workspace,
            }

            # Custom statuses are represented by their name, others by their type.
            if not result.status:
                row["status"] = None
            elif result.status.status_type != "CUSTOM":
                row["status"] = result.status.status_type.value
            else:
                row["status"] = result.status.status_name

            if result.status_type_summary:
                for key, value in result.status_type_summary.items():
                    row[f"status_type_summary.{key}"] = value

            if result.properties:
                for key, value in result.properties.items():
                    row[f"properties.{key}"] = value

            rows.append(row)

        expected_frame = pd.DataFrame(rows)
        # The conversion emits the status column before every other column.
        column_order = ["status"]
        column_order.extend(col for col in expected_frame.columns if col != "status")
        expected_frame = expected_frame[column_order]
        expected_frame.dropna(axis="columns", how="all", inplace=True)

        return expected_frame

0 commit comments

Comments
 (0)