Skip to content

Commit ae1b608

Browse files
sam-rishiSam rishishri2k2Sam RishiRSam-NI
authored
feat: Export Specs as Dataframe (#98)
Co-authored-by: Sam rishi <sam.rishi@solitontech.local> Co-authored-by: shri2k2 <shriram.sakthivel@solitontech.com> Co-authored-by: Sam Rishi <sam.rishi@ni.com> Co-authored-by: RSam-NI <sam.rishi@emerson.com>
1 parent 820df0e commit ae1b608

File tree

5 files changed

+1005
-0
lines changed

5 files changed

+1005
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from ._dataframe_utilities import (
2+
convert_specs_to_dataframe,
3+
summarize_conditions_as_a_string,
4+
normalize_conditions_per_column,
5+
normalize_conditions_per_row,
6+
)
7+
8+
# flake8: noqa
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
class DataFrameHeaders:
    """Column-header constants used when laying out the specs dataframe."""

    # Prefix carried by every condition column so they can be grouped together.
    CONDITION_COLUMN_HEADER_PREFIX = "condition_"

    # Prefix of the columns that hold spec properties.
    PROPERTY_COLUMN_HEADER_PREFIX = "properties."

    # Exact header of the keywords column.
    KEYWORDS_COLUMN_HEADER = "keywords"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
from typing import Any, Callable, Dict, List, Optional, Union
2+
3+
import pandas as pd
4+
from nisystemlink.clients.spec.models._condition import (
5+
Condition,
6+
NumericConditionValue,
7+
StringConditionValue,
8+
)
9+
from nisystemlink.clients.spec.models._specification import (
10+
Specification,
11+
SpecificationLimit,
12+
SpecificationType,
13+
)
14+
from nisystemlink.clients.spec.utilities._constants import DataFrameHeaders
15+
16+
17+
def summarize_conditions_as_a_string(
    conditions: List[Condition],
) -> List[Dict[str, str]]:
    """Summarize every condition of a spec as a human-readable string.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A single-element list holding one dictionary (one dataframe row).
        Each key is the condition column header, i.e.
        "condition_<conditionName>(<conditionUnit>)"; the unit part is only
        present for numeric conditions that define a unit.
        Each value is the comma-separated summary of the condition value:
        numeric ranges are rendered first as "[<field>: <value>; ...]"
        (for example "[min: 1; max: 10; step: 2]"), followed by the discrete
        values. For string conditions only the discrete values are listed.
        Conditions lacking a name or a value are skipped.
    """
    summary: Dict[str, str] = {}

    for condition in conditions:
        # Nothing meaningful can be shown without both a name and a value.
        if not (condition.name and condition.value):
            continue

        column_header = __generate_condition_column_header(condition)
        summary[column_header] = ", ".join(__serialize_condition_value(condition))

    return [summary]
45+
46+
47+
def normalize_conditions_per_column(
    conditions: List[Condition],
) -> List[Dict[str, Any]]:
    """Lay out the conditions of a spec as one column per condition.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A single-element list holding one dictionary (one dataframe row).
        Each key is the condition name prefixed with "condition_" and each
        value is the condition's value object, passed through unchanged.
        Conditions lacking a name or a value are skipped.
    """
    prefix = DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX
    row: Dict[str, Any] = {}

    for condition in conditions:
        if condition.name and condition.value:
            row[f"{prefix}{condition.name}"] = condition.value

    return [row]
69+
70+
71+
def normalize_conditions_per_row(
    conditions: List[Condition],
) -> List[Dict[str, Any]]:
    """Lay out the conditions of a spec as one row per condition.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list with one dictionary per retained condition. Every dictionary
        has the keys 'condition.name' and 'condition.value', holding the
        condition's name and its value object respectively. Each dictionary
        translates to a separate row in the dataframe.
        Conditions lacking a name or a value are skipped, so the list may be
        empty.
    """
    rows: List[Dict[str, Any]] = []

    for condition in conditions:
        if not (condition.name and condition.value):
            continue
        rows.append(
            {"condition.name": condition.name, "condition.value": condition.value}
        )

    return rows
93+
94+
95+
def convert_specs_to_dataframe(
    specs: List[Specification],
    condition_format: Optional[
        Callable[[List[Condition]], List[Dict[str, Any]]]
    ] = normalize_conditions_per_column,
) -> pd.DataFrame:
    """Creates a Pandas DataFrame for the specs.

    Args:
        specs: List of specs.
        condition_format: A callback function which takes in the list of conditions of a
            spec and returns a list of dictionaries of condition data. Each dictionary
            in the list becomes one dataframe row; when more than one dictionary is
            returned, the remaining spec columns are duplicated on every row. The
            dictionary keys are used as column headers and the values as the cells
            of the respective columns.
            If not passed, the condition column header will be the condition name and
            the corresponding cell will be the condition value.
            For all the condition columns to be grouped together in the dataframe,
            the dictionary keys should have the prefix "condition_".
            If one row per condition is needed, the public method
            `normalize_conditions_per_row` can be provided as the callback.
            If a string summary of the condition data is needed, the public method
            `summarize_conditions_as_a_string` can be provided as the callback.
            If None is passed, conditions will not be included in the dataframe.

    Returns:
        A Pandas DataFrame with each spec field in a separate column.
        Every spec contributes at least one row, even when it has no conditions
        or when `condition_format` returns no rows for it.
        Following fields are split into sub-columns.
            - conditions: format of the condition columns is decided by the
              `condition_format` argument of this function.
            - Properties: All the unique properties across all specs will be split into
              separate columns. For example, properties.property1,
              properties.property2, etc.
        Columns whose cells are all empty are dropped.
    """
    specs_dict = []

    for spec in specs:
        condition_rows = (
            condition_format(spec.conditions)
            if (spec.conditions and condition_format)
            else []
        )
        # A formatter may return no rows at all (e.g. every condition lacks a
        # name or a value). Fall back to a single empty condition so the spec
        # itself is never silently dropped from the dataframe.
        for condition in condition_rows or [{}]:
            specs_dict.append(__convert_spec_to_dict(spec=spec, condition=condition))

    specs_dataframe = pd.json_normalize(specs_dict)
    specs_dataframe = __format_specs_columns(specs_dataframe=specs_dataframe)
    specs_dataframe.dropna(axis="columns", how="all", inplace=True)

    return specs_dataframe
147+
148+
149+
def __convert_spec_to_dict(
    spec: Specification, condition: Dict[str, Any]
) -> Dict[str, Any]:
    """Flatten a spec and one condition row into a single dictionary.

    Args:
        spec: Spec object.
        condition: Condition data as a dictionary; its entries are merged last
            into the output dictionary.

    Returns:
        Spec as a dictionary with the provided condition dictionary included.
        The spec's own 'type', 'limit' and 'conditions' attributes are replaced
        by their serialized forms (type and limit) or left out (conditions).
    """
    # These attributes are handled separately below instead of being copied.
    specially_handled = ("type", "limit", "conditions")

    spec_dict: Dict[str, Any] = {
        attribute: attribute_value
        for attribute, attribute_value in vars(spec).items()
        if attribute not in specially_handled
    }

    if spec.type:
        spec_dict.update(__serialize_type(spec.type))
    if spec.limit:
        spec_dict.update(__serialize_limits(spec.limit))
    spec_dict.update(condition)

    return spec_dict
171+
172+
173+
def __serialize_limits(limit: SpecificationLimit) -> Dict[str, str]:
    """Flatten a limit into "limit.<attribute>" entries.

    Args:
        limit: Limit of a spec.

    Returns:
        A dictionary with one "limit.<attribute>" key per attribute of the
        limit object, mapped to that attribute's value.
    """
    serialized = {}

    for attribute, attribute_value in vars(limit).items():
        serialized[f"limit.{attribute}"] = attribute_value

    return serialized
183+
184+
185+
def __serialize_type(type: SpecificationType) -> Dict[str, str]:
    """Serialize a spec type into its name.

    Args:
        type: Type of a spec.

    Returns:
        A dictionary with the single key "type" mapped to the name of the type.
    """
    type_name = type.name
    return {"type": type_name}
195+
196+
197+
def __format_specs_columns(specs_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Reorder the spec columns so that related columns sit together.

    The resulting order is: standard columns, condition columns, the keywords
    column (when present) and finally the property columns. Within each group
    the original relative order is kept.

    Args:
        specs_dataframe: Dataframe of specs.

    Returns:
        Dataframe of specs with its columns reordered.
    """
    column_headers = specs_dataframe.columns.to_list()

    standard_column_headers = [
        header for header in column_headers if __is_standard_column_header(header)
    ]
    condition_headers = [
        header for header in column_headers if __is_condition_header(header=header)
    ]
    properties_headers = [
        header for header in column_headers if __is_property_header(header=header)
    ]

    # Use the shared constant rather than a literal so this stays in sync with
    # `__is_keywords_header`, which excludes the column from the standard ones.
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER
    keywords_headers = [keywords_header] if keywords_header in column_headers else []

    formatted_column_headers = (
        standard_column_headers
        + condition_headers
        + keywords_headers
        + properties_headers
    )

    return specs_dataframe.reindex(columns=formatted_column_headers, copy=False)
224+
225+
226+
def __is_standard_column_header(header: str) -> bool:
    """Check if a column header is neither a condition, a property nor keywords.

    Args:
        header: column header for specs dataframe.

    Returns:
        False if the header is a condition header, a property header or the
        keywords header. True otherwise.
    """
    if __is_condition_header(header=header):
        return False
    if __is_property_header(header=header):
        return False
    return not __is_keywords_header(header=header)
241+
242+
243+
def __is_condition_header(header: str) -> bool:
    """Check if a column header belongs to a condition column.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header starts with the condition column prefix
        ('condition_'). Else returns False.
    """
    condition_prefix = DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX
    return header.startswith(condition_prefix)
254+
255+
256+
def __is_property_header(header: str) -> bool:
    """Check if a column header belongs to a property column.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header starts with the property column prefix
        ('properties.'). Else returns False.
    """
    property_prefix = DataFrameHeaders.PROPERTY_COLUMN_HEADER_PREFIX
    return header.startswith(property_prefix)
267+
268+
269+
def __is_keywords_header(header: str) -> bool:
    """Check if a column header is the keywords column header.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header equals the keywords column header ('keywords').
        Else returns False.
    """
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER
    return header == keywords_header
280+
281+
282+
def __generate_condition_column_header(condition: Condition) -> str:
    """Build the dataframe column header for a condition.

    Args:
        condition: Condition object for generating column header.

    Returns:
        The condition column prefix followed by the condition name (empty when
        the condition has no name). For a numeric condition value that defines
        a unit, "(<unit>)" is appended.
    """
    column_header = (
        f"{DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX}{condition.name or ''}"
    )

    condition_value = condition.value
    # Only numeric condition values are checked for a unit.
    if isinstance(condition_value, NumericConditionValue) and condition_value.unit:
        column_header += f"({condition_value.unit})"

    return column_header
299+
300+
301+
def __serialize_condition_value(condition: Condition) -> List[str]:
    """Collect the ranges and discrete values of a condition as strings.

    Args:
        condition: Condition for getting values.

    Returns:
        The serialized ranges (numeric condition values only) followed by the
        serialized discrete values. Empty when the condition has no value.
    """
    condition_value = condition.value
    if not condition_value:
        return []

    # Ranges only exist on numeric condition values.
    serialized_ranges = (
        __serialize_numeric_condition_range(value=condition_value)
        if isinstance(condition_value, NumericConditionValue)
        else []
    )
    serialized_discrete = __serialize_condition_discrete_values(value=condition_value)

    return [*serialized_ranges, *serialized_discrete]
321+
322+
323+
def __serialize_numeric_condition_range(value: NumericConditionValue) -> List[str]:
    """Serialize the ranges of a numeric condition value.

    Args:
        value: A condition's value with NumericConditionValue type.

    Returns:
        One string per range, formatted as "[<field>: <value>; ...]" (for
        example `[min: <value>; max: <value>; step: <value>]`). Fields whose
        value is None are left out. Empty when the value has no ranges.
    """
    if not value.range:
        return []

    serialized_ranges = []

    for condition_range in value.range:
        populated_fields = [
            f"{field_name}: {field_value}"
            for field_name, field_value in vars(condition_range).items()
            if field_value is not None
        ]
        serialized_ranges.append(f"[{'; '.join(populated_fields)}]")

    return serialized_ranges
345+
346+
347+
def __serialize_condition_discrete_values(
    value: Union[NumericConditionValue, StringConditionValue]
) -> List[str]:
    """Serialize the discrete values of a condition value.

    Args:
        value: A condition's value with either NumericConditionValue type or
            StringConditionValue type.

    Returns:
        The discrete values converted to strings, in their original order.
        Empty when the value has no discrete values.
    """
    discrete_values = value.discrete or []
    return list(map(str, discrete_values))

tests/spec/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# flake8: noqa

0 commit comments

Comments
 (0)