|
| 1 | +from typing import Any, Callable, Dict, List, Optional, Union |
| 2 | + |
| 3 | +import pandas as pd |
| 4 | +from nisystemlink.clients.spec.models._condition import ( |
| 5 | + Condition, |
| 6 | + NumericConditionValue, |
| 7 | + StringConditionValue, |
| 8 | +) |
| 9 | +from nisystemlink.clients.spec.models._specification import ( |
| 10 | + Specification, |
| 11 | + SpecificationLimit, |
| 12 | + SpecificationType, |
| 13 | +) |
| 14 | +from nisystemlink.clients.spec.utilities._constants import DataFrameHeaders |
| 15 | + |
| 16 | + |
def summarize_conditions_as_a_string(
    conditions: List[Condition],
) -> List[Dict[str, str]]:
    """Summarize each condition of a spec as a single readable string.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list containing exactly one dictionary with one entry per condition.
        Each key is the condition column header: the condition column prefix
        followed by the condition name and, for a numeric condition that has a
        unit, "(<unit>)".
        Each value is the ", "-joined serialization of the condition value,
        e.g. "[min: num; max: num; step: num], num, num" for a numeric
        condition (the bracketed part is a range, the rest are discrete
        values) or "str, str, str" for a string condition.
        Conditions without a name or without a value are skipped. If two
        conditions produce the same header, the later one wins.
    """
    summary: Dict[str, str] = {}

    for condition in conditions:
        # Only conditions carrying both a name and a value are reported.
        if not (condition.name and condition.value):
            continue

        column_header = __generate_condition_column_header(condition)
        summary[column_header] = ", ".join(__serialize_condition_value(condition))

    # A single dictionary means a single dataframe row per spec.
    return [summary]
| 45 | + |
| 46 | + |
def normalize_conditions_per_column(
    conditions: List[Condition],
) -> List[Dict[str, Any]]:
    """Map every condition of a spec to its own column.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list containing exactly one dictionary. Each key is the condition
        column prefix followed by the condition name, and each value is the
        condition's value object, unchanged. Conditions without a name or
        without a value are skipped.
    """
    prefix = DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX
    columns: Dict[str, Any] = {}

    for condition in conditions:
        if condition.name and condition.value:
            columns[f"{prefix}{condition.name}"] = condition.value

    # One dictionary -> one dataframe row holding all conditions as columns.
    return [columns]
| 69 | + |
| 70 | + |
def normalize_conditions_per_row(
    conditions: List[Condition],
) -> List[Dict[str, Any]]:
    """Map every condition of a spec to its own row.

    Args:
        conditions: List of all conditions in a spec.

    Returns:
        A list with one dictionary per condition, in input order. Each
        dictionary has the keys 'condition.name' and 'condition.value' holding
        the condition's name and its value object respectively. Conditions
        without a name or without a value are skipped, so the list may be
        empty. Each dictionary is intended to become a separate dataframe row.
    """
    rows: List[Dict[str, Any]] = []

    for condition in conditions:
        if not condition.name or not condition.value:
            continue
        rows.append(
            {"condition.name": condition.name, "condition.value": condition.value}
        )

    return rows
| 93 | + |
| 94 | + |
def convert_specs_to_dataframe(
    specs: List[Specification],
    condition_format: Optional[
        Callable[[List[Condition]], List[Dict[str, Any]]]
    ] = normalize_conditions_per_column,
) -> pd.DataFrame:
    """Creates a Pandas DataFrame for the specs.

    Args:
        specs: List of specs.
        condition_format: A callback function which takes in the list of conditions of a spec
            and returns a list of dictionaries. Each dictionary in the list becomes one
            dataframe row; the other spec columns are duplicated on every such row. Keys are
            used as the dataframe column headers and values as the row cells.
            If not passed, `normalize_conditions_per_column` is used, which produces one
            column per condition.
            For all the condition columns to be grouped together in the dataframe,
            the dictionary keys should start with the prefix "condition_".
            If one condition per row is needed, the public method
            `normalize_conditions_per_row` can be provided as the callback function.
            If the condition value is needed as a string summary, the public method
            `summarize_conditions_as_a_string` can be provided as the callback function.
            If None is passed, conditions will not be included in the dataframe.

    Returns:
        A Pandas DataFrame with each spec field having a separate column.
        Following fields are split into sub-columns.
        - conditions: format of the condition columns is decided by the `condition_format`
          argument of this function.
        - properties: All the unique properties across all specs are split into separate
          columns. For example, properties.property1, properties.property2, etc.
        Columns in which every cell is empty are dropped. Every spec contributes at least
        one row, even when it has no conditions or the callback yields no rows for it.
    """
    spec_rows: List[Dict[str, Any]] = []

    for spec in specs:
        condition_rows: List[Dict[str, Any]] = []
        if spec.conditions and condition_format:
            condition_rows = condition_format(spec.conditions)

        # Fall back to a single empty condition so a spec is never dropped from
        # the dataframe when the callback returns no rows (e.g. every condition
        # of the spec was skipped for lacking a name or a value).
        for condition in condition_rows or [{}]:
            spec_rows.append(__convert_spec_to_dict(spec=spec, condition=condition))

    specs_dataframe = pd.json_normalize(spec_rows)
    specs_dataframe = __format_specs_columns(specs_dataframe=specs_dataframe)
    # Remove columns that carry no data for any spec.
    specs_dataframe.dropna(axis="columns", how="all", inplace=True)

    return specs_dataframe
| 147 | + |
| 148 | + |
def __convert_spec_to_dict(
    spec: Specification, condition: Dict[str, Any]
) -> Dict[str, Any]:
    """Flatten a spec and one condition dictionary into a single dictionary.

    Args:
        spec: Spec object.
        condition: Condition data as a dictionary; its entries are merged last into the
            output, so they override spec entries with the same key.

    Returns:
        A dictionary holding every attribute of the spec except 'type', 'limit' and
        'conditions', followed by the serialized type (if set), the serialized limit
        (if set) and the entries of `condition`.
    """
    excluded_fields = ("type", "limit", "conditions")

    spec_dict: Dict[str, Any] = {}
    for field_name, field_value in vars(spec).items():
        if field_name not in excluded_fields:
            spec_dict[field_name] = field_value

    # The excluded fields are re-added in a flattened, dataframe-friendly form.
    if spec.type:
        spec_dict.update(__serialize_type(spec.type))
    if spec.limit:
        spec_dict.update(__serialize_limits(spec.limit))

    spec_dict.update(condition)
    return spec_dict
| 171 | + |
| 172 | + |
def __serialize_limits(limit: SpecificationLimit) -> Dict[str, str]:
    """Flatten a spec limit into 'limit.<field>' entries.

    Args:
        limit: Limit of a spec.

    Returns:
        A dictionary with one entry per attribute of `limit`; each key is the
        attribute name prefixed with 'limit.' and each value is the attribute
        value, unchanged.
    """
    serialized = {}
    for field_name, field_value in vars(limit).items():
        serialized[f"limit.{field_name}"] = field_value
    return serialized
| 183 | + |
| 184 | + |
def __serialize_type(type: SpecificationType) -> Dict[str, str]:
    """Serialize a spec type into a dictionary holding its name.

    Args:
        type: Type of a spec.

    Returns:
        A single-entry dictionary mapping 'type' to the name of the given type.
    """
    type_name = type.name
    return {"type": type_name}
| 195 | + |
| 196 | + |
def __format_specs_columns(specs_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Reorder spec columns so that conditions are grouped and keywords and properties come last.

    Args:
        specs_dataframe: Dataframe of specs.

    Returns:
        Dataframe of specs with its columns ordered as: standard columns, condition
        columns, the keywords column (if present) and finally property columns. Within
        each group the original column order is kept.
    """
    column_headers = specs_dataframe.columns.to_list()

    standard_column_headers = [
        header
        for header in column_headers
        if __is_standard_column_header(header=header)
    ]
    condition_headers = [
        header for header in column_headers if __is_condition_header(header=header)
    ]
    properties_headers = [
        header for header in column_headers if __is_property_header(header=header)
    ]

    # Use the same constant that `__is_keywords_header` compares against, so the
    # column excluded from the standard group is exactly the one re-added here.
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER
    keywords_headers = [keywords_header] if keywords_header in column_headers else []

    formatted_column_headers = (
        standard_column_headers
        + condition_headers
        + keywords_headers
        + properties_headers
    )

    return specs_dataframe.reindex(columns=formatted_column_headers, copy=False)
| 224 | + |
| 225 | + |
def __is_standard_column_header(header: str) -> bool:
    """Check whether a column header is neither a condition, a property nor keywords.

    Args:
        header: column header for specs dataframe.

    Returns:
        False if the header is a condition header, a property header or the
        keywords header. Otherwise returns True.

    """
    if __is_condition_header(header=header):
        return False
    if __is_property_header(header=header):
        return False
    return not __is_keywords_header(header=header)
| 241 | + |
| 242 | + |
def __is_condition_header(header: str) -> bool:
    """Check whether a column header belongs to a condition.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header starts with the condition column header prefix.
        Otherwise returns False.

    """
    condition_prefix = DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX
    return header.startswith(condition_prefix)
| 254 | + |
| 255 | + |
def __is_property_header(header: str) -> bool:
    """Check whether a column header belongs to a property.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header starts with the property column header prefix.
        Otherwise returns False.

    """
    property_prefix = DataFrameHeaders.PROPERTY_COLUMN_HEADER_PREFIX
    return header.startswith(property_prefix)
| 267 | + |
| 268 | + |
def __is_keywords_header(header: str) -> bool:
    """Check whether a column header is the keywords column header.

    Args:
        header: column header for specs dataframe.

    Returns:
        True if the header equals the keywords column header. Otherwise
        returns False.

    """
    keywords_header = DataFrameHeaders.KEYWORDS_COLUMN_HEADER
    return header == keywords_header
| 280 | + |
| 281 | + |
def __generate_condition_column_header(condition: Condition) -> str:
    """Build the dataframe column header for a condition.

    Args:
        condition: Condition object for generating column header.

    Returns:
        The condition column header prefix followed by the condition name (empty
        if the condition has no name) and, only for a numeric condition value
        with a unit, the unit wrapped in parentheses.
    """
    condition_value = condition.value

    # Only numeric condition values contribute a unit suffix.
    unit_suffix = ""
    if isinstance(condition_value, NumericConditionValue) and condition_value.unit:
        unit_suffix = f"({condition_value.unit})"

    condition_name = condition.name if condition.name else ""

    return (
        f"{DataFrameHeaders.CONDITION_COLUMN_HEADER_PREFIX}"
        f"{condition_name}{unit_suffix}"
    )
| 299 | + |
| 300 | + |
def __serialize_condition_value(condition: Condition) -> List[str]:
    """Serialize the ranges and discrete values of a condition as strings.

    Args:
        condition: Condition for getting values.

    Returns:
        An empty list if the condition has no value. Otherwise the serialized
        ranges (numeric condition values only) followed by the serialized
        discrete values.
    """
    condition_value = condition.value
    if not condition_value:
        return []

    # Ranges are serialized only for numeric condition values.
    range_values = (
        __serialize_numeric_condition_range(value=condition_value)
        if isinstance(condition_value, NumericConditionValue)
        else []
    )
    discrete_values = __serialize_condition_discrete_values(value=condition_value)

    return [*range_values, *discrete_values]
| 321 | + |
| 322 | + |
def __serialize_numeric_condition_range(value: NumericConditionValue) -> List[str]:
    """Serialize the ranges of a numeric condition value.

    Args:
        value: A condition's value with NumericConditionValue type.

    Returns:
        An empty list if the value has no ranges. Otherwise one string per range
        in the format `[<field>: <value>; <field>: <value>; ...]`, e.g.
        `[min: <value>; max: <value>; step: <value>]`, listing only the range
        attributes that are not None.
    """
    if not value.range:
        return []

    serialized_ranges: List[str] = []
    for condition_range in value.range:
        range_parts = [
            f"{field_name}: {field_value}"
            for field_name, field_value in vars(condition_range).items()
            if field_value is not None
        ]
        serialized_ranges.append("[" + "; ".join(range_parts) + "]")

    return serialized_ranges
| 345 | + |
| 346 | + |
def __serialize_condition_discrete_values(
    value: Union[NumericConditionValue, StringConditionValue]
) -> List[str]:
    """Serialize the discrete values of a condition value.

    Args:
        value: A condition's value with either NumericConditionValue type or
            StringConditionValue type.

    Returns:
        The discrete values of the given value, each converted with `str`. An
        empty list if the value has no discrete values.
    """
    discrete_values = value.discrete
    if not discrete_values:
        return []
    return list(map(str, discrete_values))
0 commit comments