Skip to content

Commit 297374a

Browse files
committed
ENH: Added predicate to request dataset
- Required minor fix to NetCDF variable loading
1 parent 851fafb commit 297374a

File tree

6 files changed

+91
-15
lines changed

6 files changed

+91
-15
lines changed

doc/changelog.rst

+27-9
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,46 @@
1-
==========
2-
Change Log
3-
==========
1+
===========
2+
Change Logs
3+
===========
44

55
.. _backwards-compatibility-v1.3:
66

7-
Version ``2.x`` Backwards Compatibility
8-
---------------------------------------
7+
Version 2.0.0 Change Log
8+
------------------------
99

1010
.. note:: Version ``1.3.x`` will be maintained for bug fixes and dependency upgrades. It is recommended for all users to upgrade to ``v2.x``.
1111

1212
Some changes in ``v2.x`` will break backwards compatibility with ``v1.3.x``. These changes are listed below. If any of these changes affect your workflow, please post a `GitHub Issue <https://github.com/NCPP/ocgis/issues>`_ or contact the `support list <mailto:ocgis_info@list.woc.noaa.gov>`_.
1313

14-
* Changed dimension map format. See :ref:`configuring-a-dimension-map` for the new configuration. Use :meth:`~ocgis.DimensionMap.from_old_style_dimension_map` to convert old-style dimension maps.
14+
:class:`~ocgis.RequestDataset`
15+
++++++++++++++++++++++++++++++
16+
17+
* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems.
1518
* Removed :class:`Inspect` object. Use the :meth:`~ocgis.RequestDataset.inspect` method.
19+
* Changed dimension map format. See :ref:`configuring-a-dimension-map` for the new configuration. Use :meth:`~ocgis.DimensionMap.from_old_style_dimension_map` to convert old-style dimension maps.
20+
* Removed ``alias`` parameters and attributes. Aliases are replaced by explicit name parameters (see :ref:`rename_variable <request-dataset>` for example).
21+
* Removed :meth:`RequestDataset.inspect_as_dict` method.
22+
23+
:class:`~ocgis.RequestDatasetCollection`
24+
++++++++++++++++++++++++++++++++++++++++
25+
1626
* Removed the :class:`RequestDatasetCollection` object in favor of request dataset or field sequences.
27+
28+
:class:`~ocgis.OcgOperations`
29+
+++++++++++++++++++++++++++++
30+
31+
* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems.
32+
* All collection variables are subset by shared dimensions (i.e. anything sharing a spatial dimension). Use the ``predicate`` argument to :class:`~ocgis.RequestDataset` to include/exclude variables.
1733
* Removed unique dimension identifiers (``TID``, ``LID``, etc.) from tabular outputs. Unique geometry identifiers are maintained for foreign key file relationships.
18-
* Removed ``alias`` parameters and attributes. Aliases are replaced by explicit name parameters (see :ref:`rename_variable <request-dataset>` for example).
1934
* Changed default unique identifier for no geometry from ``1`` to ``None``.
20-
* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems.
2135
* Removed ``headers`` argument from operations. The tabular structure has been streamlined in ``v2.x`` by removing extraneous identifier variables.
2236
* Removed global unique identifier as a default property of all variable objects. Dataset geometry identifiers are now unique within a subset operation.
2337
* Removed check for ``data`` masking following a subset to avoid loading all data from file to retrieve the mask. The coordinate masking is still evaluated for empty subsets.
2438
* Changed logging output directory to a nested ``logs`` directory inside output directory when ``add_auxiliary_files`` is ``True``.
2539
* Changed masked values in tabular formats to ``None`` from the numeric fill value.
26-
* Removed :meth:`RequestDataset.inspect_as_dict` method.
2740
* Changed :ref:`search_radius_mult key` default to ``None``. Point subsetting will now use the point geometry for intersects operations. Point geometries are no longer buffered by default.
2841
* Removed UGRID conversion. Use `ugrid-tools <https://github.com/NESII/ugrid-tools>`_ to convert to ESMF Unstructured Format.
42+
43+
:class:`~ocgis.Inspect`
44+
+++++++++++++++++++++++
45+
46+
* Removed :class:`Inspect` object. Use the :meth:`~ocgis.RequestDataset.inspect` method.

src/ocgis/driver/base.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ocgis.collection.field import Field
1414
from ocgis.constants import MPIWriteMode, TagName, KeywordArgument
1515
from ocgis.driver.dimension_map import DimensionMap
16-
from ocgis.exc import DefinitionValidationError, NoDataVariablesFound, DimensionMapError
16+
from ocgis.exc import DefinitionValidationError, NoDataVariablesFound, DimensionMapError, VariableMissingMetadataError
1717
from ocgis.util.helpers import get_group
1818
from ocgis.util.logging_ocgis import ocgis_lh
1919
from ocgis.variable.base import SourcedVariable, VariableCollection
@@ -291,7 +291,20 @@ def get_metadata(self):
291291
:rtype: dict
292292
"""
293293

294-
return self._get_metadata_main_()
294+
metadata_subclass = self._get_metadata_main_()
295+
296+
# Use the predicate (filter) if present on the request dataset.
297+
# TODO: Should handle groups?
298+
pred = self.rd.predicate
299+
if pred is not None:
300+
to_pop = []
301+
for var_name in metadata_subclass['variables'].keys():
302+
if not pred(var_name):
303+
to_pop.append(var_name)
304+
for var_name in to_pop:
305+
metadata_subclass['variables'].pop(var_name)
306+
307+
return metadata_subclass
295308

296309
def get_source_metadata_as_json(self):
297310
# tdk: test
@@ -729,7 +742,12 @@ def get_dump_report_for_group(group, global_attributes_name='global', indent=0):
729742

730743

731744
def get_variable_metadata_from_request_dataset(driver, variable):
732-
return get_group(driver.metadata_source, variable.group, has_root=False)['variables'][variable._source_name]
745+
variables_metadata = get_group(driver.metadata_source, variable.group, has_root=False)['variables']
746+
try:
747+
ret = variables_metadata[variable._source_name]
748+
except KeyError:
749+
raise VariableMissingMetadataError(variable._source_name)
750+
return ret
733751

734752

735753
def iter_all_group_keys(ddict, entry=None, has_root=True):

src/ocgis/driver/nc.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -393,10 +393,15 @@ def read_from_collection(target, request_dataset, parent=None, name=None, source
393393

394394
ret = VariableCollection(attrs=get_netcdf_attributes(target), parent=parent, name=name, source_name=source_name,
395395
uid=uid)
396+
pred = request_dataset.predicate
396397
for varname, ncvar in target.variables.items():
398+
if pred is not None and not pred(varname):
399+
continue
397400
source_name = varname
398401
name = rename_variable_map.get(varname, varname)
399-
ret[name] = SourcedVariable(name=name, request_dataset=request_dataset, parent=ret, source_name=source_name)
402+
sv = SourcedVariable(name=name, request_dataset=request_dataset, parent=ret, source_name=source_name)
403+
ret[name] = sv
404+
400405
for group_name, ncgroup in list(target.groups.items()):
401406
child = read_from_collection(ncgroup, request_dataset, parent=ret, name=group_name, uid=uid)
402407
ret.add_child(child)

src/ocgis/driver/request/core.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ class RequestDataset(AbstractRequestObject):
114114
:param opened: An open file used as a write target for the driver.
115115
:type opened: varies by ``driver`` class
116116
:param int uid: A unique identifier for the request dataset.
117+
:param predicate: A filter function returning ``True`` if a variable should be included in the output field. The
118+
function should take a single argument which is a string variable name. This function is applied
119+
directly to the metadata before other functions (i.e. identifying data variables).
120+
:type predicate: `function`
121+
122+
>>> predicate = lambda x: x.startswith('w')
117123
118124
.. _time units: http://netcdf4-python.googlecode.com/svn/trunk/docs/netCDF4-module.html#num2date
119125
.. _time calendar: http://netcdf4-python.googlecode.com/svn/trunk/docs/netCDF4-module.html#num2date
@@ -124,7 +130,7 @@ def __init__(self, uri=None, variable=None, units=None, time_range=None, time_re
124130
time_subset_func=None, level_range=None, conform_units_to=None, crs='auto', t_units=None,
125131
t_calendar=None, t_conform_units_to=None, grid_abstraction='auto', dimension_map=None,
126132
field_name=None, driver=None, regrid_source=True, regrid_destination=False, metadata=None,
127-
format_time=True, opened=None, uid=None, rename_variable=None):
133+
format_time=True, opened=None, uid=None, rename_variable=None, predicate=None):
128134

129135
self._is_init = True
130136

@@ -134,6 +140,7 @@ def __init__(self, uri=None, variable=None, units=None, time_range=None, time_re
134140
self._time_region = None
135141
self._time_subset_func = None
136142

143+
self.predicate = predicate
137144
if dimension_map is not None and isinstance(dimension_map, dict):
138145
dimension_map = DimensionMap.from_dict(dimension_map)
139146
self._dimension_map = dimension_map

src/ocgis/exc.py

+8
Original file line numberDiff line numberDiff line change
@@ -422,3 +422,11 @@ class DimensionMapError(OcgException):
422422
def __init__(self, entry_key, message):
423423
msg = "Error with entry key '{}': {}".format(entry_key, message)
424424
super(DimensionMapError, self).__init__(message=msg)
425+
426+
427+
class VariableMissingMetadataError(OcgException):
428+
"""Raised when variable metadata cannot be found."""
429+
430+
def __init__(self, variable_name):
431+
msg = 'Variable is missing metadata: {}'.format(variable_name)
432+
super(VariableMissingMetadataError, self).__init__(message=msg)

src/ocgis/test/test_ocgis/test_driver/test_request/test_core.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from ocgis import RequestDataset
5+
from ocgis import RequestDataset, Variable
66
from ocgis.collection.field import Field
77
from ocgis.constants import TagName, MiscName, DimensionMapKey
88
from ocgis.driver.nc import DriverNetcdf, DriverNetcdfCF
@@ -96,6 +96,26 @@ def test_init_field_name(self):
9696
self.assertIsNone(field.source_name)
9797
field.load()
9898

99+
def test_system_predicate(self):
100+
"""Test creating a request dataset with a predicate."""
101+
102+
path = self.get_temporary_file_path('foo.nc')
103+
field = self.get_field()
104+
to_exclude = Variable(name='exclude')
105+
field.add_variable(to_exclude)
106+
field.write(path)
107+
108+
rd = RequestDataset(uri=path, predicate=lambda x: not x.startswith('exclude'))
109+
self.assertNotIn('exclude', rd.metadata['variables'])
110+
actual = rd.get()
111+
self.assertNotIn('exclude', actual)
112+
113+
# Test predicate affects data variable identification.
114+
path = self.get_temporary_file_path('foo.nc')
115+
rd = RequestDataset(uri=path, predicate=lambda x: x != 'foo')
116+
with self.assertRaises(NoDataVariablesFound):
117+
assert rd.variable
118+
99119
@attr('cfunits')
100120
def test_conform_units_to(self):
101121
rd = self.get_request_dataset_netcdf(variable='a', units='celsius', conform_units_to='fahrenheit')

0 commit comments

Comments
 (0)