Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No overlap counts #150

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 55 additions & 4 deletions src/rasterstats/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def gen_zonal_stats(
layer=0,
band=1,
nodata=None,
no_overlap=None,
affine=None,
stats=None,
all_touched=False,
Expand Down Expand Up @@ -139,7 +140,31 @@ def gen_zonal_stats(
warnings.warn("Use `band` to specify band number", DeprecationWarning)
band = band_num

with Raster(raster, affine, nodata, band) as rast:

if 'no_overlap' in stats:

if 'nodata' in stats and nodata is None:
nodata = -999
warnings.warn("Setting nodata to -999; specify nodata explicitly "
"when requesting no_overlap stat")

if no_overlap is None:
no_overlap = -998
if no_overlap == nodata:
no_overlap = -997

warnings.warn("Setting no_overlap to {0}; specify no_overlap "
"explicitly when request the `no_overlap` stat".format(
no_overlap))
elif no_overlap == nodata:
raise Exception("`no_overlap` value is equal to `nodata` value. "
"Values must be distinct to calculate `no_overlap`")

tmp_nodata = no_overlap if 'no_overlap' in stats else nodata


with Raster(raster, affine=affine, nodata=tmp_nodata, band=band) as rast:

features_iter = read_features(vectors, layer)
for _, feat in enumerate(features_iter):
geom = shape(feat['geometry'])
Expand All @@ -149,26 +174,49 @@ def gen_zonal_stats(

geom_bounds = tuple(geom.bounds)

fsrc = rast.read(bounds=geom_bounds)
fsrc = rast.read(bounds=geom_bounds, masked=False)

# rasterized geometry
rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)

if nodata is None and no_overlap is None:
nodata = fsrc.nodata

# nodata mask
isnodata = (fsrc.array == fsrc.nodata)

# include actual nodata val when no_overlap is used
if nodata is not None and no_overlap is not None:
isnodata = (isnodata | (fsrc.array == nodata))

# add nan mask (if necessary)
has_nan = (np.issubdtype(fsrc.array.dtype, float)
and np.isnan(fsrc.array.min()))
if has_nan:
isnodata = (isnodata | np.isnan(fsrc.array))


# Mask the source data array
# mask everything that is not a valid value or not within our geom
masked = np.ma.MaskedArray(
fsrc.array,
mask=(isnodata | ~rv_array))

# print stats
# print "no_overlap: {0}".format(no_overlap)
# print "nodata: {0}".format(nodata)
# print "tmp_nodata: {0}".format(tmp_nodata)
# print "fsrc.nodata: {0}".format(fsrc.nodata)

# print "fsrc.array"
# print fsrc.array
# print "isnodata"
# print isnodata
# print "rv_array"
# print rv_array
# print "masked"
# print masked

# execute zone_func on masked zone ndarray
if zone_func is not None:
if not callable(zone_func):
Expand Down Expand Up @@ -233,13 +281,16 @@ def gen_zonal_stats(
pctarr = masked.compressed()
feature_stats[pctile] = np.percentile(pctarr, q)

if 'nodata' in stats or 'nan' in stats:

if any(i in stats for i in ['nodata', 'nan', 'no_overlap']):
featmasked = np.ma.MaskedArray(fsrc.array, mask=(~rv_array))

if 'nodata' in stats:
feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum())
feature_stats['nodata'] = float((featmasked == nodata).sum())
if 'nan' in stats:
feature_stats['nan'] = float(np.isnan(featmasked).sum()) if has_nan else 0
if 'no_overlap' in stats:
feature_stats['no_overlap'] = float((featmasked == no_overlap).sum())

if add_stats is not None:
for stat_name, stat_func in add_stats.items():
Expand Down
2 changes: 1 addition & 1 deletion src/rasterstats/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

DEFAULT_STATS = ['count', 'min', 'max', 'mean']
VALID_STATS = DEFAULT_STATS + \
['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan']
['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan', 'no_overlap']
# also percentile_{q} but that is handled as special case


Expand Down
1 change: 1 addition & 0 deletions tests/data/single_polygon_partial_overlap.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file added tests/data/single_polygon_partial_overlap.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/single_polygon_partial_overlap.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["Albers",GEOGCS["GCS_GRS 1980(IUGG, 1980)",DATUM["D_unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_origin",34],PARAMETER["central_meridian",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]]
1 change: 1 addition & 0 deletions tests/data/single_polygon_partial_overlap.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["unnamed",GEOGCS["GRS 1980(IUGG, 1980)",DATUM["unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_center",34],PARAMETER["longitude_of_center",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]]
Binary file added tests/data/single_polygon_partial_overlap.shp
Binary file not shown.
Binary file added tests/data/single_polygon_partial_overlap.shx
Binary file not shown.
101 changes: 96 additions & 5 deletions tests/test_zonal.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,6 @@ def test_geojson_out():
assert 'id' in feature['properties'] # from orig
assert 'count' in feature['properties'] # from zonal stats



# do not think this is actually testing the line i wanted it to
# since the read_features func for this data type is generating
# the properties field
Expand Down Expand Up @@ -488,10 +486,9 @@ def test_copy_properties_warn():
with pytest.deprecated_call():
stats_b = zonal_stats(polygons, raster, copy_properties=True)
assert stats_a == stats_b


def test_nan_counts():
from affine import Affine
transform = Affine(1, 0, 1, 0, -1, 3)

data = np.array([
Expand All @@ -504,7 +501,7 @@ def test_nan_counts():
geom = 'POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))'

# nan stat is requested
stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*")
stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="count nodata nan")

for res in stats:
assert res['count'] == 3 # 3 pixels of valid data
Expand All @@ -519,6 +516,100 @@ def test_nan_counts():
assert res['nodata'] == 3 # 3 pixels of nodata
assert 'nan' not in res

# nan stat is not impacted by no_overlap
stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*")

for res in stats:
assert res['count'] == 3 # 3 pixels of valid data
assert res['nodata'] == 3 # 3 pixels of nodata
assert res['nan'] == 3 # 3 pixels of nans
assert res['no_overlap'] == 0 # 3 pixels of nans


def test_array_overlap_counts():
nodata = -9999
no_overlap = -8888

transform = Affine(1, 0, 1, 0, -1, 3)

# data = np.array([
# [-9999, 0.0, 516.2840576171875, 524.4825439453125],
# [-9999, 178.74169921875, 573.80126953125, 415.345947265625],
# [-9999, 397.3150939941406, 568.3016357421875, 185.3491973876953]
# ])

data = np.array([
[-9999, 0.0, 516.2840576171875, np.nan],
[-9999, 178.74169921875, 573.80126953125, 415.345947265625],
[-9999, 397.3150939941406, 568.3016357421875, 185.3491973876953]
])

geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))'

stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap)

for res in stats:
assert res['count'] == 8 # Two pixels of valid data
assert res['nodata'] == 3 # Two pixels of nodata
assert res['no_overlap'] == 3 # Three pixels of no overlap
assert res['nan'] == 1 # One pixel of nan


def test_missing_no_overlap_logic():
nodata = -998
no_overlap = None

transform = Affine(1, 0, 1, 0, -1, 3)

data = np.array([
[-998, 0.0, 516.2840576171875, np.nan],
[-998, 178.74169921875, 573.80126953125, 415.345947265625],
[-998, 397.3150939941406, 568.3016357421875, 185.3491973876953]
])

geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))'

stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap)

for res in stats:
assert res['count'] == 8 # Two pixels of valid data
assert res['nodata'] == 3 # Two pixels of nodata
assert res['no_overlap'] == 3 # Three pixels of no overlap
assert res['nan'] == 1 # One pixel of nan


def test_nodata_and_no_overlap_check():
nodata = -9999
no_overlap = -9999

transform = Affine(1, 0, 1, 0, -1, 3)

data = np.array([
[-998, 0.0, 516.2840576171875, np.nan],
[-998, 178.74169921875, 573.80126953125, 415.345947265625],
[-998, 397.3150939941406, 568.3016357421875, 185.3491973876953]
])

geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))'

with pytest.raises(Exception):
stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap)


def test_raster_overlap_counts():
nodata = -9999
no_overlap = -8888

# same shape/overlap/nodata-pixel as test_array_overlap_counts
polygons = os.path.join(DATA, 'single_polygon_partial_overlap.shp')

stats = zonal_stats(polygons, raster, stats="*", nodata=nodata, no_overlap=no_overlap)

for res in stats:
assert res['count'] == 9 # Two pixels of valid data
assert res['nodata'] == 3 # Two pixels of nodata
assert res['no_overlap'] == 3 # Three pixels of no overlap


# Optional tests
def test_geodataframe_zonal():
Expand Down