Changes to allow using the DELTA ImageryDataset class to directly interface with the TensorFlow API for training and inference. #152

Open. Wants to merge 10 commits into base: master
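At a glance, the point of the branch is that an `ImageryDataset` can be handed straight to `tf.keras`. A minimal sketch of the intended training flow, assuming `images` and `labels` are already-constructed DELTA `ImageSet` objects and `model` is a compiled Keras model (none of these are defined in this diff):

```python
import tensorflow as tf
from delta.imagery.imagery_dataset import ImageryDataset

# images, labels and model are assumed to exist already; see
# delta.imagery.imagery_config for how ImageSets are normally built.
dataset = ImageryDataset(images, labels,
                         output_shape=(256, 256), chunk_shape=(256, 256),
                         label_type=tf.uint8, shuffle=True)
ds = dataset.dataset().batch(8).prefetch(tf.data.experimental.AUTOTUNE)
model.fit(ds, epochs=10)  # plain tf.keras training, no DELTA-specific wrapper
```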
8 changes: 4 additions & 4 deletions .github/workflows/ci.yaml
@@ -15,11 +15,11 @@ jobs:
runs-on: ubuntu-latest

steps:
- - uses: actions/checkout@v2
- - name: Set up Python 3.6
-   uses: actions/setup-python@v2
+ - uses: actions/checkout@v3
+ - name: Set up Python 3.10
+   uses: actions/setup-python@v4
with:
-   python-version: 3.6
+   python-version: '3.10'
- name: Install DELTA
run: |
./scripts/setup.sh
10 changes: 9 additions & 1 deletion delta/config/config.py
@@ -14,6 +14,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

+ #pylint: disable=unsubscriptable-object

"""
Loading configuration from command line arguments and yaml files.

@@ -309,7 +312,7 @@ def load(self, yaml_file: Optional[str] = None, yaml_str: Optional[str] = None):
yaml_file: Optional[str]
Filename of a yaml file to load.
yaml_str: Optional[str]
- Load yaml directly from a str. Exactly one of `yaml_file` and `yaml_str`
+ Load yaml directly from a str. Exactly one of `yaml_file` or `yaml_str`
must be specified.
"""
base_path = None
@@ -350,17 +353,22 @@ def initialize(self, options: 'argparse.Namespace', config_files: Optional[List[
"""
self.reset()

+ #TODO: when None is supplied to this function, AppDirs doesn't find the right folder for the default
+ # delta.yaml file. It needs to look in python/conda installs.
if config_files is None:
dirs = appdirs.AppDirs('delta', 'nasa')
config_files = [os.path.join(dirs.site_config_dir, 'delta.yaml'),
os.path.join(dirs.user_config_dir, 'delta.yaml')]

+ #TODO: needs to check whether config_files is a list or a single string and handle each case appropriately
for filename in config_files:
if os.path.exists(filename):
config.load(filename)

if options is not None:
config.parse_args(options)

+ # TODO: need some sort of check that this is called in the highest-level script so that it's accessible
+ # everywhere. Just use global?
config = DeltaConfig()
"""Global config object. Use this to access all configuration."""
14 changes: 7 additions & 7 deletions delta/extensions/layers/efficientnet.py
@@ -217,13 +217,13 @@ def block(inputs, activation_fn=swish, drop_rate=0., name='',
activation='sigmoid',
kernel_initializer=CONV_KERNEL_INITIALIZER,
name=name + 'se_expand')(se)
- if backend.backend() == 'theano':
-     # For the Theano backend, we have to explicitly make
-     # the excitation weights broadcastable.
-     se = layers.Lambda(
-         lambda x: backend.pattern_broadcast(x, [True, True, True, False]),
-         output_shape=lambda input_shape: input_shape,
-         name=name + 'se_broadcast')(se)
+ #if backend.backend() == 'theano':
+ #    # For the Theano backend, we have to explicitly make
+ #    # the excitation weights broadcastable.
+ #    se = layers.Lambda(
+ #        lambda x: backend.pattern_broadcast(x, [True, True, True, False]),
+ #        output_shape=lambda input_shape: input_shape,
+ #        name=name + 'se_broadcast')(se) # pylint:disable=no-member
x = layers.multiply([x, se], name=name + 'se_excite')

# Output phase
4 changes: 3 additions & 1 deletion delta/extensions/layers/pretrained.py
@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+ #pylint: disable=unsubscriptable-object

"""
Use a pretrained model inside another network.
"""
@@ -38,7 +40,7 @@ def __init__(self, arg_number, **kwargs):
"""
super().__init__(**kwargs)
self._arg = arg_number
- def call(self, inputs, **kwargs): #pylint: disable=unused-argument
+ def call(self, inputs, **kwargs): #pylint: disable=unused-argument,arguments-differ
return inputs[self._arg]
def get_config(self):
return {'arg_number' : self._arg}
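For context, the `call` shown above simply forwards one tensor from a list of inputs. A small usage sketch (the class name and shapes are taken on trust from this file; treat them as assumptions):

```python
import tensorflow as tf
from delta.extensions.layers.pretrained import InputSelectLayer  # name assumed

a = tf.keras.Input(shape=(32, 32, 3))
b = tf.keras.Input(shape=(32, 32, 1))
picked = InputSelectLayer(1)([a, b])  # returns inputs[1], i.e. b, unchanged
model = tf.keras.Model(inputs=[a, b], outputs=picked)
```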
4 changes: 2 additions & 2 deletions delta/extensions/layers/simple.py
@@ -35,7 +35,7 @@ class RepeatedGlobalAveragePooling2D(tensorflow.keras.layers.Layer):
def compute_output_shape(self, input_shape): # pylint: disable=no-self-use
return input_shape

- def call(self, inputs, **_): # pylint: disable=no-self-use
+ def call(self, inputs, **_): # pylint: disable=no-self-use,arguments-differ
ones = tf.fill(tf.shape(inputs)[:-1], 1.0)
ones = tf.expand_dims(ones, -1)
mean = K.mean(inputs, axis=[1, 2])
@@ -56,7 +56,7 @@ def get_config(self):
config.update({'padding': self.padding})
return config

- def call(self, inputs, **_):
+ def call(self, inputs, **_): # pylint: disable=arguments-differ
w_pad,h_pad = self.padding
return tf.pad(inputs, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')

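A quick sketch of what `RepeatedGlobalAveragePooling2D` computes: the per-channel spatial mean, broadcast back over every pixel, so the output shape equals the input shape (consistent with `compute_output_shape` above):

```python
import tensorflow as tf
from delta.extensions.layers.simple import RepeatedGlobalAveragePooling2D

x = tf.random.uniform((1, 16, 16, 8))
y = RepeatedGlobalAveragePooling2D()(x)
print(y.shape)  # (1, 16, 16, 8): every spatial position holds the channel means
```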
10 changes: 6 additions & 4 deletions delta/extensions/sources/sentinel1.py
@@ -72,7 +72,6 @@ def run_ffilipponi_preprocessing(source_file, target_file):
print(cmd)
os.system(cmd)


def unpack_s1_to_folder(zip_path, unpack_folder):
'''Returns the merged image path from the unpack folder.
Unpacks the zip file and merges the source images as needed.'''
@@ -108,9 +107,13 @@ def unpack_s1_to_folder(zip_path, unpack_folder):
print('Unpacking file ' + zip_path + ' to folder ' + unpack_folder)
utilities.unpack_to_folder(zip_path, unpack_folder)
subdirs = os.listdir(unpack_folder)
- if len(subdirs) != 1:
+ safe_folder = None
+ for s in subdirs:
+     if s.endswith('.SAFE'):
+         safe_folder = s
+ if not safe_folder:
      raise Exception('Unexpected Sentinel1 subdirectories: ' + str(subdirs))
- cmd = 'mv ' + os.path.join(unpack_folder, subdirs[0]) +'/* ' + unpack_folder
+ cmd = 'mv ' + os.path.join(unpack_folder, safe_folder) +'/* ' + unpack_folder
print(cmd)
os.system(cmd)
source_image_paths = get_files_from_unpack_folder(unpack_folder)
@@ -129,7 +132,6 @@ def unpack_s1_to_folder(zip_path, unpack_folder):

dimap_path = temp_out_path + '.dim'
cmd = 'pconvert -s 0,0 -f GeoTIFF-BigTiff -o ' + os.path.dirname(temp_out_path) +' '+ dimap_path
- print(cmd)
os.system(cmd)
MIN_IMAGE_SIZE = 1024*1024*500 # 500 MB, expected size is much larger
if not os.path.exists(temp_out_path):
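The new `.SAFE` selection logic, restated as a standalone sketch (hypothetical paths, and `shutil` in place of shelling out to `mv`):

```python
import pathlib
import shutil

unpack_folder = pathlib.Path('/tmp/s1_unpack')  # hypothetical location
subdirs = list(unpack_folder.iterdir())
safe_folder = None
for s in subdirs:
    if s.name.endswith('.SAFE'):
        safe_folder = s
if safe_folder is None:
    raise Exception('Unexpected Sentinel1 subdirectories: ' + str(subdirs))
for item in safe_folder.iterdir():  # hoist the .SAFE contents up one level
    shutil.move(str(item), str(unpack_folder))
```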
(changes to an XML file; file name not captured in this view)
@@ -51,7 +51,6 @@
<saveLocalIncidenceAngle>false</saveLocalIncidenceAngle>
<saveProjectedLocalIncidenceAngle>false</saveProjectedLocalIncidenceAngle>
<saveSelectedSourceBand>true</saveSelectedSourceBand>
- <outputComplex>false</outputComplex>
<applyRadiometricNormalization>false</applyRadiometricNormalization>
<saveSigmaNought>false</saveSigmaNought>
<saveGammaNought>false</saveGammaNought>
(changes to a second XML file; file name not captured in this view)
@@ -117,7 +117,6 @@
<saveLocalIncidenceAngle>false</saveLocalIncidenceAngle>
<saveProjectedLocalIncidenceAngle>false</saveProjectedLocalIncidenceAngle>
<saveSelectedSourceBand>true</saveSelectedSourceBand>
- <outputComplex>false</outputComplex>
<applyRadiometricNormalization>false</applyRadiometricNormalization>
<saveSigmaNought>false</saveSigmaNought>
<saveGammaNought>false</saveGammaNought>
5 changes: 4 additions & 1 deletion delta/extensions/sources/tiff.py
100644 → 100755
@@ -206,6 +206,7 @@ def metadata(self):
data['gcps'] = h.GetGCPs()
data['gcpproj'] = h.GetGCPProjection()
data['metadata'] = h.GetMetadata()
+ data['spatial_ref'] = h.GetSpatialRef()
return data

def block_aligned_roi(self, desired_roi):
@@ -359,10 +360,11 @@ def __initialize(self, path, num_bands, data_type, nodata_value, metadata):
self._handle.GetRasterBand(i).SetNoDataValue(nodata_value)

if metadata:
- self._handle.SetGCPs        (metadata['gcps'], metadata['gcpproj'])
  self._handle.SetProjection  (metadata['projection' ])
  self._handle.SetGeoTransform(metadata['geotransform'])
  self._handle.SetMetadata    (metadata['metadata' ])
+ self._handle.SetGCPs        (metadata['gcps'], metadata['gcpproj'])
+ self._handle.SetSpatialRef  (metadata['spatial_ref' ])

def __del__(self):
self.close()
@@ -380,6 +382,7 @@ def tile_shape(self):
def close(self):
if self._handle is not None:
self._handle.FlushCache()
+ del self._handle
self._handle = None

def get_num_tiles(self):
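The new `spatial_ref` entry round-trips through GDAL's `GetSpatialRef`/`SetSpatialRef`, which require GDAL 3+. A minimal sketch of the same calls outside DELTA, with placeholder paths:

```python
from osgeo import gdal

src = gdal.Open('input.tif')  # placeholder path
driver = gdal.GetDriverByName('GTiff')
dst = driver.Create('output.tif', src.RasterXSize, src.RasterYSize,
                    src.RasterCount, gdal.GDT_Byte)
dst.SetProjection(src.GetProjection())
dst.SetGeoTransform(src.GetGeoTransform())
dst.SetMetadata(src.GetMetadata())
dst.SetGCPs(src.GetGCPs(), src.GetGCPProjection())
dst.SetSpatialRef(src.GetSpatialRef())  # may be None on older files
dst.FlushCache()
del dst  # mirrors the explicit del added to close() above
```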
3 changes: 3 additions & 0 deletions delta/imagery/imagery_config.py
@@ -50,6 +50,8 @@ def __init__(self, images, image_type, preprocess=None, nodata_value=None):
nodata_value: image dtype
A no data value for pixels to disregard
"""
+ # TODO: if a list of lists of file strings is passed in, it will open them all together and concatenate
+ # them. Is this intended behavior or a bug? Could be useful? 🤷‍♀️
self._images = images
self._image_type = image_type
self._preprocess = preprocess
@@ -531,6 +533,7 @@ class CacheConfig(DeltaConfigComponent):
Configuration for cache.
"""
def __init__(self):
+ # TODO: Is this missing a 'Cache' argument in the super().__init__() call, like IOConfig has?
super().__init__()
self.register_field('dir', str, None, validate_path, 'Cache directory.')
self.register_field('limit', int, None, validate_positive, 'Number of items to cache.')
42 changes: 32 additions & 10 deletions delta/imagery/imagery_dataset.py
@@ -34,7 +34,7 @@ class ImageryDataset: # pylint: disable=too-many-instance-attributes,too-many-ar
"""

def __init__(self, images, labels, output_shape, chunk_shape, stride=None,
tile_shape=(256, 256), tile_overlap=None, max_rand_offset=None):
tile_shape=(256, 256), tile_overlap=None, max_rand_offset=None, label_type=tf.uint8, shuffle=True):
"""
Parameters
----------
@@ -56,7 +56,8 @@ def __init__(self, images, labels, output_shape, chunk_shape, stride=None,
If specified, in each epoch, offset all tiles by a random amount in x and y
in the range(-max_rand_offset, max_rand_offset).
"""

+ # TODO: the description implies this should be optional; need to add logic to set it equal to tile_shape
+ # when it is not supplied?
self._iopool = ThreadPoolExecutor(config.io.threads())

# Record some of the config values
Expand All @@ -67,7 +68,8 @@ def __init__(self, images, labels, output_shape, chunk_shape, stride=None,
stride = (1, 1)
self._stride = stride
self._data_type = tf.float32
- self._label_type = tf.uint8
+ # self._label_type = tf.uint8
+ self._label_type = label_type
self._tile_shape = tile_shape
if tile_overlap is None:
tile_overlap = (0, 0)
@@ -83,6 +85,9 @@
# Load the first image to get the number of bands for the input files.
self._num_bands = images.load(0).num_bands()
self._random_seed = random.randint(0, 1 << 16)
+ #TODO: go through details more to ensure shuffle=False does result in non-shuffled behavior for repeated
+ # calls of .dataset - seems to be mostly implemented. Some more testing though before submitting. Maybe pytest?
+ self._shuffle = shuffle

def _list_tiles(self, i): # pragma: no cover
"""
@@ -147,12 +152,18 @@ def _tile_generator(self, is_labels): # pragma: no cover
def tile_gen():
image_tiles = [(images[i], self._list_tiles(i)) for i in range(len(images))]
# shuffle tiles within each image
- for (img, tiles) in image_tiles:
-     rand.shuffle(tiles)
+ # TODO: implement toggleable shuffle
+ if self._shuffle:
+     for (img, tiles) in image_tiles:
+         rand.shuffle(tiles)
# create iterator
image_tiles = [(img, iter(tiles)) for (img, tiles) in image_tiles]
while image_tiles:
- index = rand.randrange(len(image_tiles))
+ # TODO: implement toggleable shuffle
+ if self._shuffle:
+     index = rand.randrange(len(image_tiles))
+ else:
+     index = 0
(img, it) = image_tiles[index]
try:
yield (img, next(it))
@@ -212,12 +223,20 @@ def add_to_queue(buf_queue, item):
except StopIteration:
pass
while True:
- buf_index = rand.randrange(len(cur_bufs))
+ # TODO: implement toggleable shuffle
+ if self._shuffle:
+     buf_index = rand.randrange(len(cur_bufs))
+ else:
+     buf_index = 0
(sub_tiles, buf) = cur_bufs[buf_index]
if not sub_tiles:
del cur_bufs[buf_index]
break
- sub_index = rand.randrange(len(sub_tiles))
+ # TODO: implement toggleable shuffle
+ if self._shuffle:
+     sub_index = rand.randrange(len(sub_tiles))
+ else:
+     sub_index = 0
s = sub_tiles[sub_index]
del sub_tiles[sub_index]
yield buf[s.min_y:s.max_y, s.min_x:s.max_x, :]
@@ -323,11 +342,14 @@ def dataset(self, class_weights=None, augment_function=None):
"""

# Pair the data and labels in our dataset
- ds = tf.data.Dataset.zip((self.data(), self.labels()))
+ if self._labels:
+     ds = tf.data.Dataset.zip((self.data(), self.labels()))
+ else:
+     ds = self.data()
# ignore chunks which are all nodata (nodata is re-indexed to be after the classes)
# cannot do with max_rand_offset since would have different number of tiles which
# breaks keras fit
- if self._labels.nodata_value() is not None:
+ if self._labels and self._labels.nodata_value() is not None:
if self._labels and self._labels.nodata_value() is not None:
ds = ds.filter(lambda x, y: tf.math.reduce_any(tf.math.not_equal(y, self._labels.nodata_value())))
if augment_function is not None:
ds = ds.map(augment_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
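Net effect of this file's changes: `dataset()` no longer requires labels and tile order can be made deterministic, which is what inference needs. A hedged sketch, assuming the constructor tolerates `labels=None` as the hunk above implies, and that `images` and `model` already exist:

```python
from delta.imagery.imagery_dataset import ImageryDataset

# No labels and no shuffling: tiles come back in a repeatable order.
dataset = ImageryDataset(images, None,
                         output_shape=(256, 256), chunk_shape=(256, 256),
                         shuffle=False)
ds = dataset.dataset().batch(1)
predictions = model.predict(ds)  # straight through the TensorFlow API
```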
2 changes: 2 additions & 0 deletions delta/ml/config_parser.py
@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+ #pylint: disable=unsubscriptable-object

"""
Functions to support loading custom ML-related objects from dictionaries specified
in yaml files. Includes constructing custom neural networks and more.
2 changes: 1 addition & 1 deletion delta/ml/io.py
@@ -67,7 +67,7 @@ def load_model(filename):
if version.parse(tensorflow.__version__) < version.parse('2.2'): # need to load newer models
# renamed to Model from Functional in newer versions.
# Also added Conv2D groups parameter
- class OldModel(tensorflow.keras.models.Model): # pylint: disable=too-many-ancestors
+ class OldModel(tensorflow.keras.models.Model): # pylint: disable=too-many-ancestors,abstract-method
@classmethod
def from_config(cls, config, custom_objects=None): #pylint: disable=redefined-outer-name
for l in config['layers']:
2 changes: 2 additions & 0 deletions delta/ml/ml_config.py
@@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+ #pylint: disable=unsubscriptable-object

"""
Configuration options specific to machine learning.
"""
23 changes: 12 additions & 11 deletions scripts/classify_directory.py
@@ -305,35 +305,36 @@ def call_presoak(args, input_path, output_folder, unknown_args):
return (True, presoak_output_folder, presoak_output_cost_path, presoak_output_dem_path)


- def delete_from_dict(d, name):
-     '''Delete the named field from the provided dictionary d'''
+ def set_value_in_dict_recursive(d, name, newValue):
+     '''Find the named field in the dictionary d and set it to the given value'''

- remove = None
+ index = None
if isinstance(d, dict):
for k, v in d.items():
if k == name:
- remove = name
+ index = name
continue
if isinstance(v, (dict, list)):
- delete_from_dict(v, name)
+ set_value_in_dict_recursive(v, name, newValue)
else:
for i, v in enumerate(d):
if v == name:
- remove = i
+ index = i
continue
if isinstance(v, (dict, list)):
- delete_from_dict(v, name)
- if remove is not None:
-     d.pop(remove)
+ set_value_in_dict_recursive(v, name, newValue)
+ if index is not None:
+     d[index] = newValue


def make_no_preprocess_config(input_path, output_path):
'''Generate version of config file with preprocess steps stripped out'''
with open(input_path) as f:
config_yaml = yaml.safe_load(f)
- delete_from_dict(config_yaml, 'preprocess')
+ set_value_in_dict_recursive(config_yaml, 'preprocess', None)
+ text = yaml.dump(config_yaml)
with open(output_path, 'w') as f:
- yaml.dump(config_yaml, f)
+ f.write(text)


def call_delta(args, input_path, output_folder, input_name,
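A toy run of the renamed helper (not a real DELTA config), showing why setting the value beats deleting the key when downstream code expects the key to exist:

```python
config_yaml = {'images': {'preprocess': [{'scale': {}}], 'type': 'tiff'}}
set_value_in_dict_recursive(config_yaml, 'preprocess', None)
print(config_yaml)  # {'images': {'preprocess': None, 'type': 'tiff'}}
```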
2 changes: 1 addition & 1 deletion scripts/fetch/get_landsat_dswe_labels.py
@@ -25,7 +25,7 @@
import argparse
import subprocess

- import gdal
+ from osgeo import gdal
from osgeo import osr

from usgs import api
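Modern GDAL wheels only expose the `osgeo` package, which is what this fix reflects. If a script must also tolerate very old GDAL installs, a common compatibility shim (not part of this PR) looks like:

```python
try:
    from osgeo import gdal, osr
except ImportError:  # ancient GDAL builds shipped top-level modules
    import gdal
    import osr
```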