Skip to content

Commit 333646c

Browse files
authored
Merge pull request #184 from spotify/lynn/add-dft-metrics-pt1
[lib, docs] Add default metrics for batch IO transforms, and retry & timeout decorator
2 parents b08c1fd + d82945e commit 333646c

File tree

17 files changed

+749
-109
lines changed

17 files changed

+749
-109
lines changed

docs/src/conf.py

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ def find_version(*file_paths):
164164
linkcheck_anchors_ignore = [
165165
"changelog-format",
166166
"update-changelog",
167+
"matplotlib.figure.Figure",
167168
]
168169

169170
# -- Options for HTML output -------------------------------------------------

docs/src/reference/lib/api/transforms/helpers.rst

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Helpers
44

55
.. currentmodule:: klio.transforms.helpers
66

7+
.. autoclass:: KlioMessageCounter()
78
.. autoclass:: KlioGcsCheckInputExists()
89
.. autoclass:: KlioGcsCheckOutputExists()
910
.. autoclass:: KlioFilterPing()

docs/src/reference/lib/api/transforms/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
.. autosummary::
3838
:nosignatures:
3939

40+
KlioMessageCounter
4041
KlioGcsCheckInputExists
4142
KlioGcsCheckOutputExists
4243
KlioFilterPing

exec/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def get_long_description(package_dir):
172172
"pytest-mock",
173173
],
174174
"debug": [
175-
"line_profiler", # wall time profiling
175+
"line_profiler<3.2", # wall time profiling
176176
"matplotlib", # needed for plotting mem/CPU usage
177177
"numpy", # needed for plotting
178178
"memory_profiler",

integration/audio-spectrograms/expected_job_output.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
INFO:root:Found worker image: integration-klio-audio:audio-spectrograms
2-
INFO:matplotlib.font_manager:Generating new fontManager, this may take some time...
2+
INFO:matplotlib.font_manager:generated new fontManager
33
DEBUG:klio:Loading config file from /usr/local/klio-job-run-effective.yaml.
44
DEBUG:klio:KlioMessage full audit log - Entity ID: - Path: fluffy-zelda-glitch-toki-kobe::klio-audio (current job)
55
DEBUG:klio:Process 'battleclip_daq': Ping mode OFF.

integration/audio-spectrograms/run.py

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"apache_beam.internal.gcp.auth",
3434
"oauth2client.transport",
3535
"oauth2client.client",
36+
"klio.metrics",
3637
# The concurrency logs may be different for every machine, so let's
3738
# just turn them off
3839
"klio.concurrency",

lib/src/klio/transforms/_helpers.py

+34-2
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,15 @@ class _KlioInputDataMixin(object):
171171

172172
DIRECTION_PFX = KlioIODirection.INPUT
173173

174+
def setup(self, *args, **kwargs):
    """Run the parent ``setup`` and register the input-data counters.

    All arguments are forwarded unchanged to the next ``setup`` in the
    MRO. Two counters are then created through ``self._klio.metrics``
    and stored on the instance as ``found_ctr`` and ``not_found_ctr``;
    both are labelled with ``self._transform_name``.
    """
    super(_KlioInputDataMixin, self).setup(*args, **kwargs)

    # Same label for both counters, so look it up once.
    transform_name = self._transform_name
    self.found_ctr = self._klio.metrics.counter(
        "kmsg-data-found-input", transform=transform_name
    )
    self.not_found_ctr = self._klio.metrics.counter(
        "kmsg-data-not-found-input", transform=transform_name
    )
182+
174183
@property
175184
def _data_config(self):
176185
# TODO: figure out how to support multiple inputs
@@ -198,6 +207,11 @@ def _data_config(self):
198207
)
199208
return self._klio.config.job_config.data.inputs[0]
200209

210+
@property
def _transform_name(self):
    """Return the class name of this instance.

    The lookup goes through the instance, so a class that inherits
    from this mixin reports its own name rather than the mixin's.
    """
    return type(self).__name__
214+
201215

202216
class _KlioOutputDataMixin(object):
203217
"""Mixin to add output-specific logic for a data existence check.
@@ -207,6 +221,15 @@ class _KlioOutputDataMixin(object):
207221

208222
DIRECTION_PFX = KlioIODirection.OUTPUT
209223

224+
def setup(self, *args, **kwargs):
    """Run the parent ``setup`` and register the output-data counters.

    All arguments are forwarded unchanged to the next ``setup`` in the
    MRO. Two counters are then created through ``self._klio.metrics``
    and stored on the instance as ``found_ctr`` and ``not_found_ctr``;
    both are labelled with ``self._transform_name``.
    """
    super(_KlioOutputDataMixin, self).setup(*args, **kwargs)

    # Same label for both counters, so look it up once.
    transform_name = self._transform_name
    self.found_ctr = self._klio.metrics.counter(
        "kmsg-data-found-output", transform=transform_name
    )
    self.not_found_ctr = self._klio.metrics.counter(
        "kmsg-data-not-found-output", transform=transform_name
    )
232+
210233
@property
211234
def _data_config(self):
212235
# TODO: figure out how to support multiple outputs
@@ -235,6 +258,11 @@ def _data_config(self):
235258
)
236259
return self._klio.config.job_config.data.outputs[0]
237260

261+
@property
def _transform_name(self):
    """Return the class name of this instance.

    The lookup goes through the instance, so a class that inherits
    from this mixin reports its own name rather than the mixin's.
    """
    return type(self).__name__
265+
238266

239267
class _KlioGcsDataExistsMixin(object):
240268
"""Mixin for GCS-specific data existence check logic.
@@ -243,7 +271,8 @@ class _KlioGcsDataExistsMixin(object):
243271
_KlioInputDataMixin or _KlioOutputDataMixin
244272
"""
245273

246-
def setup(self, *args, **kwargs):
    """Run the parent ``setup``, then create the GCS client.

    All arguments are forwarded unchanged to the next ``setup`` in the
    MRO. The ``gcsio.GcsIO`` instance is stored on ``self.client``.
    """
    parent = super(_KlioGcsDataExistsMixin, self)
    parent.setup(*args, **kwargs)
    self.client = gcsio.GcsIO()
248277

249278
def exists(self, path):
@@ -260,9 +289,12 @@ def process(self, kmsg):
260289
item_path = self._get_absolute_path(item)
261290
item_exists = self.exists(item_path)
262291

263-
state = DataExistState.FOUND
264292
if not item_exists:
293+
self.not_found_ctr.inc()
265294
state = DataExistState.NOT_FOUND
295+
else:
296+
self.found_ctr.inc()
297+
state = DataExistState.FOUND
266298

267299
self._klio.logger.info(
268300
"%s %s at %s"

lib/src/klio/transforms/_retry.py

+9
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def __init__(
4141
self,
4242
function,
4343
tries,
44+
klio_context,
4445
delay=None,
4546
exception=None,
4647
raise_exception=None,
@@ -54,6 +55,12 @@ def __init__(
5455
self._retry_exception = raise_exception or KlioRetriesExhausted
5556
self._exception_message = exception_message
5657
self._logger = logging.getLogger("klio")
58+
self._retry_ctr = klio_context.metrics.counter(
59+
"kmsg-retry-attempt", transform=self._func_name
60+
)
61+
self._retry_error_ctr = klio_context.metrics.counter(
62+
"kmsg-drop-retry-error", transform=self._func_name
63+
)
5764

5865
def __call__(self, *args, **kwargs):
5966
tries = self._tries
@@ -68,9 +75,11 @@ def __call__(self, *args, **kwargs):
6875
except self._exception as e:
6976
tries -= 1
7077
if not tries:
78+
self._retry_error_ctr.inc()
7179
self._raise_exception(e)
7280
break
7381

82+
self._retry_ctr.inc()
7483
msg = self._format_log_message(tries, e)
7584
self._logger.warning(msg)
7685

lib/src/klio/transforms/_timeout.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,21 @@ class KlioTimeoutWrapper(object):
6363
DEFAULT_EXC_MSG = "Function '{}' timed out after {} seconds."
6464

6565
def __init__(
    self,
    function,
    seconds,
    klio_context,
    timeout_exception=None,
    exception_message=None,
):
    """Store the wrapped callable, its time limit and a timeout counter.

    Args:
        function: the callable to wrap.
        seconds: the time limit, stored as given.
        klio_context: object whose ``metrics.counter(name,
            transform=...)`` is called once to create the timeout
            counter.
        timeout_exception: exception class to store for timeouts;
            falls back to ``KlioTimeoutError`` when falsy.
        exception_message: optional custom message, stored as given.
    """
    self._function = function

    # Resolve the display name lazily: callable instances and
    # ``functools.partial`` objects have neither ``__qualname__`` nor
    # ``__name__``, and an eager ``function.__name__`` default would
    # raise AttributeError for them.
    func_name = getattr(function, "__qualname__", None)
    if func_name is None:
        func_name = getattr(function, "__name__", repr(function))
    self._func_name = func_name

    self._seconds = seconds
    self._timeout_exception = timeout_exception or KlioTimeoutError
    self._exception_message = exception_message

    # Uses the same "kmsg-" prefix as the retry and data-existence
    # counters so all message-level metrics share one naming scheme.
    self._timeout_ctr = klio_context.metrics.counter(
        "kmsg-drop-timed-out", transform=self._func_name
    )
7381

7482
def __call__(self, *args, **kwargs):
7583
self._queue = multiprocessing.Queue(maxsize=1)
@@ -123,6 +131,7 @@ def cancel(self):
123131
def ready(self):
    """Report whether the result queue currently holds a value.

    If the deadline in ``self._timeout`` is already behind
    ``time.monotonic()``, the timeout counter is incremented and
    ``self.cancel()`` is called before the queue is inspected.
    """
    deadline_passed = self._timeout < time.monotonic()
    if deadline_passed:
        self._timeout_ctr.inc()
        self.cancel()

    result_queue = self._queue
    return result_queue.full() and not result_queue.empty()
128137

lib/src/klio/transforms/decorators.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -456,12 +456,14 @@ def _timeout(seconds=None, exception=None, exception_message=None):
456456
)
457457

458458
def inner(func_or_meth):
459-
timeout_wrapper = ktimeout.KlioTimeoutWrapper(
460-
function=func_or_meth,
461-
seconds=seconds,
462-
timeout_exception=exception,
463-
exception_message=exception_message,
464-
)
459+
with _klio_context() as kctx:
460+
timeout_wrapper = ktimeout.KlioTimeoutWrapper(
461+
function=func_or_meth,
462+
seconds=seconds,
463+
timeout_exception=exception,
464+
exception_message=exception_message,
465+
klio_context=kctx,
466+
)
465467

466468
# Unfortunately these two wrappers can't be abstracted into
467469
# one wrapper - the `self` arg apparently can not be abstracted
@@ -528,14 +530,16 @@ def _retry(
528530
)
529531

530532
def inner(func_or_meth):
531-
retry_wrapper = kretry.KlioRetryWrapper(
532-
function=func_or_meth,
533-
tries=tries,
534-
delay=delay,
535-
exception=exception,
536-
raise_exception=raise_exception,
537-
exception_message=exception_message,
538-
)
533+
with _klio_context() as kctx:
534+
retry_wrapper = kretry.KlioRetryWrapper(
535+
function=func_or_meth,
536+
tries=tries,
537+
delay=delay,
538+
exception=exception,
539+
raise_exception=raise_exception,
540+
exception_message=exception_message,
541+
klio_context=kctx,
542+
)
539543

540544
# Unfortunately these two wrappers can't be abstracted into
541545
# one wrapper - the `self` arg apparently can not be abstracted

0 commit comments

Comments
 (0)