Skip to content

Commit 76d2728

Browse files
committed
Use uuid1() for unique key.
1 parent 9ea4a96 commit 76d2728

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

sdks/python/apache_beam/ml/anomaly/transforms.py

+19-12
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#
1717

1818
import dataclasses
19-
import time
2019
import typing
2120
import uuid
2221
from typing import Any
@@ -494,19 +493,27 @@ def expand(
494493

495494
# Add a temporary unique key per data point to facilitate grouping the
496495
# outputs from multiple anomaly detectors for the same data point.
497-
# Previously, timestamp.Timestamp.now().micros was used, but on Windows,
498-
# its limited precision (around 10 milliseconds) resulted in key collisions.
499-
# https://peps.python.org/pep-0564/#windows
500-
# Performance note: time.monotonic_ns() is about 10x-20x faster than
501-
# uuid.uuid4().
502-
# $ python -m timeit -n 100000 "import time; time.monotonic_ns()"
503-
# 100000 loops, best of 5: 86.7 nsec per loop
504-
# $ python -m timeit -n 100000 "import uuid; str(uuid.uuid4())"
505-
# 10000 loops, best of 5: 2.04 usec per loop
496+
#
497+
# Unique key generation options:
498+
# (1) Timestamp-based methods: https://docs.python.org/3/library/time.html
499+
# (2) UUID module: https://docs.python.org/3/library/uuid.html
500+
#
501+
# Timestamp precision on Windows can lead to key collisions (see PEP 564:
502+
# https://peps.python.org/pep-0564/#windows). Only time.perf_counter_ns()
503+
# provides sufficient precision among the timestamp-based methods.
504+
#
505+
# Performance note:
506+
# $ python -m timeit -n 100000 "import uuid; uuid.uuid1()"
507+
# 100000 loops, best of 5: 806 nsec per loop
508+
# $ python -m timeit -n 100000 "import uuid; uuid.uuid4()"
509+
# 100000 loops, best of 5: 1.53 usec per loop
506510
# $ python -m timeit -n 100000 "import time; time.perf_counter_ns()"
507-
# 100000 loops, best of 5: 85.2 nsec per loop
511+
# 100000 loops, best of 5: 82.3 nsec per loop
512+
#
513+
# We select uuid.uuid1() for its inclusion of node information, making it
514+
# more suitable for parallel execution environments.
508515
add_temp_key_fn: Callable[[InputT], KeyedInputT] \
509-
= lambda e: (e[0], (time.perf_counter_ns(), e[1]))
516+
= lambda e: (e[0], (str(uuid.uuid1()), e[1]))
510517
keyed_input = (input | "Add temp key" >> beam.Map(add_temp_key_fn))
511518

512519
if isinstance(self._root_detector, EnsembleAnomalyDetector):

0 commit comments

Comments
 (0)