From 7495e0ed73635bebee92141529974f261bd3a8dd Mon Sep 17 00:00:00 2001 From: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Date: Thu, 13 Jun 2024 09:14:11 -0500 Subject: [PATCH] fix: Change `if` condition typo in `_get_children_of_element()` (#313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous `if` condition was inefficient and should have been written with an `and`. This resulted in a performance gap. cProfile timing of `export_hocr_string()` on the same document: Before ``` 143598720 function calls (129111346 primitive calls) in 44.487 seconds ``` After ``` 97883150 function calls (88084552 primitive calls) in 30.235 seconds ``` Fixes #312 🦕 --- google/cloud/documentai_toolbox/wrappers/page.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/google/cloud/documentai_toolbox/wrappers/page.py b/google/cloud/documentai_toolbox/wrappers/page.py index 1ca84e14..d224d1de 100644 --- a/google/cloud/documentai_toolbox/wrappers/page.py +++ b/google/cloud/documentai_toolbox/wrappers/page.py @@ -359,9 +359,11 @@ def _get_children_of_element( return [ child for child in children - if child.documentai_object.layout.text_anchor.text_segments[0].start_index - >= start_index - if child.documentai_object.layout.text_anchor.text_segments[0].end_index + if start_index + <= child.documentai_object.layout.text_anchor.text_segments[0].start_index + < end_index + and start_index + < child.documentai_object.layout.text_anchor.text_segments[0].end_index <= end_index ]