From 8ad3f272b2db93b54b243f44e7c72a7f19ee2f45 Mon Sep 17 00:00:00 2001 From: Ynda Jas Date: Mon, 10 Mar 2025 10:49:24 +0000 Subject: [PATCH 1/5] Consolidate loops in GraphQL dataloader sources Co-authored-by: Mike Patrick --- .../sources/linked_to_editions_source.rb | 23 ++++++++++++++----- .../reverse_linked_to_editions_source.rb | 15 ++++++++---- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/app/graphql/sources/linked_to_editions_source.rb b/app/graphql/sources/linked_to_editions_source.rb index f8cd88411..aee46ddef 100644 --- a/app/graphql/sources/linked_to_editions_source.rb +++ b/app/graphql/sources/linked_to_editions_source.rb @@ -7,27 +7,38 @@ def initialize(content_store:) # rubocop:enable Lint/MissingSuper def fetch(editions_and_link_types) - content_id_tuples = editions_and_link_types.map { |edition, link_type| "('#{edition.content_id}','#{link_type}')" }.join(",") - edition_id_tuples = editions_and_link_types.map { |edition, link_type| "(#{edition.id},'#{link_type}')" }.join(",") + edition_id_tuples = [] + content_id_tuples = [] + link_types_map = {} + + editions_and_link_types.each do |edition, link_type| + edition_id_tuples.push("(#{edition.id},'#{link_type}')") + content_id_tuples.push("('#{edition.content_id}','#{link_type}')") + link_types_map[[edition.content_id, link_type]] = [] + end edition_links = Link .joins(:edition, { target_documents: @content_store }) .includes(:edition, { target_documents: @content_store }) - .where('("editions"."id", "links"."link_type") IN (?)', Arel.sql(edition_id_tuples)) + .where( + '("editions"."id", "links"."link_type") IN (?)', + Arel.sql(edition_id_tuples.join(",")), + ) .where(target_documents: { locale: "en" }) .order(link_type: :asc, position: :asc) link_set_links = Link .joins(:link_set, { target_documents: @content_store }) .includes(:link_set, { target_documents: @content_store }) - .where('("link_sets"."content_id", "links"."link_type") IN (?)', Arel.sql(content_id_tuples)) + .where( + '("link_sets"."content_id", "links"."link_type") IN (?)', + Arel.sql(content_id_tuples.join(",")), + ) .where(target_documents: { locale: "en" }) .order(link_type: :asc, position: :asc) all_links = edition_links + link_set_links - link_types_map = editions_and_link_types.map { [_1.content_id, _2] }.index_with { [] } - all_links.each_with_object(link_types_map) { |link, hash| hash[[(link.link_set || link.edition).content_id, link.link_type]].concat(editions_for_link(link)) }.values diff --git a/app/graphql/sources/reverse_linked_to_editions_source.rb b/app/graphql/sources/reverse_linked_to_editions_source.rb index 0b62fae5f..9b72b707e 100644 --- a/app/graphql/sources/reverse_linked_to_editions_source.rb +++ b/app/graphql/sources/reverse_linked_to_editions_source.rb @@ -7,14 +7,21 @@ def initialize(content_store:) # rubocop:enable Lint/MissingSuper def fetch(editions_and_link_types) - content_id_tuples = editions_and_link_types.map { |edition, link_type| "('#{edition.content_id}','#{link_type}')" }.join(",") + content_id_tuples = [] + link_types_map = {} + + editions_and_link_types.each do |edition, link_type| + content_id_tuples.push("('#{edition.content_id}','#{link_type}')") + link_types_map[[edition.content_id, link_type]] = [] + end all_links = Link - .where('("links"."target_content_id", "links"."link_type") IN (?)', Arel.sql(content_id_tuples)) + .where( + '("links"."target_content_id", "links"."link_type") IN (?)', + Arel.sql(content_id_tuples.join(",")), + ) .includes(source_documents: @content_store) - link_types_map = editions_and_link_types.map { [_1.content_id, _2] }.index_with { [] } - all_links.each_with_object(link_types_map) { |link, hash| if link.link_set hash[[link.target_content_id, link.link_type]].concat(editions_for_link_set_link(link)) From eaf7caebfeb0c45a5b6387666600832b6c7330c6 Mon Sep 17 00:00:00 2001 From: Mike Patrick Date: Fri, 14 Feb 2025 16:57:01 +0000 Subject: [PATCH 2/5] Allow Edition to provide content_id, locale itself I'm not 100% sure about this change. We're experimenting with SELECTing subsets of an edition's columns and JOINed-tables' columns to improve the performance of our GraphQL resolvers. When you SELECT JOIN columns, Active Record helpfully just seems to dynamically add them as attributes to the model you're working with, which means that, e.g. SELECT editions.*, documents.content_id, documents.locale FROM editions INNER JOIN documents ON document.id = editions.document_id would result in instances of Edition with their own, native #content_id and #locale attributes. In those specific examples, the existing `delegate` doesn't appear to acknowledge these attributes and still attempts to lazy-load the edition's document to fetch them. Since that lazy-loading kinda defeats the point of the approach we're experimenting with, here's an alternative to `delegate` that first checks for pre-existing attributes of the same name. There could be better approaches than this worth exploring in the long run if we do stick with this SELECTing optimisation. (I don't think it's wise to use these Active Record models in such dramatically different ways in different parts of the codebase.) --- app/models/edition.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/app/models/edition.rb b/app/models/edition.rb index e2c83dbfd..55db8900e 100644 --- a/app/models/edition.rb +++ b/app/models/edition.rb @@ -75,7 +75,15 @@ class Edition < ApplicationRecord validates_with StateForDocumentValidator validates_with RoutesAndRedirectsValidator - delegate :content_id, :locale, to: :document + def self.attribute_or_delegate(*names, to:) + names.each do |name| + define_method(name) do + attribute(name.to_s) || method(to).call.method(name).call + end + end + end + + attribute_or_delegate :content_id, :locale, to: :document def auth_bypass_ids_are_uuids unless auth_bypass_ids.all? { |id| UuidValidator.valid?(id) } From 410233377d0a46a30e8404a4f1559ecee7c40fbf Mon Sep 17 00:00:00 2001 From: Ynda Jas Date: Thu, 6 Mar 2025 16:38:53 +0000 Subject: [PATCH 3/5] Ignore order in dataloader tests where irrelevant We explicitly order the results by position, but we'll test this separately. We changed the Active Record code to use a UNION query, which has meant interweaving edition and link set links, breaking the order that these tests were implicitly depending on Co-authored-by: Mike Patrick --- spec/graphql/sources/linked_to_editions_source_spec.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/graphql/sources/linked_to_editions_source_spec.rb b/spec/graphql/sources/linked_to_editions_source_spec.rb index db25ad18a..bf5aef89b 100644 --- a/spec/graphql/sources/linked_to_editions_source_spec.rb +++ b/spec/graphql/sources/linked_to_editions_source_spec.rb @@ -12,7 +12,7 @@ GraphQL::Dataloader.with_dataloading do |dataloader| request = dataloader.with(described_class, content_store: source_edition.content_store).request([source_edition, "test_link"]) - expect(request.load).to eq([target_edition_1, target_edition_3]) + expect(request.load).to match_array([target_edition_1, target_edition_3]) end end @@ -30,7 +30,7 @@ GraphQL::Dataloader.with_dataloading do |dataloader| request = dataloader.with(described_class, content_store: source_edition.content_store).request([source_edition, "test_link"]) - expect(request.load).to eq([target_edition_1, target_edition_3]) + expect(request.load).to match_array([target_edition_1, target_edition_3]) end end @@ -51,7 +51,7 @@ GraphQL::Dataloader.with_dataloading do |dataloader| request = dataloader.with(described_class, content_store: source_edition.content_store).request([source_edition, "test_link"]) - expect(request.load).to eq([target_edition_1, target_edition_3]) + expect(request.load).to match_array([target_edition_1, target_edition_3]) end end @@ -74,7 +74,7 @@ GraphQL::Dataloader.with_dataloading do |dataloader| request = dataloader.with(described_class, content_store: source_edition.content_store).request([source_edition, "test_link"]) - expect(request.load).to eq([target_edition_3, target_edition_4]) + expect(request.load).to match_array([target_edition_3, target_edition_4]) end end end From 07b8e11b1ab9d20ec6bd6bed0a879bfdb391ded4 Mon Sep 17 00:00:00 2001 From: Ynda Jas Date: Thu, 6 Mar 2025 16:33:30 +0000 Subject: [PATCH 4/5] Don't create link_set for edition links in factory We encountered an issue when creating an edition link separately to its edition (i.e. not using the `links_hash` transient/argument), whereby both a link set association was created even when explicitly creating an edition association Co-authored-by: Mike Patrick --- spec/factories/link.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/factories/link.rb b/spec/factories/link.rb index cfd0f284a..0322f47f6 100644 --- a/spec/factories/link.rb +++ b/spec/factories/link.rb @@ -1,6 +1,6 @@ FactoryBot.define do factory :link do - link_set + link_set { create(:link_set) unless edition } target_content_id { SecureRandom.uuid } link_type { "organisations" } end From 70f046a0905c4ef19142022f46a8912378c06414 Mon Sep 17 00:00:00 2001 From: Ynda Jas Date: Mon, 10 Mar 2025 10:52:44 +0000 Subject: [PATCH 5/5] Reduce query counts in GraphQL dataloader sources We found that the previous eager loading approach wasn't using joins effectively, so we're more manually crafting the queries to join in an efficient way. We also found that some associations we're still lazy loaded after the initial queries - this new approach avoids that issue For now we're selecting all columns from the editions table. We'll reduce the selections down to a subset in a later change The change to using a UNION affects the order of the resulting editions, so we've added a test to document our expectations Co-authored-by: Mike Patrick --- .../sources/linked_to_editions_source.rb | 78 +++++++++++++------ .../reverse_linked_to_editions_source.rb | 43 ++++++---- .../sources/linked_to_editions_source_spec.rb | 28 +++++++ 3 files changed, 113 insertions(+), 36 deletions(-) diff --git a/app/graphql/sources/linked_to_editions_source.rb b/app/graphql/sources/linked_to_editions_source.rb index aee46ddef..57ec9c5e4 100644 --- a/app/graphql/sources/linked_to_editions_source.rb +++ b/app/graphql/sources/linked_to_editions_source.rb @@ -7,6 +7,10 @@ def initialize(content_store:) # rubocop:enable Lint/MissingSuper def fetch(editions_and_link_types) + all_selections = { + links: %i[link_type position], + documents: %i[content_id], + } edition_id_tuples = [] content_id_tuples = [] link_types_map = {} @@ -17,37 +21,65 @@ def fetch(editions_and_link_types) link_types_map[[edition.content_id, link_type]] = [] end - edition_links = Link - .joins(:edition, { target_documents: @content_store }) - .includes(:edition, { target_documents: @content_store }) + link_set_links_target_editions = Edition + .joins(document: { reverse_links: :link_set }) .where( - '("editions"."id", "links"."link_type") IN (?)', - Arel.sql(edition_id_tuples.join(",")), + '("link_sets"."content_id", "links"."link_type") IN (?)', + Arel.sql(content_id_tuples.join(",")), + ) + .where( + editions: { content_store: @content_store }, + documents: { locale: "en" }, + ) + .select( + "editions.*", + all_selections, + { link_sets: { content_id: :source_content_id } }, ) - .where(target_documents: { locale: "en" }) - .order(link_type: :asc, position: :asc) - link_set_links = Link - .joins(:link_set, { target_documents: @content_store }) - .includes(:link_set, { target_documents: @content_store }) + edition_links_target_editions = Edition + .joins(document: :reverse_links) + .joins( + <<~SQL, + INNER JOIN editions source_editions + ON source_editions.id = links.edition_id + SQL + ) + .joins( + <<~SQL, + INNER JOIN documents source_documents + ON source_documents.id = source_editions.document_id + SQL + ) .where( - '("link_sets"."content_id", "links"."link_type") IN (?)', - Arel.sql(content_id_tuples.join(",")), + '("source_editions"."id", "links"."link_type") IN (?)', + Arel.sql(edition_id_tuples.join(",")), + ) + .where( + editions: { content_store: @content_store }, + documents: { locale: "en" }, + ) + .select( + "editions.*", + all_selections, + { source_documents: { content_id: :source_content_id } }, ) - .where(target_documents: { locale: "en" }) - .order(link_type: :asc, position: :asc) - all_links = edition_links + link_set_links + all_editions = Edition + .from( + <<~SQL, + ( + #{link_set_links_target_editions.to_sql} + UNION + #{edition_links_target_editions.to_sql} + ) AS editions + SQL + ) + .order(link_type: :asc, position: :asc) - all_links.each_with_object(link_types_map) { |link, hash| - hash[[(link.link_set || link.edition).content_id, link.link_type]].concat(editions_for_link(link)) + all_editions.each_with_object(link_types_map) { |edition, hash| + hash[[edition.source_content_id, edition.link_type]] << edition }.values end - - private - - def editions_for_link(link) - link.target_documents.map { |document| document.send(@content_store) } - end end end diff --git a/app/graphql/sources/reverse_linked_to_editions_source.rb b/app/graphql/sources/reverse_linked_to_editions_source.rb index 9b72b707e..b23bdef54 100644 --- a/app/graphql/sources/reverse_linked_to_editions_source.rb +++ b/app/graphql/sources/reverse_linked_to_editions_source.rb @@ -7,6 +7,10 @@ def initialize(content_store:) # rubocop:enable Lint/MissingSuper def fetch(editions_and_link_types) + all_selections = { + links: %i[target_content_id link_type link_set_id edition_id], + documents: %i[content_id], + } content_id_tuples = [] link_types_map = {} @@ -15,26 +19,39 @@ def fetch(editions_and_link_types) link_types_map[[edition.content_id, link_type]] = [] end - all_links = Link + link_set_links_source_editions = Edition + .joins(:document) + .joins("INNER JOIN link_sets ON link_sets.content_id = documents.content_id") + .joins("INNER JOIN links ON links.link_set_id = link_sets.id") + .where(editions: { content_store: @content_store }) .where( '("links"."target_content_id", "links"."link_type") IN (?)', Arel.sql(content_id_tuples.join(",")), ) - .includes(source_documents: @content_store) + .select("editions.*", all_selections) - all_links.each_with_object(link_types_map) { |link, hash| - if link.link_set - hash[[link.target_content_id, link.link_type]].concat(editions_for_link_set_link(link)) - elsif link.edition - hash[[link.target_content_id, link.link_type]] << link.edition - end - }.values - end + edition_links_source_editions = Edition + .joins(:document, :links) + .where(editions: { content_store: @content_store }) + .where( + '("links"."target_content_id", "links"."link_type") IN (?)', + Arel.sql(content_id_tuples.join(",")), + ) + .select("editions.*", all_selections) - private + all_editions = Edition.from( + <<~SQL, + ( + #{link_set_links_source_editions.to_sql} + UNION + #{edition_links_source_editions.to_sql} + ) AS editions + SQL + ) - def editions_for_link_set_link(link) - link.source_documents.map { |document| document.send(@content_store) } + all_editions.each_with_object(link_types_map) { |edition, hash| + hash[[edition.target_content_id, edition.link_type]] << edition + }.values end end end diff --git a/spec/graphql/sources/linked_to_editions_source_spec.rb b/spec/graphql/sources/linked_to_editions_source_spec.rb index bf5aef89b..25e6b0275 100644 --- a/spec/graphql/sources/linked_to_editions_source_spec.rb +++ b/spec/graphql/sources/linked_to_editions_source_spec.rb @@ -77,4 +77,32 @@ expect(request.load).to match_array([target_edition_3, target_edition_4]) end end + + it "returns editions in order of their associated link's `position`" do + target_edition_0 = create(:edition) + target_edition_1 = create(:edition) + target_edition_2 = create(:edition) + target_edition_3 = create(:edition) + + source_edition = create(:edition, content_store: "draft") + create(:link, edition: source_edition, target_content_id: target_edition_1.content_id, position: 1, link_type: "test_link") + create(:link, edition: source_edition, target_content_id: target_edition_3.content_id, position: 3, link_type: "test_link") + + link_set = create(:link_set, content_id: source_edition.content_id) + create(:link, link_set:, target_content_id: target_edition_0.content_id, position: 0, link_type: "test_link") + create(:link, link_set:, target_content_id: target_edition_2.content_id, position: 2, link_type: "test_link") + + GraphQL::Dataloader.with_dataloading do |dataloader| + request = dataloader.with( + described_class, + content_store: source_edition.content_store, + ).request([ + source_edition, + "test_link", + %i[id base_path title document_id], + ]) + + expect(request.load).to eq([target_edition_0, target_edition_1, target_edition_2, target_edition_3]) + end + end end