From f1221a5cf82c79ceca7b618b6ef0b76ce1c9cfbc Mon Sep 17 00:00:00 2001 From: Finn Date: Wed, 31 Jan 2024 12:05:52 +0000 Subject: [PATCH] Use README contents for description of nfcore workflows --- app/models/concerns/workflow_extraction.rb | 10 +++++----- app/models/git_workflow_wizard.rb | 4 ++-- lib/scrapers/nfcore_scraper.rb | 21 +++++++++++---------- lib/seek/workflow_extractors/base.rb | 2 +- lib/seek/workflow_extractors/cff.rb | 2 +- lib/seek/workflow_extractors/ro_like.rb | 6 ++++-- test/integration/nfcore_scraper_test.rb | 4 ++-- 7 files changed, 26 insertions(+), 23 deletions(-) diff --git a/app/models/concerns/workflow_extraction.rb b/app/models/concerns/workflow_extraction.rb index f4c612b952..3e0a0da759 100644 --- a/app/models/concerns/workflow_extraction.rb +++ b/app/models/concerns/workflow_extraction.rb @@ -13,15 +13,15 @@ def extractor_class workflow_class&.extractor_class || Seek::WorkflowExtractors::Base end - def extractor + def extractor(opts = {}) if is_git_ro_crate? - Seek::WorkflowExtractors::ROCrate.new(git_version, main_workflow_class: workflow_class) + Seek::WorkflowExtractors::ROCrate.new(git_version, main_workflow_class: workflow_class, **opts) elsif is_already_ro_crate? - Seek::WorkflowExtractors::ROCrate.new(content_blob, main_workflow_class: workflow_class) + Seek::WorkflowExtractors::ROCrate.new(content_blob, main_workflow_class: workflow_class, **opts) elsif is_git_versioned? - Seek::WorkflowExtractors::GitRepo.new(git_version, main_workflow_class: workflow_class) + Seek::WorkflowExtractors::GitRepo.new(git_version, main_workflow_class: workflow_class, **opts) else - extractor_class.new(content_blob) + extractor_class.new(content_blob, **opts) end end diff --git a/app/models/git_workflow_wizard.rb b/app/models/git_workflow_wizard.rb index 7ab39547fa..34b6f9a490 100644 --- a/app/models/git_workflow_wizard.rb +++ b/app/models/git_workflow_wizard.rb @@ -10,7 +10,7 @@ class GitWorkflowWizard attr_reader :next_step, :git_repository - attr_accessor :params, :workflow, :workflow_class + attr_accessor :params, :workflow, :workflow_class, :use_readme_for_description def run if new_version? @@ -78,7 +78,7 @@ def run return workflow end - extractor = workflow.extractor + extractor = workflow.extractor(use_readme_for_description: @use_readme_for_description) workflow.provide_metadata(extractor.metadata) @next_step = :provide_metadata diff --git a/lib/scrapers/nfcore_scraper.rb b/lib/scrapers/nfcore_scraper.rb index abadedf0cb..630be29baa 100644 --- a/lib/scrapers/nfcore_scraper.rb +++ b/lib/scrapers/nfcore_scraper.rb @@ -20,15 +20,16 @@ def main_branch(repo) def workflow_wizard(repo, tag) GitWorkflowWizard.new(workflow_class: WorkflowClass.find_by_key('nextflow'), - params: { - git_version_attributes: { - main_workflow_path: 'nextflow.config', - git_repository_id: repo.id, - ref: "refs/tags/#{tag}", - name: tag, - comment: "Updated to #{tag}" - } - }) + use_readme_for_description: true, + params: { + git_version_attributes: { + main_workflow_path: 'nextflow.config', + git_repository_id: repo.id, + ref: "refs/tags/#{tag}", + name: tag, + comment: "Updated to #{tag}" + } + }) end end -end \ No newline at end of file +end diff --git a/lib/seek/workflow_extractors/base.rb b/lib/seek/workflow_extractors/base.rb index f4adf4177e..1bcb75a664 100644 --- a/lib/seek/workflow_extractors/base.rb +++ b/lib/seek/workflow_extractors/base.rb @@ -7,7 +7,7 @@ class Base "name" => "Unrecognized Workflow Type" } - def initialize(io) + def initialize(io, opts = {}) @io = io.is_a?(String) ? StringIO.new(io) : io end diff --git a/lib/seek/workflow_extractors/cff.rb b/lib/seek/workflow_extractors/cff.rb index 1312b2911b..c0556b1a4a 100644 --- a/lib/seek/workflow_extractors/cff.rb +++ b/lib/seek/workflow_extractors/cff.rb @@ -5,7 +5,7 @@ module WorkflowExtractors class CFF FILENAME = 'CITATION.cff' - def initialize(io) + def initialize(io, opts = {}) if io.respond_to?(:path) @path = io.path else diff --git a/lib/seek/workflow_extractors/ro_like.rb b/lib/seek/workflow_extractors/ro_like.rb index 04bba108cd..a9d8c37787 100644 --- a/lib/seek/workflow_extractors/ro_like.rb +++ b/lib/seek/workflow_extractors/ro_like.rb @@ -7,9 +7,10 @@ module WorkflowExtractors # Abstract extractor class for a "Research Object-like" structured bundle of files, # e.g. an RO-Crate or an annotated Git repository. class ROLike < Base - def initialize(obj, main_workflow_class: nil) + def initialize(obj, main_workflow_class: nil, use_readme_for_description: false) @obj = obj @main_workflow_class = main_workflow_class + @use_readme_for_description = use_readme_for_description end def can_render_diagram? @@ -56,7 +57,8 @@ def metadata end if file_exists?('README.md') - m[:description] ||= file('README.md').read.force_encoding('utf-8').gsub(/^(---\s*\n.*?\n?)^(---\s*$\n?)/m,'') # Remove "Front matter" + readme = file('README.md').read.force_encoding('utf-8').gsub(/^(---\s*\n.*?\n?)^(---\s*$\n?)/m,'') # Remove "Front matter" + m[:description] = readme if readme.present? && (m[:description].blank? || @use_readme_for_description) end m[:workflow_class_id] ||= main_workflow_class&.id diff --git a/test/integration/nfcore_scraper_test.rb b/test/integration/nfcore_scraper_test.rb index 9850cf4b44..29f1e5c10a 100644 --- a/test/integration/nfcore_scraper_test.rb +++ b/test/integration/nfcore_scraper_test.rb @@ -25,7 +25,7 @@ class NfcoreScraperTest < ActionDispatch::IntegrationTest assert_equal bot, wf.contributor assert_equal [project], wf.projects assert_equal 'nf-core/rnaseq', wf.title - assert_equal 'Nextflow RNA-Seq analysis pipeline, part of the nf-core community.', wf.description + assert_includes wf.description, '**nf-core/rnaseq** is a bioinformatics analysis pipeline' assert_equal 'MIT', wf.license assert_equal 'nextflow.config', wf.main_workflow_path assert_equal '3.0', wf.git_version.name @@ -66,7 +66,7 @@ class NfcoreScraperTest < ActionDispatch::IntegrationTest assert_equal bot, wf.contributor assert_equal [project], wf.projects assert_equal 'nf-core/rnaseq', wf.title - assert_equal 'Nextflow RNA-Seq analysis pipeline, part of the nf-core community.', wf.description + assert_includes wf.description, '**nf-core/rnaseq** is a bioinformatics analysis pipeline' assert_equal 'MIT', wf.license assert_equal 'nextflow.config', wf.main_workflow_path assert_equal '3.0', wf.git_version.name