Skip to content

Commit 9185852

Browse files
committed
merge origin
2 parents 7f70f76 + 536bb28 commit 9185852

File tree

15 files changed

+308
-1220
lines changed

15 files changed

+308
-1220
lines changed

Gemfile.lock

+2-2
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ GEM
458458
pundit (2.3.1)
459459
activesupport (>= 3.0.0)
460460
racc (1.8.1)
461-
rack (2.2.11)
461+
rack (2.2.13)
462462
rack-cors (2.0.2)
463463
rack (>= 2.0.0)
464464
rack-oauth2 (2.2.0)
@@ -764,7 +764,7 @@ GEM
764764
unf_ext (0.0.8.2)
765765
unicode-display_width (2.4.2)
766766
unicode-types (1.10.0)
767-
uri (0.13.0)
767+
uri (0.13.2)
768768
useragent (0.16.11)
769769
validate_email (0.1.6)
770770
activemodel (>= 3.0)

app/controllers/sources_controller.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def set_content_provider
150150

151151
# Never trust parameters from the scary internet, only allow the white list through.
152152
def source_params
153-
permitted = [:url, :method, :token, :enabled]
153+
permitted = [:url, :method, :token, :default_language, :enabled]
154154
permitted << :approval_status if policy(Source).approve?
155155
permitted << :content_provider_id if policy(Source).index?
156156

app/models/source.rb

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class Source < ApplicationRecord
2222
validates :approval_status, inclusion: { in: APPROVAL_STATUS.values }
2323
validates :method, inclusion: { in: -> (_) { TeSS::Config.user_ingestion_methods } },
2424
unless: -> { User.current_user&.is_admin? || User.current_user&.has_role?(:scraper_user) }
25+
validates :default_language, controlled_vocabulary: { dictionary: 'LanguageDictionary',
26+
allow_blank: true }
2527
validate :check_method
2628

2729
before_create :set_approval_status

app/views/sources/_form.html.erb

+5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222

2323
<%= f.input :token, hint: t('sources.hints.token'), label: 'Authentication Token' %>
2424

25+
<%= f.input :default_language,
26+
collection: LanguageDictionary.instance.options_for_select,
27+
prompt: t('sources.prompts.default_language'), include_blank: true,
28+
hint: t('sources.hints.default_language') %>
29+
2530
<%= f.input :enabled, hint: t('sources.hints.enabled') %>
2631

2732
<% if policy(@source).approve? %>

app/views/sources/show.html.erb

+7
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@
5252
</p>
5353
<% end %>
5454

55+
<!-- Field: default_language -->
56+
<% if current_user && (current_user.is_curator? || current_user.is_admin?) %>
57+
<p><strong><%= Source.human_attribute_name(:default_language) %>:</strong>
58+
<%= render_language_name(@source.default_language) if @source.default_language %>
59+
</p>
60+
<% end %>
61+
5562
<!-- Field: enabled -->
5663
<p>
5764
<strong>Source is:</strong>

config/locales/en.yml

+5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ en:
6464
onsite: Face-to-face
6565
hybrid: Hybrid
6666
language: 'Language of instruction'
67+
source:
68+
default_language: 'Default language'
6769
about:
6870
headings:
6971
events: Events
@@ -741,10 +743,13 @@ en:
741743
method: 'The method used to ingest the contents of the source URL.'
742744
token: Some ingestion methods require an authentication token, leave blank if you aren't sure.
743745
enabled: Sources that are not enabled will not be ingested.
746+
default_language: 'Default language of ingested events/materials'
744747
approval_status:
745748
not_approved: Not approved
746749
requested: Approval requested
747750
approved: Approved
751+
prompts:
752+
default_language: 'Select a default language...'
748753
scraper:
749754
messages:
750755
status: 'Scraper.run: %{status}'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
class AddDefaultLanguageToSource < ActiveRecord::Migration[7.0]
2+
def change
3+
add_column :sources, :default_language, :string
4+
end
5+
end

db/schema.rb

+4-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#
1111
# It's strongly recommended that you check this file into your version control system.
1212

13-
ActiveRecord::Schema[7.0].define(version: 2024_12_02_092029) do
13+
ActiveRecord::Schema[7.0].define(version: 2025_03_03_210238) do
1414
# These are extensions that must be enabled in order to support this database
1515
enable_extension "plpgsql"
1616

@@ -227,9 +227,9 @@
227227
t.string "cost_basis"
228228
t.string "cost_currency"
229229
t.string "fields", default: [], array: true
230-
t.string "open_science", default: [], array: true
231230
t.boolean "visible", default: true
232231
t.string "language"
232+
t.string "open_science", default: [], array: true
233233
t.index ["presence"], name: "index_events_on_presence"
234234
t.index ["slug"], name: "index_events_on_slug", unique: true
235235
t.index ["user_id"], name: "index_events_on_user_id"
@@ -469,6 +469,7 @@
469469
t.string "token"
470470
t.integer "approval_status"
471471
t.datetime "updated_at"
472+
t.string "default_language"
472473
t.index ["content_provider_id"], name: "index_sources_on_content_provider_id"
473474
t.index ["user_id"], name: "index_sources_on_user_id"
474475
end
@@ -572,9 +573,9 @@
572573
t.string "resource_type"
573574
t.text "data"
574575
t.json "params"
575-
t.text "referrer"
576576
t.datetime "created_at", null: false
577577
t.datetime "updated_at", null: false
578+
t.text "referrer"
578579
t.index ["resource_type", "resource_id"], name: "index_widget_logs_on_resource_type_and_resource_id"
579580
end
580581

docker-compose-prod.yml

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ services:
7474
dockerfile: Dockerfile
7575
target: production
7676
image: ${PREFIX}-app
77+
restart: always
7778
depends_on:
7879
- app
7980
- db

lib/ingestors/ingestor.rb

+8-4
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ def read(_url)
3333
raise NotImplementedError
3434
end
3535

36-
def write(user, provider)
37-
write_resources(Event, @events, user, provider)
36+
def write(user, provider, source: nil)
37+
write_resources(Event, @events, user, provider, source: source)
3838
@messages << stats_summary(:events)
39-
write_resources(Material, @materials, user, provider)
39+
write_resources(Material, @materials, user, provider, source: source)
4040
@messages << stats_summary(:materials)
4141
end
4242

@@ -127,7 +127,7 @@ def set_resource_defaults(resource)
127127
resource
128128
end
129129

130-
def write_resources(type, resources, user, provider)
130+
def write_resources(type, resources, user, provider, source: nil)
131131
resources.each_with_index do |resource, i|
132132
key = type.model_name.collection.to_sym
133133
@stats[key][:processed] += 1
@@ -144,6 +144,10 @@ def write_resources(type, resources, user, provider)
144144
type.new(resource.to_h)
145145
end
146146

147+
if resource.has_attribute?(:language) && resource.new_record?
148+
resource.language ||= source&.default_language
149+
end
150+
147151
resource = set_resource_defaults(resource)
148152
if resource.valid?
149153
resource.save!

lib/ingestors/oscm_ingestor.rb

+8-5
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22
require 'open-uri'
33
require 'csv'
44
require 'nokogiri'
5+
require 'openssl'
56

67
module Ingestors
78
class OscmIngestor < Ingestor
89
def self.config
910
{
1011
key: 'oscm_event',
1112
title: 'OSCM Events API',
12-
category: :events
13+
category: :events,
14+
ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE
1315
}
1416
end
1517

@@ -29,9 +31,10 @@ def read(url)
2931
def process_oscm(url)
3032
# Instead of using the sitemap we use the events page.
3133
# The sitemap also shows past events, but the iCal link for those does not work, so we can't parse them with the code below.
32-
Nokogiri::HTML5(open_url(url, raise: true)).css('.eventname > a').each do |link|
34+
url = 'https://www.openscience-maastricht.nl/events/'
35+
Nokogiri::HTML5.parse(open_url(url.to_s, raise: true)).css('.eventname > a').each do |event_link|
3336
begin
34-
event_url = link.attributes['href']
37+
event_url = event_link.attributes['href']
3538
event_page = Nokogiri::HTML5.parse(open_url(event_url, raise: true))
3639

3740
# create new event
@@ -43,8 +46,8 @@ def process_oscm(url)
4346
event.description = convert_description ical_event.description
4447
event.url = ical_event.url
4548
# TeSS timezone handling is a bit special.
46-
event.start = ical_event.dtstart
47-
event.end = ical_event.dtend
49+
event.start = Time.zone.parse(ical_event.dtstart.strftime('%a, %d %b %Y %H:%M:%S'))
50+
event.end = Time.zone.parse(ical_event.dtend.strftime('%a, %d %b %Y %H:%M:%S'))
4851
event.set_default_times
4952
event.venue = ical_event.try(:venue)
5053
event.keywords = ical_event.categories # fair-coffee pre-registration-workshop fair-essentials-workshop fair-for-qualitative-data reproducibilitea

lib/scraper.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def scrape(source, user = get_user, index: 0)
133133
end
134134

135135
# write resources
136-
ingestor.write(user, source.content_provider)
136+
ingestor.write(user, source.content_provider, source: source)
137137
unless ingestor.messages.blank?
138138
output.concat "\n## Writing\n\n"
139139
ingestor.messages.each { |m| output.concat "#{m}\n" }

test/unit/ingestors/ingestor_test.rb

+102-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,105 @@ class IngestorTest < ActiveSupport::TestCase
1212
expected = "# Title\n\n- Item 1\n- Item 2"
1313
assert_equal expected, ingestor.convert_description(input)
1414
end
15-
end
15+
16+
test 'sets event language from source default language' do
17+
user = users(:scraper_user)
18+
provider = content_providers(:portal_provider)
19+
20+
# Source has default language set
21+
@source = Source.create!(url: 'https://somewhere.com/stuff', method: 'bioschemas',
22+
enabled: true, approval_status: 'approved',
23+
default_language: 'fr',
24+
content_provider: provider, user: users(:admin))
25+
26+
ingestor = Ingestors::Ingestor.new
27+
28+
# Fake an event that was read ... no language set
29+
ingestor.instance_variable_set(:@events,
30+
[OpenStruct.new(url: 'https://some-course.ca',
31+
title: 'Some course',
32+
start: '2021-01-31 13:00:00',
33+
end:'2021-01-31 14:00:00')])
34+
assert_difference('provider.events.count', 1) do
35+
ingestor.write(user, provider, source: @source)
36+
end
37+
event = Event.find_by(title: 'Some course')
38+
assert_equal(event.language, 'fr')
39+
end
40+
41+
test 'does not override event language from source default language when language set' do
42+
user = users(:scraper_user)
43+
provider = content_providers(:portal_provider)
44+
45+
# Source has default language set
46+
@source = Source.create!(url: 'https://somewhere.com/stuff', method: 'bioschemas',
47+
enabled: true, approval_status: 'approved',
48+
default_language: 'fr',
49+
content_provider: provider, user: users(:admin))
50+
51+
ingestor = Ingestors::Ingestor.new
52+
53+
# Fake an event that was read ... with language set
54+
ingestor.instance_variable_set(:@events,
55+
[OpenStruct.new(url: 'https://some-course.de',
56+
title: 'Some german course',
57+
start: '2021-01-31 13:00:00',
58+
end:'2021-01-31 14:00:00',
59+
language: 'de')])
60+
assert_difference('provider.events.count', 1) do
61+
ingestor.write(user, provider, source: @source)
62+
end
63+
event = Event.find_by(title: 'Some german course')
64+
assert_equal(event.language, 'de')
65+
end
66+
67+
test 'does not override event language when source default language missing' do
68+
user = users(:scraper_user)
69+
provider = content_providers(:portal_provider)
70+
71+
# Source has no default language set
72+
@source = Source.create!(url: 'https://somewhere.com/stuff', method: 'bioschemas',
73+
enabled: true, approval_status: 'approved',
74+
content_provider: provider, user: users(:admin))
75+
76+
ingestor = Ingestors::Ingestor.new
77+
78+
# Fake an event that was read ... with language set
79+
ingestor.instance_variable_set(:@events,
80+
[OpenStruct.new(url: 'https://some-course.org',
81+
title: 'Some other course',
82+
start: '2021-01-31 13:00:00',
83+
end:'2021-01-31 14:00:00',
84+
language: 'de')])
85+
assert_difference('provider.events.count', 1) do
86+
ingestor.write(user, provider, source: @source)
87+
end
88+
event = Event.find_by(title: 'Some other course')
89+
assert_equal(event.language, 'de')
90+
end
91+
92+
test 'does not set event language when language and source default language missing' do
93+
user = users(:scraper_user)
94+
provider = content_providers(:portal_provider)
95+
96+
# Source has no default language set
97+
@source = Source.create!(url: 'https://somewhere.com/stuff', method: 'bioschemas',
98+
enabled: true, approval_status: 'approved',
99+
content_provider: provider, user: users(:admin))
100+
101+
ingestor = Ingestors::Ingestor.new
102+
103+
# Fake an event that was read ... no language set
104+
ingestor.instance_variable_set(:@events,
105+
[OpenStruct.new(url: 'https://some-course.net',
106+
title: 'Yet another course',
107+
start: '2021-01-31 13:00:00',
108+
end:'2021-01-31 14:00:00')])
109+
assert_difference('provider.events.count', 1) do
110+
ingestor.write(user, provider, source: @source)
111+
end
112+
event = Event.find_by(title: 'Yet another course')
113+
assert_nil(event.language)
114+
end
115+
116+
end

test/unit/ingestors/taxila/oscm_ingestor_test.rb

+8-8
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ class OscmIngestorTest < ActiveSupport::TestCase
2222
ingestor = Ingestors::Taxila::OscmIngestor.new
2323

2424
# check event doesn't exist
25-
new_title = 'FAIR Coffee lecture - Gijs van Dijck'
26-
new_url = 'https://www.openscience-maastricht.nl/events/fair-coffee-lecture-24-may-2023/'
25+
new_title = 'FAIR Coffee lecture - Mariëlle Prevoo (pre-announcement)'
26+
new_url = 'https://www.openscience-maastricht.nl/events/fair-coffee-lecture-marielle-prevoo-pre-announcement/'
2727
refute Event.where(title: new_title, url: new_url).any?
2828

2929
# run task
30-
assert_difference 'Event.count', 3 do
30+
assert_difference 'Event.count', 4 do
3131
freeze_time(2019) do
3232
VCR.use_cassette("ingestors/oscm") do
3333
ingestor.read(source.url)
@@ -36,9 +36,9 @@ class OscmIngestorTest < ActiveSupport::TestCase
3636
end
3737
end
3838

39-
assert_equal 3, ingestor.events.count
39+
assert_equal 4, ingestor.events.count
4040
assert ingestor.materials.empty?
41-
assert_equal 3, ingestor.stats[:events][:added]
41+
assert_equal 4, ingestor.stats[:events][:added]
4242
assert_equal 0, ingestor.stats[:events][:updated]
4343
assert_equal 0, ingestor.stats[:events][:rejected]
4444

@@ -49,11 +49,11 @@ class OscmIngestorTest < ActiveSupport::TestCase
4949
assert_equal new_url, event.url
5050

5151
# check other fields
52-
assert_equal 'FAIR Coffee lecture - Gijs van Dijck', event.title
52+
assert_equal 'FAIR Coffee lecture - Mariëlle Prevoo (pre-announcement)', event.title
5353
assert_equal 'Amsterdam', event.timezone
5454
assert_equal 'OSCM', event.source
5555
assert event.online?
56-
assert_equal Time.zone.parse('Wed, 24 May 2023 11:00:00.000000000 UTC +00:00'), event.start
57-
assert_equal Time.zone.parse('Wed, 24 May 2023 12:00:00.000000000 UTC +00:00'), event.end
56+
assert_equal Time.zone.parse('Wed, 15 May 2025 10:30:00.000000000 UTC +00:00'), event.start
57+
assert_equal Time.zone.parse('Wed, 15 May 2025 11:30:00.000000000 UTC +00:00'), event.end
5858
end
5959
end

0 commit comments

Comments
 (0)