From a2944e73046246377b58f8d33a31a3276390d731 Mon Sep 17 00:00:00 2001 From: amontenegro Date: Thu, 19 Oct 2023 13:46:23 -0600 Subject: [PATCH 1/8] Initial commit --- .../common/manager/EmailDomainManager.java | 4 +- .../manager/impl/EmailDomainManagerImpl.java | 21 ++- .../orcid/persistence/dao/EmailDomainDao.java | 4 + .../dao/impl/EmailDomainDaoImpl.java | 22 +++ .../jpa/entities/EmailDomainEntity.java | 22 ++- .../create_email_domain_mapping_tables.xml | 38 +---- .../loader/cli/EmailDomainToRorLoader.java | 149 ++++++++++++++++++ .../controllers/EmailDomainController.java | 2 +- 8 files changed, 222 insertions(+), 40 deletions(-) create mode 100644 orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java index b6c7e310adb..4fe0fb04c15 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java @@ -6,10 +6,12 @@ public interface EmailDomainManager { EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category); - + boolean updateCategory(long id, EmailDomainEntity.DomainCategory category); EmailDomainEntity findByEmailDoman(String emailDomain); List findByCategory(EmailDomainEntity.DomainCategory category); + + EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId); } diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java index b5950fb9f91..d096f255f04 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java @@ 
-19,14 +19,18 @@ public class EmailDomainManagerImpl implements EmailDomainManager { @Resource(name = "emailDomainDaoReadOnly") private EmailDomainDao emailDomainDaoReadOnly; - @Override - public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category) { + private void validateEmailDomain(String emailDomain) { if (emailDomain == null || emailDomain.isBlank()) { throw new IllegalArgumentException("Email Domain must not be empty"); } if(!InternetDomainName.isValid(emailDomain)) { throw new IllegalArgumentException("Email Domain '" + emailDomain + "' is invalid"); } + } + + @Override + public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category) { + validateEmailDomain(emailDomain); if (category == null) { throw new IllegalArgumentException("Category must not be empty"); } @@ -57,4 +61,17 @@ public List findByCategory(DomainCategory category) { return emailDomainDaoReadOnly.findByCategory(category); } + @Override + public EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId) { + EmailDomainEntity existingEntity = emailDomainDaoReadOnly.findByEmailDoman(emailDomain); + if(existingEntity != null) { + if(!rorId.equals(existingEntity.getRorId())) { + emailDomainDao.updateRorId(existingEntity.getId(), rorId); + } + } else { + return emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + } + return existingEntity; + } + } diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java index bb5c5730a53..93ea7a3bc35 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java @@ -6,8 +6,12 @@ public interface EmailDomainDao extends GenericDao { EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category); + + 
EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category, String rorId); boolean updateCategory(long id, EmailDomainEntity.DomainCategory category); + + boolean updateRorId(long id, String rorId); EmailDomainEntity findByEmailDoman(String emailDomain); diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java index 5a8416d1e3c..6c75eec184f 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java @@ -31,6 +31,18 @@ public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory ca entityManager.persist(e); return e; } + + @Override + @Transactional + public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category, String rorId) { + LOG.debug("Creating domain {} with category {} and ror Id {}", emailDomain, category, rorId); + EmailDomainEntity e = new EmailDomainEntity(); + e.setEmailDomain(emailDomain); + e.setCategory(category); + e.setRorId(rorId); + entityManager.persist(e); + return e; + } @Override @Transactional @@ -42,6 +54,16 @@ public boolean updateCategory(long id, DomainCategory category) { return query.executeUpdate() > 0; } + @Override + @Transactional + public boolean updateRorId(long id, String rorId) { + LOG.debug("Updating domain with id {} with rorId {}", id, rorId); + Query query = entityManager.createNativeQuery("UPDATE email_domain SET ror_id=:rorId WHERE id = :id"); + query.setParameter("id", id); + query.setParameter("rorId", rorId.toString()); + return query.executeUpdate() > 0; + } + @Override public EmailDomainEntity findByEmailDoman(String emailDomain) { TypedQuery query = entityManager.createQuery("from EmailDomainEntity where emailDomain = :emailDomain", EmailDomainEntity.class); diff --git 
a/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java index 06ea938e449..c676f19cea8 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java @@ -28,6 +28,7 @@ public static enum DomainCategory {PERSONAL, PROFESSIONAL, UNDEFINED} private Long id; private String emailDomain; private DomainCategory category; + private String rorId; public EmailDomainEntity() { @@ -38,6 +39,12 @@ public EmailDomainEntity(String emailDomain, DomainCategory category) { this.category = category; } + public EmailDomainEntity(String emailDomain, DomainCategory category, String rorId) { + this.emailDomain = emailDomain; + this.category = category; + this.rorId = rorId; + } + @Override @Id @GeneratedValue(strategy = GenerationType.AUTO, generator = "email_domain_seq") @@ -70,9 +77,18 @@ public void setEmailDomain(String emailDomain) { this.emailDomain = emailDomain; } + @Column(name = "ror_id") + public String getRorId() { + return rorId; + } + + public void setRorId(String rorId) { + this.rorId = rorId; + } + @Override public int hashCode() { - return Objects.hash(category, emailDomain, id); + return Objects.hash(category, emailDomain, id, rorId); } @Override @@ -84,6 +100,6 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) return false; EmailDomainEntity other = (EmailDomainEntity) obj; - return category == other.category && Objects.equals(emailDomain, other.emailDomain) && Objects.equals(id, other.id); - } + return category == other.category && Objects.equals(emailDomain, other.emailDomain) && Objects.equals(id, other.id) && Objects.equals(rorId, other.rorId); + } } diff --git a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml 
b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml index 02d6c9d413a..5e5832934af 100644 --- a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml +++ b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml @@ -20,53 +20,25 @@ - - - - - - - - - - - - - - - - - - - - - + + - - ALTER TABLE email_domain_to_org_id ADD CONSTRAINT email_domain_fk FOREIGN KEY (email_domian_id) REFERENCES email_domain (id); - ALTER TABLE email_domain_to_org_id ADD CONSTRAINT org_disambiguated_id_fk FOREIGN KEY (org_disambiguated_id) REFERENCES org_disambiguated (id); - create index email_domain_to_org_id_domain_index on email_domain_to_org_id(email_domian_id); - create index email_domain_to_org_id_org_index on email_domain_to_org_id(org_disambiguated_id); - + - - - @@ -76,11 +48,11 @@ create index email_domain_domain_index on email_domain(email_domain); + create index email_domain_domain_index on email_domain(ror); - GRANT SELECT ON email_domain to orcidro; - GRANT SELECT ON email_domain_to_org_id to orcidro; + GRANT SELECT ON email_domain to orcidro; \ No newline at end of file diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java new file mode 100644 index 00000000000..6260f2e7adc --- /dev/null +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -0,0 +1,149 @@ +package org.orcid.scheduler.loader.cli; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.AbstractMap; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.orcid.core.common.manager.EmailDomainManager; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.context.ApplicationContext; +import org.springframework.context.support.ClassPathXmlApplicationContext; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; + +public class EmailDomainToRorLoader { + + private static final Logger LOG = LoggerFactory.getLogger(EmailDomainToRorLoader.class); + + private String filePath; + private EmailDomainManager emailDomainManager; + List> csvData; + + List invalidDomains = new ArrayList(); + + Map map = new HashMap(); + + public EmailDomainToRorLoader(String filePath) { + this.filePath = filePath; + init(filePath); + } + + public void execute() throws IOException { + load(this.filePath); + processCsvData(); + } + + private void init(String filePath) { + Path path = Paths.get(filePath); + if(!Files.exists(path)) { + LOG.error("File does not exists: '{}'", filePath); + System.exit(1); + } + + @SuppressWarnings("resource") + ApplicationContext context = new ClassPathXmlApplicationContext("orcid-core-context.xml"); + emailDomainManager = (EmailDomainManager) context.getBean("emailDomainManager"); + } + + private void load(String filePath) throws IOException { + LOG.info("Reading file {}", filePath); + FileReader fileReader = new FileReader(filePath); + CsvMapper csvMapper = new CsvMapper(); + csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); + MappingIterator> it = csvMapper.readerForListOf(String.class).readValues(fileReader); + + if (it != null) { + csvData = new ArrayList>(); + while(it.hasNext()) { + List r = it.next(); + csvData.add(r); + } + } + } + + private void processCsvData() { + for (List row : csvData) { + String domain = row.get(0); + String rorId = row.get(1); + boolean hasParent = false; + try { + String hasParentField = row.get(2); + hasParent = hasParentField == null ? 
false : Boolean.valueOf(hasParentField); + } catch(IndexOutOfBoundsException eoob) { + // Leave the hasParent as false + } + + if(!map.containsKey(domain)) { + DomainToRorMap dtrm = new DomainToRorMap(); + dtrm.setDomain(domain); + if(hasParent) { + dtrm.addIdWithParent(rorId); + } else { + dtrm.addIdWithNoParent(rorId); + } + map.put(rorId, dtrm); + } else { + DomainToRorMap dtrm = map.get(rorId); + if(hasParent) { + dtrm.addIdWithParent(rorId); + } else { + dtrm.addIdWithNoParent(rorId); + } + } + } + } + + private void storeDomainToRorMap() { + for(DomainToRorMap element : map.values()) { + // If the domain has only one entry with no parent, store that one + if(element.getIdsWithNoParent().size() == 1) { + + } else if(element.getIdsWithParent().size() == 1) { + // Else, if the domain has only one entry with parent, store that one + } else { + // Else log a warning because there is no way to provide a suggestion + } + } + } + + private class DomainToRorMap { + private String domain; + private List idsWithParent = new ArrayList(); + private List idsWithNoParent = new ArrayList(); + + public void setDomain(String domain) { + this.domain = domain; + } + + public String getDomain() { + return this.domain; + } + + public void addIdWithParent(String rorId) { + idsWithParent.add(rorId); + } + + public List getIdsWithParent() { + return this.idsWithParent; + } + + public void addIdWithNoParent(String rorId) { + idsWithNoParent.add(rorId); + } + + public List getIdsWithNoParent() { + return this.idsWithNoParent; + } + } +} diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index e42dd146931..76c33f8d640 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -27,7 +27,7 @@ public class 
EmailDomainController { ObjectMapper mapper = new ObjectMapper(); if(domain == null || domain.isBlank() || domain.length() > 254) { ObjectNode response = mapper.createObjectNode(); - response.put("error", "domain lenght too long or invalid"); + response.put("error", "Domain lenght too long, empty or invalid"); return response; } domain = OrcidStringUtils.stripHtml(domain); From 862dc684c63454b7ff68d34493beea6744059ea2 Mon Sep 17 00:00:00 2001 From: amontenegro Date: Thu, 19 Oct 2023 15:02:58 -0600 Subject: [PATCH 2/8] Need more testing --- .../create_email_domain_mapping_tables.xml | 2 +- .../loader/cli/EmailDomainToRorLoader.java | 25 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml index 5e5832934af..bd23c345b21 100644 --- a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml +++ b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml @@ -48,7 +48,7 @@ create index email_domain_domain_index on email_domain(email_domain); - create index email_domain_domain_index on email_domain(ror); + create index email_domain_ror_id_index on email_domain(ror_id); diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java index 6260f2e7adc..efd4cc6319a 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -5,12 +5,12 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.AbstractMap; -import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import 
java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.orcid.core.common.manager.EmailDomainManager; import org.slf4j.Logger; @@ -30,7 +30,7 @@ public class EmailDomainToRorLoader { private EmailDomainManager emailDomainManager; List> csvData; - List invalidDomains = new ArrayList(); + Set invalidDomains = new HashSet(); Map map = new HashMap(); @@ -42,6 +42,7 @@ public EmailDomainToRorLoader(String filePath) { public void execute() throws IOException { load(this.filePath); processCsvData(); + storeDomainToRorMap(); } private void init(String filePath) { @@ -106,15 +107,23 @@ private void processCsvData() { private void storeDomainToRorMap() { for(DomainToRorMap element : map.values()) { + LOG.debug("Processing domain {}", element.getDomain()); // If the domain has only one entry with no parent, store that one if(element.getIdsWithNoParent().size() == 1) { - + emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); } else if(element.getIdsWithParent().size() == 1) { // Else, if the domain has only one entry with parent, store that one + emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); } else { // Else log a warning because there is no way to provide a suggestion + invalidDomains.add(element.getDomain()); } } + + LOG.warn("The following domains couldn't be mapped"); + for(String invalidDomain : invalidDomains) { + LOG.warn("{}", invalidDomain); + } } private class DomainToRorMap { @@ -131,6 +140,7 @@ public String getDomain() { } public void addIdWithParent(String rorId) { + LOG.debug("Domain {} adding {} with parent flag", this.domain, rorId); idsWithParent.add(rorId); } @@ -139,6 +149,7 @@ public List getIdsWithParent() { } public void addIdWithNoParent(String rorId) { + LOG.debug("Domain {} adding {} with NO parent flag", this.domain, rorId); idsWithNoParent.add(rorId); } @@ -146,4 +157,10 @@ 
public List getIdsWithNoParent() { return this.idsWithNoParent; } } + + public static void main(String[] args) throws IOException { + String filePath = args[0]; + EmailDomainToRorLoader edl = new EmailDomainToRorLoader(filePath); + edl.execute(); + } } From f6a57f19b96049180ffca5963dac277ea8f90c1f Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 11:23:42 -0600 Subject: [PATCH 3/8] Data loaders done --- .../common/manager/EmailDomainManager.java | 3 +- .../manager/impl/EmailDomainManagerImpl.java | 15 ++++++-- .../loader/cli/EmailDomainLoader.java | 8 ++-- .../loader/cli/EmailDomainToRorLoader.java | 37 +++++++++++++++---- .../controllers/EmailDomainController.java | 2 +- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java index 4fe0fb04c15..167e25e2d2f 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java @@ -2,6 +2,7 @@ import java.util.List; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.orcid.persistence.jpa.entities.EmailDomainEntity; public interface EmailDomainManager { @@ -13,5 +14,5 @@ public interface EmailDomainManager { List findByCategory(EmailDomainEntity.DomainCategory category); - EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId); + STATUS createOrUpdateEmailDomain(String emailDomain, String rorId); } diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java index d096f255f04..dcfe88f952b 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java +++ 
b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java @@ -13,6 +13,8 @@ public class EmailDomainManagerImpl implements EmailDomainManager { + public enum STATUS {CREATED, UPDATED}; + @Resource(name = "emailDomainDao") private EmailDomainDao emailDomainDao; @@ -62,16 +64,21 @@ public List findByCategory(DomainCategory category) { } @Override - public EmailDomainEntity createOrUpdateEmailDomain(String emailDomain, String rorId) { + public STATUS createOrUpdateEmailDomain(String emailDomain, String rorId) { EmailDomainEntity existingEntity = emailDomainDaoReadOnly.findByEmailDoman(emailDomain); if(existingEntity != null) { if(!rorId.equals(existingEntity.getRorId())) { - emailDomainDao.updateRorId(existingEntity.getId(), rorId); + boolean updated = emailDomainDao.updateRorId(existingEntity.getId(), rorId); + if(updated) + return STATUS.UPDATED; } } else { - return emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + EmailDomainEntity newEntity = emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + if (newEntity != null) { + return STATUS.CREATED; + } } - return existingEntity; + return null; } } diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java index 30977c1abfe..f7517c6a436 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java @@ -97,9 +97,11 @@ private void process() { total += 1; } } - LOG.warn("List of invalid domains:"); - for(String invalidDomain : invalidDomains) { - LOG.warn(invalidDomain); + if(!invalidDomains.isEmpty()) { + LOG.warn("List of invalid domains:"); + for(String invalidDomain : invalidDomains) { + LOG.info(invalidDomain); + } } LOG.info("Process done, total: {}, new 
entities: {}, updated entities: {}", total, newEntities, updatedEntities); } diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java index efd4cc6319a..adfd0b603f6 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -13,6 +13,7 @@ import java.util.Set; import org.orcid.core.common.manager.EmailDomainManager; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.context.ApplicationContext; @@ -34,6 +35,9 @@ public class EmailDomainToRorLoader { Map map = new HashMap(); + private int updatedEntries = 0; + private int createdEntries = 0; + public EmailDomainToRorLoader(String filePath) { this.filePath = filePath; init(filePath); @@ -62,13 +66,17 @@ private void load(String filePath) throws IOException { FileReader fileReader = new FileReader(filePath); CsvMapper csvMapper = new CsvMapper(); csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); + csvMapper.enable(CsvParser.Feature.TRIM_SPACES); + MappingIterator> it = csvMapper.readerForListOf(String.class).readValues(fileReader); if (it != null) { csvData = new ArrayList>(); while(it.hasNext()) { List r = it.next(); - csvData.add(r); + // Hack to avoid adding empty lines if they are present, we need at least 2 columns, the domain and the ror id + if(r.size() > 1) + csvData.add(r); } } } @@ -93,9 +101,9 @@ private void processCsvData() { } else { dtrm.addIdWithNoParent(rorId); } - map.put(rorId, dtrm); + map.put(domain, dtrm); } else { - DomainToRorMap dtrm = map.get(rorId); + DomainToRorMap dtrm = map.get(domain); if(hasParent) { dtrm.addIdWithParent(rorId); } else { @@ -110,20 +118,33 @@ private void storeDomainToRorMap() 
{ LOG.debug("Processing domain {}", element.getDomain()); // If the domain has only one entry with no parent, store that one if(element.getIdsWithNoParent().size() == 1) { - emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } } else if(element.getIdsWithParent().size() == 1) { // Else, if the domain has only one entry with parent, store that one - emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } } else { // Else log a warning because there is no way to provide a suggestion invalidDomains.add(element.getDomain()); } } - LOG.warn("The following domains couldn't be mapped"); - for(String invalidDomain : invalidDomains) { - LOG.warn("{}", invalidDomain); + if(!invalidDomains.isEmpty()) { + LOG.warn("The following domains couldn't be mapped ({} In total):", invalidDomains.size()); + for(String invalidDomain : invalidDomains) { + LOG.warn("{}", invalidDomain); + } } + LOG.info("Created entries: {}, updated entries: {}", createdEntries, updatedEntries); } private class DomainToRorMap { diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index 76c33f8d640..39cd788af50 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -27,7 +27,7 @@ 
public class EmailDomainController { ObjectMapper mapper = new ObjectMapper(); if(domain == null || domain.isBlank() || domain.length() > 254) { ObjectNode response = mapper.createObjectNode(); - response.put("error", "Domain lenght too long, empty or invalid"); + response.put("error", "Domain length too short, empty or invalid"); return response; } domain = OrcidStringUtils.stripHtml(domain); From 6b49808e402f5e3a46f2eaa4178f7d573b566ad6 Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 12:02:59 -0600 Subject: [PATCH 4/8] Endpoint to get org info --- .../controllers/EmailDomainController.java | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index 39cd788af50..9a79e493f88 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -1,11 +1,15 @@ package org.orcid.frontend.web.controllers; +import java.util.List; + import javax.annotation.Resource; import javax.ws.rs.core.MediaType; import org.orcid.core.common.manager.EmailDomainManager; +import org.orcid.core.manager.OrgDisambiguatedManager; import org.orcid.core.utils.OrcidStringUtils; import org.orcid.persistence.jpa.entities.EmailDomainEntity; +import org.orcid.pojo.OrgDisambiguated; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; @@ -22,6 +26,9 @@ public class EmailDomainController { @Resource private EmailDomainManager emailDomainManager; + @Resource + private OrgDisambiguatedManager orgDisambiguatedManager; + @RequestMapping(value = "/find-category", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON) public @ResponseBody 
ObjectNode findCategory(@RequestParam("domain") String domain) { ObjectMapper mapper = new ObjectMapper(); @@ -42,4 +49,35 @@ public class EmailDomainController { return response; } } + + @RequestMapping(value = "/find-org", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON) + public @ResponseBody ObjectNode findOrgInfo(@RequestParam("domain") String domain) { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode response = mapper.createObjectNode(); + if(domain == null || domain.isBlank() || domain.length() > 254) { + response.put("error", "Domain length too short, empty or invalid"); + return response; + } + domain = OrcidStringUtils.stripHtml(domain); + + EmailDomainEntity ede = emailDomainManager.findByEmailDoman(domain); + if(ede != null) { + String emailDomain = ede.getEmailDomain(); + if(emailDomain != null && !emailDomain.isBlank()) { + // Escape the : on the email domain to be able to search in solr + emailDomain = emailDomain.replace(":", "\\:"); + String searchTerm = "org-disambiguated-id-from-source:" + emailDomain; + List orgsInfo = orgDisambiguatedManager.searchOrgsFromSolr(searchTerm, 0, 1, false); + if(orgsInfo != null && !orgsInfo.isEmpty()) { + // Pick the first result + OrgDisambiguated firstOrg = orgsInfo.get(0); + response.put("ROR", domain); + response.put("Org Name", firstOrg.getValue()); + response.put("Country", firstOrg.getCountry()); + response.put("City", firstOrg.getCity()); + } + } + } + return response; + } } From 5162de286c86e36f3c0106de5a010998ee57b714 Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 14:01:14 -0600 Subject: [PATCH 5/8] Endpoint to fetch org info is done, working on unit testing --- .../orcid/core/solr/OrcidSolrOrgsClient.java | 17 ++++++++++- .../controllers/EmailDomainController.java | 29 ++++++++----------- .../resources/orcid-frontend-security.xml | 4 ++- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git 
a/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java b/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java index e30eaf806fb..be4c57fa21a 100644 --- a/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java +++ b/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java @@ -33,7 +33,7 @@ public class OrcidSolrOrgsClient { private static final String SOLR_SELF_SERVICE_ORGS_QUERY = "(org-disambiguated-id-from-source:%s)^50.0 (org-disambiguated-name%s)^50.0 (org-disambiguated-name-string:%s)^25.0"; - + private static final String SOLR_ORG_BY_ROR_ID_QUERY = "org-disambiguated-id-from-source:%s"; public OrgDisambiguatedSolrDocument findById(Long id) { SolrQuery query = new SolrQuery(); @@ -93,4 +93,19 @@ public List getOrgsForSelfService(String searchTer throw new NonTransientDataAccessResourceException(errorMessage, se); } } + + public OrgDisambiguatedSolrDocument getOrgByRorId(String rorId) { + SolrQuery query = new SolrQuery(); + // Escape the : on the email domain to be able to search in solr + query.setQuery(SOLR_ORG_BY_ROR_ID_QUERY.replace("%s", rorId.replace(":", "\\:"))); + query.addOrUpdateSort("score", ORDER.desc); + try { + QueryResponse queryResponse = solrReadOnlyOrgsClient.query(query); + List result = queryResponse.getBeans(OrgDisambiguatedSolrDocument.class); + return (result == null || result.isEmpty()) ? 
null : result.get(0); + } catch (SolrServerException | IOException se) { + String errorMessage = MessageFormat.format("Error when attempting to search for orgs by ror id, with ror id {0}", new Object[] { rorId }); + throw new NonTransientDataAccessResourceException(errorMessage, se); + } + } } diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index 9a79e493f88..b4626ba8551 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -1,15 +1,13 @@ package org.orcid.frontend.web.controllers; -import java.util.List; - import javax.annotation.Resource; import javax.ws.rs.core.MediaType; import org.orcid.core.common.manager.EmailDomainManager; -import org.orcid.core.manager.OrgDisambiguatedManager; +import org.orcid.core.solr.OrcidSolrOrgsClient; import org.orcid.core.utils.OrcidStringUtils; import org.orcid.persistence.jpa.entities.EmailDomainEntity; -import org.orcid.pojo.OrgDisambiguated; +import org.orcid.utils.solr.entities.OrgDisambiguatedSolrDocument; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; @@ -27,7 +25,7 @@ public class EmailDomainController { private EmailDomainManager emailDomainManager; @Resource - private OrgDisambiguatedManager orgDisambiguatedManager; + private OrcidSolrOrgsClient orcidSolrOrgsClient; @RequestMapping(value = "/find-category", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON) public @ResponseBody ObjectNode findCategory(@RequestParam("domain") String domain) { @@ -62,19 +60,16 @@ public class EmailDomainController { EmailDomainEntity ede = emailDomainManager.findByEmailDoman(domain); if(ede != null) { - String emailDomain = 
ede.getEmailDomain(); - if(emailDomain != null && !emailDomain.isBlank()) { - // Escape the : on the email domain to be able to search in solr - emailDomain = emailDomain.replace(":", "\\:"); - String searchTerm = "org-disambiguated-id-from-source:" + emailDomain; - List orgsInfo = orgDisambiguatedManager.searchOrgsFromSolr(searchTerm, 0, 1, false); - if(orgsInfo != null && !orgsInfo.isEmpty()) { + String rorId = ede.getRorId(); + if(rorId != null && !rorId.isBlank()) { + OrgDisambiguatedSolrDocument orgInfo = orcidSolrOrgsClient.getOrgByRorId(rorId); + if(orgInfo != null) { // Pick the first result - OrgDisambiguated firstOrg = orgsInfo.get(0); - response.put("ROR", domain); - response.put("Org Name", firstOrg.getValue()); - response.put("Country", firstOrg.getCountry()); - response.put("City", firstOrg.getCity()); + response.put("Domain", domain); + response.put("ROR", rorId); + response.put("Org Name", orgInfo.getOrgDisambiguatedName()); + response.put("Country", orgInfo.getOrgDisambiguatedCountry()); + response.put("City", orgInfo.getOrgDisambiguatedCity()); } } } diff --git a/orcid-web/src/main/resources/orcid-frontend-security.xml b/orcid-web/src/main/resources/orcid-frontend-security.xml index d264b73f05e..42e2fd62ab0 100644 --- a/orcid-web/src/main/resources/orcid-frontend-security.xml +++ b/orcid-web/src/main/resources/orcid-frontend-security.xml @@ -421,7 +421,9 @@ + access="IS_AUTHENTICATED_ANONYMOUSLY" /> + Date: Fri, 20 Oct 2023 14:19:50 -0600 Subject: [PATCH 6/8] A few unit tests --- .../manager/EmailDomainManagerTest.java | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java b/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java index bc347b9977f..de67c61336a 100644 --- a/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java +++ 
b/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java @@ -4,7 +4,11 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -16,6 +20,7 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.orcid.core.common.manager.impl.EmailDomainManagerImpl; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.orcid.persistence.dao.EmailDomainDao; import org.orcid.persistence.jpa.entities.EmailDomainEntity; import org.orcid.persistence.jpa.entities.EmailDomainEntity.DomainCategory; @@ -38,12 +43,17 @@ public void before() { EmailDomainEntity e1 = new EmailDomainEntity("gmail.com", DomainCategory.PERSONAL); EmailDomainEntity e2 = new EmailDomainEntity("yahoo.com", DomainCategory.PERSONAL); - EmailDomainEntity e3 = new EmailDomainEntity("orcid.org", DomainCategory.PROFESSIONAL); + EmailDomainEntity e3 = new EmailDomainEntity("orcid.org", DomainCategory.PROFESSIONAL, "https://ror.org/04fa4r544"); + e3.setId(1000L); when(emailDomainDaoReadOnlyMock.findByCategory(eq(DomainCategory.PERSONAL))).thenReturn(List.of(e1, e2)); when(emailDomainDaoReadOnlyMock.findByCategory(eq(DomainCategory.PROFESSIONAL))).thenReturn(List.of(e3)); when(emailDomainDaoReadOnlyMock.findByEmailDoman("gmail.com")).thenReturn(e1); + when(emailDomainDaoReadOnlyMock.findByEmailDoman("orcid.org")).thenReturn(e3); + + when(emailDomainDaoMock.createEmailDomain(eq("new.domain"), eq(DomainCategory.PROFESSIONAL), eq("https://ror.org/0"))).thenReturn(new EmailDomainEntity("new.domain", 
DomainCategory.PROFESSIONAL, "https://ror.org/0")); + when(emailDomainDaoMock.updateRorId(1000L, "https://ror.org/0")).thenReturn(true); } @Test(expected = IllegalArgumentException.class) @@ -135,4 +145,28 @@ public void findByCategory_TwoResultsTest() { assertEquals(DomainCategory.PERSONAL, personal.get(1).getCategory()); assertEquals("yahoo.com", personal.get(1).getEmailDomain()); } + + @Test + public void createOrUpdateEmailDomain_CreateTest() { + STATUS s = edm.createOrUpdateEmailDomain("new.domain", "https://ror.org/0"); + assertEquals(STATUS.CREATED, s); + verify(emailDomainDaoMock, times(1)).createEmailDomain(eq("new.domain"), eq(DomainCategory.PROFESSIONAL), eq("https://ror.org/0")); + verify(emailDomainDaoMock, never()).updateRorId(anyLong(), anyString()); + } + + @Test + public void createOrUpdateEmailDomain_UpdateTest() { + STATUS s = edm.createOrUpdateEmailDomain("orcid.org", "https://ror.org/0"); + assertEquals(STATUS.UPDATED, s); + verify(emailDomainDaoMock, times(1)).updateRorId(eq(1000L), eq("https://ror.org/0")); + verify(emailDomainDaoMock, never()).createEmailDomain(anyString(), any(), anyString()); + } + + @Test + public void createOrUpdateEmailDomain_NoUpdatesTest() { + STATUS s = edm.createOrUpdateEmailDomain("orcid.org", "https://ror.org/04fa4r544"); + assertNull(s); + verify(emailDomainDaoMock, never()).updateRorId(anyLong(), anyString()); + verify(emailDomainDaoMock, never()).createEmailDomain(anyString(), any(), anyString()); + } } From afcb0c82899d9dcdeb2df2a6a1d4e9baf43b769f Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 15:01:49 -0600 Subject: [PATCH 7/8] Make the collections private since no one else should use them --- .../scheduler/loader/cli/EmailDomainToRorLoader.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java 
b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java index adfd0b603f6..864dc347c59 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -29,12 +29,9 @@ public class EmailDomainToRorLoader { private String filePath; private EmailDomainManager emailDomainManager; - List> csvData; - - Set invalidDomains = new HashSet(); - - Map map = new HashMap(); - + private List> csvData; + private Set invalidDomains = new HashSet(); + private Map map = new HashMap(); private int updatedEntries = 0; private int createdEntries = 0; From 85c1908a3cde208c4e2cab2a13f0200e9712fe8a Mon Sep 17 00:00:00 2001 From: amontenegro Date: Fri, 20 Oct 2023 15:05:58 -0600 Subject: [PATCH 8/8] Close file reader --- .../orcid/scheduler/loader/cli/EmailDomainToRorLoader.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java index 864dc347c59..8037f5d62d0 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -32,6 +32,7 @@ public class EmailDomainToRorLoader { private List> csvData; private Set invalidDomains = new HashSet(); private Map map = new HashMap(); + private int updatedEntries = 0; private int createdEntries = 0; @@ -75,7 +76,8 @@ private void load(String filePath) throws IOException { if(r.size() > 1) csvData.add(r); } - } + } + fileReader.close(); } private void processCsvData() {