diff --git a/common/pom.xml b/common/pom.xml
index 75af6f68..2013fb71 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -21,6 +21,11 @@
             <groupId>com.google.guava</groupId>
             <artifactId>guava</artifactId>
         </dependency>
+        <dependency>
+            <groupId>jakarta.annotation</groupId>
+            <artifactId>jakarta.annotation-api</artifactId>
+            <version>2.1.1</version>
+        </dependency>
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-databind</artifactId>
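For context on the new dependency: Spring Framework 6 / Spring Boot 3 read lifecycle annotations from the `jakarta.annotation` namespace, which is why `jakarta.annotation-api` is added here, it supplies the `@PostConstruct` used by the reworked `Crypto` component below. A minimal sketch of that lifecycle hook (the class name is illustrative, not part of this change):

```java
import jakarta.annotation.PostConstruct;
import org.springframework.stereotype.Component;

@Component
class LifecycleSketch {
    @PostConstruct
    void init() {
        // Runs once, after dependency injection completes. This is the same
        // mechanism Crypto uses to copy the injected flag into its static field.
        System.out.println("bean initialized");
    }
}
```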
diff --git a/common/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/crypto/Crypto.java b/common/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/crypto/Crypto.java
index e9245a42..e15767b2 100644
--- a/common/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/crypto/Crypto.java
+++ b/common/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/crypto/Crypto.java
@@ -1,74 +1,147 @@
 package edu.harvard.hms.dbmi.avillach.hpds.crypto;
 
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import jakarta.annotation.PostConstruct;
+import javax.crypto.*;
+import javax.crypto.spec.GCMParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
-import java.security.InvalidAlgorithmParameterException;
-import java.security.InvalidKeyException;
-import java.security.NoSuchAlgorithmException;
-import java.security.SecureRandom;
+import java.security.*;
 import java.util.HashMap;
 
-import javax.crypto.BadPaddingException;
-import javax.crypto.Cipher;
-import javax.crypto.IllegalBlockSizeException;
-import javax.crypto.NoSuchPaddingException;
-import javax.crypto.SecretKey;
-import javax.crypto.ShortBufferException;
-import javax.crypto.spec.GCMParameterSpec;
-import javax.crypto.spec.SecretKeySpec;
-
-import org.apache.commons.io.IOUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+/**
+ * Provides encryption and decryption functionality using AES-GCM.
+ * <p>
+ * This class manages encryption keys and offers methods to encrypt and decrypt data.
+ * It supports configurable encryption via the {@code encryption.enabled} property.
+ * <p>
+ * Features:
+ * <ul>
+ *     <li>Loads named encryption keys from files and holds them in memory.</li>
+ *     <li>Encrypts and decrypts byte arrays using AES/GCM/NoPadding.</li>
+ *     <li>Returns data unmodified when encryption is disabled.</li>
+ * </ul>
+ * <p>
+ * Encryption behavior is controlled by the {@code encryption.enabled} property, which
+ * is set via {@link org.springframework.beans.factory.annotation.Value} from
+ * {@code application.properties}. When encryption is enabled, the class loads a default
+ * encryption key at initialization.
+ *
+ */
+@Component
 public class Crypto {
 
 	public static final String DEFAULT_KEY_NAME = "DEFAULT";
-
-	// This needs to be set in a static initializer block to be overridable in tests.
-	private static final String DEFAULT_ENCRYPTION_KEY_PATH;
-	static{
-		DEFAULT_ENCRYPTION_KEY_PATH = "/opt/local/hpds/encryption_key";
-	}
+	private static final String DEFAULT_ENCRYPTION_KEY_PATH = "/opt/local/hpds/encryption_key";
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(Crypto.class);
 
+	private static final HashMap<String, byte[]> keys = new HashMap<>();
+
+	@Value("${encryption.enabled:true}")
+	private boolean encryptionEnabled;
 
-	private static final HashMap<String, byte[]> keys = new HashMap<String, byte[]>();
+	public static boolean ENCRYPTION_ENABLED = true;
 
+	@PostConstruct
+	public void init() {
+		ENCRYPTION_ENABLED = encryptionEnabled;
+		LOGGER.info("ENCRYPTION_ENABLED set to: {}", ENCRYPTION_ENABLED);
+		loadDefaultKey();
+	}
+
+	/**
+	 * Loads the default encryption key from the predefined file path.
+	 * <p>
+	 * This method checks if encryption is enabled before attempting to load the key.
+	 * If encryption is disabled, no action is taken.
+	 * <p>
+	 * The key is loaded using {@link #loadKey(String, String)} with the default key name
+	 * and default encryption key file path.
+	 */
 	public static void loadDefaultKey() {
-		loadKey(DEFAULT_KEY_NAME, DEFAULT_ENCRYPTION_KEY_PATH);
+		if (ENCRYPTION_ENABLED) {
+			loadKey(DEFAULT_KEY_NAME, DEFAULT_ENCRYPTION_KEY_PATH);
+		}
 	}
 
+	/**
+	 * Loads an encryption key from the specified file path and stores it in memory.
+	 * <p>
+	 * The key is read as a string from the file, trimmed of any extra spaces, and
+	 * converted into a byte array before being stored in the key map.
+	 * <p>
+	 * If the key file is not found or an error occurs while reading, an error is logged.
+	 *
+	 * @param keyName  The name under which the key will be stored.
+	 * @param filePath The file path from which the encryption key is loaded.
+	 */
 	public static void loadKey(String keyName, String filePath) {
 		try {
 			setKey(keyName, IOUtils.toString(new FileInputStream(filePath), Charset.forName("UTF-8")).trim().getBytes());
-			LOGGER.info("****LOADED CRYPTO KEY****");
+			LOGGER.info("****LOADED CRYPTO KEY****");
 		} catch (IOException e) {
 			LOGGER.error("****CRYPTO KEY NOT FOUND****", e);
 		}
 	}
 
-	public static byte[] encryptData(byte[] plaintextBytes) {
-		return encryptData(DEFAULT_KEY_NAME, plaintextBytes);
+	/**
+	 * Encrypts the given plaintext using the default encryption key.
+	 * <p>
+	 * If encryption is disabled, the plaintext is returned as-is.
+	 * This method delegates encryption to {@link #encryptData(String, byte[])}
+	 * using the default key.
+	 *
+	 * @param plaintext The byte array to be encrypted.
+	 * @return The encrypted byte array, or the original plaintext if encryption is disabled.
+	 */
+	public static byte[] encryptData(byte[] plaintext) {
+		return encryptData(DEFAULT_KEY_NAME, plaintext);
 	}
-
-	public static byte[] encryptData(String keyName, byte[] plaintextBytes) {
+
+	/**
+	 * Encrypts the given plaintext using the specified encryption key.
+	 * <p>
+	 * This method uses AES/GCM/NoPadding encryption with a randomly generated IV.
+	 * The IV is included in the output for decryption purposes.
+	 * <p>
+	 * The method returns a byte array structured as follows:
+	 * - First 4 bytes: The length of the IV.
+	 * - Next IV-length bytes: The IV itself.
+	 * - Remaining bytes: The encrypted ciphertext.
+	 * <p>
+	 * If encryption is disabled, the plaintext is returned unmodified.
+	 *
+	 * @param keyName   The name of the encryption key to use.
+	 * @param plaintext The byte array containing the data to encrypt.
+	 * @return The encrypted byte array, or the original plaintext if encryption is disabled.
+	 * @throws RuntimeException If an error occurs during encryption.
+	 */
+	public static byte[] encryptData(String keyName, byte[] plaintext) {
+		if (!ENCRYPTION_ENABLED) {
+			return plaintext;
+		}
+
 		byte[] key = keys.get(keyName);
 		SecureRandom secureRandom = new SecureRandom();
 		SecretKey secretKey = new SecretKeySpec(key, "AES");
-		byte[] iv = new byte[12]; //NEVER REUSE THIS IV WITH SAME KEY
+		byte[] iv = new byte[12]; // NEVER REUSE THIS IV WITH SAME KEY
 		secureRandom.nextBytes(iv);
 		byte[] cipherText;
 		Cipher cipher;
 		try {
 			cipher = Cipher.getInstance("AES/GCM/NoPadding");
-			GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); //128 bit auth tag length
+			GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); // 128-bit auth tag length
 			cipher.init(Cipher.ENCRYPT_MODE, secretKey, parameterSpec);
-			cipherText = new byte[cipher.getOutputSize(plaintextBytes.length)];
-			cipher.doFinal(plaintextBytes, 0, plaintextBytes.length, cipherText, 0);
+			cipherText = new byte[cipher.getOutputSize(plaintext.length)];
+			cipher.doFinal(plaintext, 0, plaintext.length, cipherText, 0);
 			LOGGER.debug("Length of cipherText : " + cipherText.length);
 			ByteBuffer byteBuffer = ByteBuffer.allocate(4 + iv.length + cipherText.length);
 			byteBuffer.putInt(iv.length);
@@ -81,13 +154,40 @@ public static byte[] encryptData(String keyName, byte[] plaintextBytes) {
 		}
 	}
 
-	public static byte[] decryptData(byte[] encrypted) {
-		return decryptData(DEFAULT_KEY_NAME, encrypted);
+	/**
+	 * Decrypts the provided data using the default encryption key.
+	 * <p>
+	 * If encryption is disabled, the method returns the input data as-is.
+	 *
+	 * @param data The byte array to be decrypted.
+	 * @return The decrypted byte array, or the original data if encryption is disabled.
+	 */
+	public static byte[] decryptData(byte[] data) {
+		return decryptData(DEFAULT_KEY_NAME, data);
 	}
 
-	public static byte[] decryptData(String keyName, byte[] encrypted) {
+	/**
+	 * Decrypts the provided data using the specified encryption key.
+	 * <p>
+	 * If encryption is disabled, the method returns the input data as-is.
+	 * <p>
+	 * The method assumes the input data is structured as follows:
+	 * - First 4 bytes: The length of the IV (Initialization Vector).
+	 * - Next IV-length bytes: The actual IV.
+	 * - Remaining bytes: The ciphertext.
+	 *
+	 * @param keyName The name of the encryption key to use for decryption.
+	 * @param data    The byte array containing the encrypted data.
+	 * @return The decrypted byte array, or the original data if encryption is disabled.
+	 * @throws RuntimeException If an error occurs during decryption.
+	 */
+	public static byte[] decryptData(String keyName, byte[] data) {
+		if (!ENCRYPTION_ENABLED) {
+			return data;
+		}
+
 		byte[] key = keys.get(keyName);
-		ByteBuffer byteBuffer = ByteBuffer.wrap(encrypted);
+		ByteBuffer byteBuffer = ByteBuffer.wrap(data);
 		int ivLength = byteBuffer.getInt();
 		byte[] iv = new byte[ivLength];
 		byteBuffer.get(iv);
@@ -98,7 +198,8 @@ public static byte[] decryptData(String keyName, byte[] encrypted) {
 			cipher = Cipher.getInstance("AES/GCM/NoPadding");
 			cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key, "AES"), new GCMParameterSpec(128, iv));
 			return cipher.doFinal(cipherText);
-		} catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | InvalidAlgorithmParameterException | IllegalBlockSizeException | BadPaddingException e) {
+		} catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | InvalidAlgorithmParameterException |
+				IllegalBlockSizeException | BadPaddingException e) {
 			throw new RuntimeException("Exception caught trying to decrypt data : " + e, e);
 		}
 	}
@@ -110,5 +211,4 @@ private static void setKey(String keyName, byte[] key) {
 	public static boolean hasKey(String keyName) {
 		return keys.containsKey(keyName);
 	}
-
-}
+}
\ No newline at end of file
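To make the documented payload layout concrete, here is a hedged round-trip sketch against the new `Crypto` API. The key name and file path are illustrative (the file must hold a 16-, 24-, or 32-byte AES key), and the parsing simply follows the layout the Javadoc describes: 4-byte IV length, then the IV, then ciphertext plus GCM tag.

```java
import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class CryptoRoundTripSketch {
    public static void main(String[] args) {
        // Assumes /tmp/test_key holds a valid AES key; ENCRYPTION_ENABLED defaults to true
        Crypto.loadKey("TEST", "/tmp/test_key");

        byte[] encrypted = Crypto.encryptData("TEST", "hello".getBytes(StandardCharsets.UTF_8));

        // Walk the documented wire format by hand:
        ByteBuffer buffer = ByteBuffer.wrap(encrypted);
        int ivLength = buffer.getInt();                   // first 4 bytes: IV length (12 here)
        byte[] iv = new byte[ivLength];
        buffer.get(iv);                                   // next ivLength bytes: the IV
        byte[] cipherText = new byte[buffer.remaining()]; // rest: ciphertext + 16-byte auth tag
        buffer.get(cipherText);

        byte[] decrypted = Crypto.decryptData("TEST", encrypted);
        System.out.println(new String(decrypted, StandardCharsets.UTF_8)); // prints "hello"
    }
}
```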
diff --git a/docker/pic-sure-hpds-etl/Dockerfile b/docker/pic-sure-hpds-etl/Dockerfile
index 661b98a1..dc258f63 100644
--- a/docker/pic-sure-hpds-etl/Dockerfile
+++ b/docker/pic-sure-hpds-etl/Dockerfile
@@ -1,34 +1,19 @@
-FROM maven:3.9.4-amazoncorretto-21 AS build
-
-RUN yum update -y && yum install -y git && yum clean all
-
-WORKDIR /app
-
-COPY .m2 /root/.m2
-
-COPY . .
-
-RUN mvn clean install -DskipTests
-
-FROM eclipse-temurin:21-alpine
-
-RUN apk add --no-cache --purge -uU bash curl wget unzip gnupg openssl && \
-    rm -rf /var/cache/apk/* /tmp/*
-
-WORKDIR /app
-COPY --from=build /app/docker/pic-sure-hpds-etl/SQLLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVLoaderNewSearch-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVDumper-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/VCFLocalLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/VariantMetadataLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/UnifiedVCFLocalLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/MultialleleCounter-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/RekeyDataset-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/RemoveConceptFromMetadata-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/HideAnnotationCategoryValue-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/SequentialLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/LowRAMMultiCSVLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/create_key.sh .
-
-ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -Xmx${HEAPSIZE:-2048}m -jar ${LOADER_NAME:-CSVLoader}-jar-with-dependencies.jar"]
+FROM openjdk:21-jdk-slim AS build
+
+RUN apt-get update -y && apt-get install -y gnupg openssl && rm -rf /var/lib/apt/lists/*
+
+ADD create_key.sh .
+ADD SQLLoader-jar-with-dependencies.jar .
+ADD CSVLoader-jar-with-dependencies.jar .
+ADD CSVLoaderNewSearch-jar-with-dependencies.jar .
+ADD CSVDumper-jar-with-dependencies.jar .
+ADD VCFLocalLoader-jar-with-dependencies.jar .
+ADD VariantMetadataLoader-jar-with-dependencies.jar .
+ADD UnifiedVCFLocalLoader-jar-with-dependencies.jar .
+ADD MultialleleCounter-jar-with-dependencies.jar .
+ADD RekeyDataset-jar-with-dependencies.jar .
+ADD RemoveConceptFromMetadata-jar-with-dependencies.jar .
+ADD HideAnnotationCategoryValue-jar-with-dependencies.jar .
+ADD SequentialLoader-jar-with-dependencies.jar .
+
+ENTRYPOINT java $JAVA_OPTS -Xmx${HEAPSIZE:-2048}m -jar ${LOADER_NAME:-CSVLoader}-jar-with-dependencies.jar
\ No newline at end of file
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
index 62b5b8e9..0f394e15 100644
--- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
@@ -29,14 +29,14 @@ public class LoadingStore {
 
 	TreeMap<String, ColumnMeta> metadataMap = new TreeMap<>();
 
 	private static Logger log = LoggerFactory.getLogger(LoadingStore.class);
-	
+
 	public LoadingCache<String, PhenoCube> store = CacheBuilder.newBuilder()
-			.maximumSize(16)
+			.maximumSize(2048)
 			.removalListener(new RemovalListener<String, PhenoCube>() {
 
 				@Override
 				public void onRemoval(RemovalNotification<String, PhenoCube> arg0) {
-					log.info("removing " + arg0.getKey());
+					//log.debug("Cache removal and writing to disk: " + arg0.getKey());
 					if(arg0.getValue().getLoadingMap()!=null) {
 						complete(arg0.getValue());
 					}
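The `maximumSize` bump from 16 to 2048 matters because the removal listener is the write path: completed cubes are only flushed to disk when they are evicted. A self-contained sketch of that Guava pattern, using toy types rather than the HPDS classes:

```java
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalNotification;

public class EvictionWriteSketch {
    public static void main(String[] args) {
        LoadingCache<String, StringBuilder> cache = CacheBuilder.newBuilder()
                .maximumSize(2048) // mirrors the new LoadingStore limit
                .removalListener((RemovalNotification<String, StringBuilder> n) ->
                        System.out.println("evicted, flush to disk: " + n.getKey()))
                .build(new CacheLoader<String, StringBuilder>() {
                    @Override
                    public StringBuilder load(String key) {
                        return new StringBuilder();
                    }
                });

        cache.getUnchecked("concept-a").append("value");
        cache.invalidateAll(); // fires the removal listener for every entry,
        cache.cleanUp();       // as CSVLoaderService now does per new concept
    }
}
```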
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
index 4b86232d..c681cec4 100644
--- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
@@ -1,85 +1,31 @@
 package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
 
 import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
-import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.PhenoCube;
-import edu.harvard.hms.dbmi.avillach.hpds.etl.LoadingStore;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVRecord;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.util.Date;
-
-@SuppressWarnings({"unchecked", "rawtypes"})
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.FilterType;
+
+@SpringBootApplication
+@ComponentScan(
+        basePackages = "edu.harvard.hms.dbmi.avillach.hpds",
+        includeFilters = @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE,
+                classes = Crypto.class)
+)
 public class CSVLoaderNewSearch {
-	private static final LoadingStore store = new LoadingStore();
-
-	private static final Logger log = LoggerFactory.getLogger(CSVLoaderNewSearch.class);
-
-	private static boolean DO_VARNAME_ROLLUP = false;
-
-	private static final String HPDS_DIRECTORY = "/opt/local/hpds/";
-
-	public static void main(String[] args) throws IOException {
-		if (args.length > 1) {
-			if (args[0].equalsIgnoreCase("NO_ROLLUP")) {
-				log.info("NO_ROLLUP SET.");
-				DO_VARNAME_ROLLUP = false;
-			}
-		}
-		store.allObservationsStore = new RandomAccessFile(HPDS_DIRECTORY + "allObservationsStore.javabin", "rw");
-		initialLoad();
-		store.saveStore(HPDS_DIRECTORY);
-	}
-
-	private static void initialLoad() throws IOException {
-		Crypto.loadDefaultKey();
-		Reader in = new FileReader(HPDS_DIRECTORY + "allConcepts.csv");
-		Iterable<CSVRecord> records = CSVFormat.DEFAULT.withSkipHeaderRecord().withFirstRecordAsHeader().parse(new BufferedReader(in, 1024 * 1024));
-
-		final PhenoCube[] currentConcept = new PhenoCube[1];
-		for (CSVRecord record : records) {
-			processRecord(currentConcept, record);
-		}
+	public static void main(String[] args) {
+		SpringApplication.run(CSVLoaderNewSearch.class, args);
 	}
 
-	private static void processRecord(final PhenoCube[] currentConcept, CSVRecord record) {
-		if (record.size() < 4) {
-			log.info("Record number {} had less records than we exgpected so we are skipping it.", record.getRecordNumber());
-			return;
-		}
-
-		String conceptPath = CSVParserUtil.parseConceptPath(record, DO_VARNAME_ROLLUP);
-		String numericValue = record.get(CSVParserUtil.NUMERIC_VALUE);
-		boolean isAlpha = (numericValue == null || numericValue.isEmpty());
-		String value = isAlpha ? record.get(CSVParserUtil.TEXT_VALUE) : numericValue;
-		currentConcept[0] = getPhenoCube(currentConcept[0], conceptPath, isAlpha);
-
-		if (value != null && !value.trim().isEmpty() &&
-				((isAlpha && currentConcept[0].vType == String.class) || (!isAlpha && currentConcept[0].vType == Double.class))) {
-			value = value.trim();
-			currentConcept[0].setColumnWidth(isAlpha ? Math.max(currentConcept[0].getColumnWidth(), value.getBytes().length) : Double.BYTES);
-			int patientId = Integer.parseInt(record.get(CSVParserUtil.PATIENT_NUM));
-			Date date = null;
-			if (record.size() > 4 && record.get(CSVParserUtil.DATETIME) != null && !record.get(CSVParserUtil.DATETIME).isEmpty()) {
-				date = new Date(Long.parseLong(record.get(CSVParserUtil.DATETIME)));
-			}
-			currentConcept[0].add(patientId, isAlpha ? value : Double.parseDouble(value), date);
-			store.allIds.add(patientId);
-		}
-	}
-
-	private static PhenoCube getPhenoCube(PhenoCube currentConcept, String conceptPath, boolean isAlpha) {
-		if (currentConcept == null || !currentConcept.name.equals(conceptPath)) {
-			currentConcept = store.store.getIfPresent(conceptPath);
-			if (currentConcept == null) {
-				currentConcept = new PhenoCube(conceptPath, isAlpha ?
-						String.class : Double.class);
-				store.store.put(conceptPath, currentConcept);
-			}
-		}
-
-		return currentConcept;
+	@Bean
+	ApplicationRunner runCSVLoader(CSVLoaderService csvLoaderService) {
+		return args -> csvLoaderService.runEtlProcess();
 	}
 }
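Since the loader is now a Spring Boot application, the `application-local-dev.properties` file added below is picked up through the standard `local-dev` profile convention. One way to select that profile from code, shown here as a hypothetical launcher rather than anything in this change:

```java
import org.springframework.boot.SpringApplication;

public class LocalDevLauncherSketch {
    public static void main(String[] args) {
        // Equivalent to passing --spring.profiles.active=local-dev on the command line
        SpringApplication.run(CSVLoaderNewSearch.class, "--spring.profiles.active=local-dev");
    }
}
```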
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java
new file mode 100644
index 00000000..5f9d6061
--- /dev/null
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java
@@ -0,0 +1,93 @@
+package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
+
+import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
+import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.PhenoCube;
+import edu.harvard.hms.dbmi.avillach.hpds.etl.LoadingStore;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.io.*;
+import java.util.Date;
+
+@Service
+public class CSVLoaderService {
+
+    private static final Logger log = LoggerFactory.getLogger(CSVLoaderService.class);
+    private final LoadingStore store = new LoadingStore();
+
+    @Value("${etl.hpds.directory:/opt/local/hpds/}")
+    private String hpdsDirectory;
+
+    @Value("${etl.rollup.enabled:true}")
+    private boolean rollupEnabled;
+
+    public void runEtlProcess() throws IOException {
+        log.info("Starting ETL process... Rollup Enabled: {}", rollupEnabled);
+
+        store.allObservationsStore = new RandomAccessFile(hpdsDirectory + "allObservationsStore.javabin", "rw");
+        initialLoad();
+        store.saveStore(hpdsDirectory);
+
+        log.info("ETL process completed.");
+    }
+
+    private void initialLoad() throws IOException {
+        Crypto.loadDefaultKey();
+        Reader in = new FileReader(hpdsDirectory + "allConcepts.csv");
+        Iterable<CSVRecord> records = CSVFormat.DEFAULT
+                .withFirstRecordAsHeader()
+                .withSkipHeaderRecord(true)
+                .parse(new BufferedReader(in, 256 * 1024));
+
+        final PhenoCube[] currentConcept = new PhenoCube[1];
+        for (CSVRecord record : records) {
+            processRecord(currentConcept, record);
+        }
+
+    }
+
+    private void processRecord(final PhenoCube[] currentConcept, CSVRecord record) {
+        if (record.size() < 4) {
+            log.warn("Skipping record #{} due to missing fields.", record.getRecordNumber());
+            return;
+        }
+
+        String conceptPath = CSVParserUtil.parseConceptPath(record, rollupEnabled);
+        String numericValue = record.get(CSVParserUtil.NUMERIC_VALUE);
+        boolean isAlpha = (numericValue == null || numericValue.isEmpty());
+        String value = isAlpha ? record.get(CSVParserUtil.TEXT_VALUE) : numericValue;
+        currentConcept[0] = getPhenoCube(currentConcept[0], conceptPath, isAlpha);
+
+        if (value != null && !value.trim().isEmpty() &&
+                ((isAlpha && currentConcept[0].vType == String.class) || (!isAlpha && currentConcept[0].vType == Double.class))) {
+            value = value.trim();
+            currentConcept[0].setColumnWidth(isAlpha ? Math.max(currentConcept[0].getColumnWidth(), value.getBytes().length) : Double.BYTES);
+            int patientId = Integer.parseInt(record.get(CSVParserUtil.PATIENT_NUM));
+            Date date = null;
+            if (record.size() > 4 && record.get(CSVParserUtil.DATETIME) != null && !record.get(CSVParserUtil.DATETIME).isEmpty()) {
+                date = new Date(Long.parseLong(record.get(CSVParserUtil.DATETIME)));
+            }
+            currentConcept[0].add(patientId, isAlpha ?
+                    value : Double.parseDouble(value), date);
+            store.allIds.add(patientId);
+        }
+    }
+
+    private PhenoCube getPhenoCube(PhenoCube currentConcept, String conceptPath, boolean isAlpha) {
+        if (currentConcept == null || !currentConcept.name.equals(conceptPath)) {
+            currentConcept = store.store.getIfPresent(conceptPath);
+            if (currentConcept == null) {
+                log.info("Writing - " + conceptPath);
+                // safe to invalidate and write store?
+                store.store.invalidateAll(); // force onRemoval to free up cache per concept
+                store.store.cleanUp();
+                currentConcept = new PhenoCube(conceptPath, isAlpha ? String.class : Double.class);
+                store.store.put(conceptPath, currentConcept);
+            }
+        }
+        return currentConcept;
+    }
+}
\ No newline at end of file
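The heart of `processRecord` is the alpha-vs-numeric decision: an empty `NUMERIC_VALUE` yields a `String` cube, anything else a `Double` cube. A standalone sketch of that classification with Commons CSV; the header names here are assumptions for illustration, since the real column accessors live in `CSVParserUtil`:

```java
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;

import java.io.Reader;
import java.io.StringReader;

public class RecordClassificationSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical header names; CSVParserUtil defines the real ones.
        Reader in = new StringReader(
                "PATIENT_NUM,CONCEPT_PATH,NUMERIC_VALUE,TEXT_VALUE\n" +
                "1,\\demo\\age\\,42,\n" +
                "2,\\demo\\sex\\,,female\n");
        for (CSVRecord record : CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in)) {
            String numericValue = record.get("NUMERIC_VALUE");
            boolean isAlpha = numericValue == null || numericValue.isEmpty();
            String value = isAlpha ? record.get("TEXT_VALUE") : numericValue;
            System.out.printf("patient %s -> %s cube, value %s%n",
                    record.get("PATIENT_NUM"), isAlpha ? "String" : "Double", value);
        }
    }
}
```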
diff --git a/etl/src/main/resources/application-local-dev.properties b/etl/src/main/resources/application-local-dev.properties
new file mode 100644
index 00000000..36d954f7
--- /dev/null
+++ b/etl/src/main/resources/application-local-dev.properties
@@ -0,0 +1,3 @@
+encryption.enabled=false
+etl.hpds.directory=./
+etl.rollup.enabled=false
\ No newline at end of file
diff --git a/etl/src/main/resources/application.properties b/etl/src/main/resources/application.properties
new file mode 100644
index 00000000..3b40e3a5
--- /dev/null
+++ b/etl/src/main/resources/application.properties
@@ -0,0 +1,3 @@
+encryption.enabled=false
+etl.hpds.directory=/opt/local/hpds/
+etl.rollup.enabled=false
\ No newline at end of file
diff --git a/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java b/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java
new file mode 100644
index 00000000..2070ef5d
--- /dev/null
+++ b/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java
@@ -0,0 +1,92 @@
+package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.security.SecureRandom;
+import java.util.Arrays;
+import javax.crypto.Cipher;
+import javax.crypto.KeyGenerator;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.IvParameterSpec;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class CryptoBenchmarkTest {
+
+    private byte[] testData;
+    private SecretKey secretKey;
+    private byte[] iv;
+
+    @BeforeEach
+    void setUp() throws Exception {
+        SecureRandom random = new SecureRandom();
+        testData = new byte[100 * 1024 * 1024]; // 100MB of random data
+        random.nextBytes(testData);
+
+        // Generate AES key
+        KeyGenerator keyGen = KeyGenerator.getInstance("AES");
+        keyGen.init(128);
+        secretKey = keyGen.generateKey();
+
+        // Generate IV
+        iv = new byte[16];
+        random.nextBytes(iv);
+    }
+
+    private byte[] encryptData(byte[] data) throws Exception {
+        Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
+        cipher.init(Cipher.ENCRYPT_MODE, secretKey, new IvParameterSpec(iv));
+        return cipher.doFinal(data);
+    }
+
+    private byte[] decryptData(byte[] data) throws Exception {
+        Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
+        cipher.init(Cipher.DECRYPT_MODE, secretKey, new IvParameterSpec(iv));
+        return cipher.doFinal(data);
+    }
+
+    @Test
+    void testEncryptionPerformance() throws Exception {
+        long startTime = System.nanoTime();
+        byte[] encryptedData = encryptData(testData);
+        long endTime = System.nanoTime();
+        double durationMs = (endTime - startTime) / 1_000_000.0;
+
+        System.out.println("================ Encryption Performance ================");
+        System.out.printf("Encryption Time: %.3f ms\n", durationMs);
+        System.out.println("Description: Measures the time required to encrypt 100MB of data using AES-128 CBC.");
+        System.out.println("========================================================\n");
+
+        assertNotNull(encryptedData);
+    }
+
+    @Test
+    void testDecryptionPerformance() throws Exception {
+        byte[] encryptedData = encryptData(testData);
+
+        long startTime = System.nanoTime();
+        byte[] decryptedData = decryptData(encryptedData);
+        long endTime = System.nanoTime();
+        double durationMs = (endTime - startTime) / 1_000_000.0;
+
+        System.out.println("================ Decryption Performance ================");
+        System.out.printf("Decryption Time: %.3f ms\n", durationMs);
+        System.out.println("Description: Measures the time required to decrypt the previously encrypted 100MB of data.");
+        System.out.println("========================================================\n");
+
+        assertArrayEquals(testData, decryptedData);
+    }
+
+    @Test
+    void testBypassPerformance() {
+        long startTime = System.nanoTime();
+        byte[] bypassedData = testData; // Just returning the data as is
+        long endTime = System.nanoTime();
+        double durationMs = (endTime - startTime) / 1_000_000.0;
+
+        System.out.println("================ Bypass Performance ==================");
+        System.out.printf("Bypass Time: %.6f ms\n", durationMs);
+        System.out.println("Description: Measures the time taken to return the original data without encryption.");
+        System.out.println("======================================================\n");
+
+        assertSame(testData, bypassedData);
+    }
+}
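One caveat worth flagging: the benchmark above measures AES-128 CBC, while `Crypto` itself uses AES/GCM/NoPadding, so the timings are indicative rather than a measurement of the production path. A GCM variant of the same measurement could look like this (a sketch, not part of this change):

```java
import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.crypto.spec.GCMParameterSpec;
import java.security.SecureRandom;

public class GcmBenchmarkSketch {
    public static void main(String[] args) throws Exception {
        SecureRandom random = new SecureRandom();
        byte[] data = new byte[100 * 1024 * 1024]; // same 100MB payload as the CBC test
        random.nextBytes(data);

        KeyGenerator keyGen = KeyGenerator.getInstance("AES");
        keyGen.init(128);
        SecretKey key = keyGen.generateKey();

        byte[] iv = new byte[12]; // GCM conventionally uses a 12-byte IV, as Crypto does
        random.nextBytes(iv);

        Cipher cipher = Cipher.getInstance("AES/GCM/NoPadding");
        cipher.init(Cipher.ENCRYPT_MODE, key, new GCMParameterSpec(128, iv));

        long start = System.nanoTime();
        byte[] encrypted = cipher.doFinal(data); // ciphertext plus 16-byte auth tag
        System.out.printf("GCM encryption time: %.3f ms%n", (System.nanoTime() - start) / 1_000_000.0);
    }
}
```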
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
index 0ad97972..8db0b567 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
@@ -37,8 +37,8 @@ public class AbstractProcessor {
 
 	private final int ID_BATCH_SIZE;
 	private final int CACHE_SIZE;
 
-	private final String hpdsDataDirectory;
-
+	@Value("${HPDS_DATA_DIRECTORY:/opt/local/hpds/}")
+	private String hpdsDataDirectory;
 	@Value("${HPDS_GENOMIC_DATA_DIRECTORY:/opt/local/hpds/all/}")
 	private String hpdsGenomicDataDirectory;
@@ -53,10 +53,9 @@ public class AbstractProcessor {
 	@Autowired
 	public AbstractProcessor(
 			PhenotypeMetaStore phenotypeMetaStore,
-			GenomicProcessor genomicProcessor, @Value("${HPDS_DATA_DIRECTORY:/opt/local/hpds/}") String hpdsDataDirectory
+			GenomicProcessor genomicProcessor
 	) throws ClassNotFoundException, IOException, InterruptedException {
 
-		this.hpdsDataDirectory = hpdsDataDirectory;
 		this.phenotypeMetaStore = phenotypeMetaStore;
 		this.genomicProcessor = genomicProcessor;
 
@@ -66,27 +65,23 @@ public AbstractProcessor(
 
 		store = initializeCache();
 
-		if(Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
-			List<String> cubes = new ArrayList<String>(phenotypeMetaStore.getColumnNames());
-			int conceptsToCache = Math.min(cubes.size(), CACHE_SIZE);
-			for(int x = 0;x<conceptsToCache;x++){
+		List<String> cubes = new ArrayList<String>(phenotypeMetaStore.getColumnNames());
+		int conceptsToCache = Math.min(cubes.size(), CACHE_SIZE);
+		for(int x = 0;x<conceptsToCache;x++){
 	public ResponseEntity<QueryStatus> query(@RequestBody QueryRequest queryJson) {
-		if (Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
-			try {
-				Query query = convertIncomingQuery(queryJson);
-				return ResponseEntity.ok(convertToQueryStatus(queryService.runQuery(query)));
-			} catch (IOException e) {
-				log.error("IOException caught in query processing:", e);
-				return ResponseEntity.status(500).build();
-			}
-		} else {
-			QueryStatus status = new QueryStatus();
-			status.setResourceStatus("Resource is locked.");
-			return ResponseEntity.ok(status);
+		try {
+			Query query = convertIncomingQuery(queryJson);
+			return ResponseEntity.ok(convertToQueryStatus(queryService.runQuery(query)));
+		} catch (IOException e) {
+			log.error("IOException caught in query processing:", e);
+			return ResponseEntity.status(500).build();
 		}
 	}
 
@@ -349,15 +343,11 @@ public ResponseEntity queryFormat(@RequestBody QueryRequest resultRequest) {
 
 	@PostMapping(value = "/query/sync", produces = MediaType.TEXT_PLAIN_VALUE)
 	public ResponseEntity querySync(@RequestBody QueryRequest resultRequest) {
-		if (Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
-			try {
-				return _querySync(resultRequest);
-			} catch (IOException e) {
-				log.error("IOException caught: ", e);
-				return ResponseEntity.status(500).build();
-			}
-		} else {
-			return ResponseEntity.status(403).body("Resource is locked");
+		try {
+			return _querySync(resultRequest);
+		} catch (IOException e) {
+			log.error("IOException caught: ", e);
+			return ResponseEntity.status(500).build();
 		}
 	}
 
diff --git a/service/src/main/resources/application-local-dev.properties b/service/src/main/resources/application-local-dev.properties
new file mode 100644
index 00000000..12b68492
--- /dev/null
+++ b/service/src/main/resources/application-local-dev.properties
@@ -0,0 +1,12 @@
+SMALL_JOB_LIMIT = 100
+SMALL_TASK_THREADS = 1
+LARGE_TASK_THREADS = 1
+ID_BATCH_SIZE=1000
+VCF_EXCERPT_ENABLED=true
+
+HPDS_GENOMIC_DATA_DIRECTORY=./all
+HPDS_DATA_DIRECTORY=./
+
+encryption.enabled=false
+etl.hpds.directory=./
+etl.rollup.enabled=false
\ No newline at end of file
diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
index 71a75424..8a625044 100644
--- a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
+++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
@@ -5,6 +5,7 @@
 import edu.harvard.hms.dbmi.avillach.hpds.etl.genotype.NewVCFLoader;
 import edu.harvard.hms.dbmi.avillach.hpds.etl.genotype.VariantMetadataLoader;
 import edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv.CSVLoader;
+import edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv.CSVLoaderNewSearch;
 
 import java.io.IOException;
 
@@ -21,7 +22,7 @@ public enum BuildIntegrationTestEnvironment {
 	BuildIntegrationTestEnvironment() {
 		try {
 			NewVCFLoader.main(new String[] {VCF_INDEX_FILE, STORAGE_DIR, MERGED_DIR});
-			CSVLoader.main(new String[] {PHENOTYPIC_DATA_DIRECTORY});
+			CSVLoaderNewSearch.main(new String[] {PHENOTYPIC_DATA_DIRECTORY}); // this is not the current CSVLoader for the current version of the application
 			VariantMetadataLoader.main(new String[] {"./src/test/resources/test_vcfIndex.tsv", binFile, "target/VariantMetadataStorage.bin"});
 			VariantStore variantStore = VariantStore.readInstance(STORAGE_DIR);
 			BucketIndexBySample bucketIndexBySample = new BucketIndexBySample(variantStore, STORAGE_DIR);
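A closing observation on the integration-test change: `CSVLoaderNewSearch.main` now boots Spring, so the `PHENOTYPIC_DATA_DIRECTORY` argument is no longer consumed as a positional parameter; the loader reads its directory from the `etl.hpds.directory` property instead. If the test needs to point the loader at its fixture directory, one option (hypothetical, not in this change) is to forward it through the property mechanism:

```java
// Spring Boot maps --key=value program arguments onto configuration properties,
// so the fixture directory can be forwarded explicitly (hypothetical adjustment):
CSVLoaderNewSearch.main(new String[] {"--etl.hpds.directory=" + PHENOTYPIC_DATA_DIRECTORY});
```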