();
+ public static boolean ENCRYPTION_ENABLED = true;
+ @PostConstruct
+ public void init() {
+ ENCRYPTION_ENABLED = encryptionEnabled;
+ LOGGER.info("ENCRYPTION_ENABLED set to: {}", ENCRYPTION_ENABLED);
+ loadDefaultKey();
+ }
+
+ /**
+ * Loads the default encryption key from the predefined file path.
+ *
+ * This method checks if encryption is enabled before attempting to load the key.
+ * If encryption is disabled, no action is taken.
+ *
+ * The key is loaded using {@link #loadKey(String, String)} with the default key name
+ * and default encryption key file path.
+ */
public static void loadDefaultKey() {
- loadKey(DEFAULT_KEY_NAME, DEFAULT_ENCRYPTION_KEY_PATH);
+ if (ENCRYPTION_ENABLED) {
+ loadKey(DEFAULT_KEY_NAME, DEFAULT_ENCRYPTION_KEY_PATH);
+ }
}
+ /**
+ * Loads an encryption key from the specified file path and stores it in memory.
+ *
+ * The key is read as a string from the file, trimmed of leading and trailing whitespace, and
+ * converted into a byte array before being stored in the key map.
+ *
+ * If the key file is not found or an error occurs while reading, an error is logged.
+ *
+ * @param keyName The name under which the key will be stored.
+ * @param filePath The file path from which the encryption key is loaded.
+ */
public static void loadKey(String keyName, String filePath) {
try {
setKey(keyName, IOUtils.toString(new FileInputStream(filePath), Charset.forName("UTF-8")).trim().getBytes());
- LOGGER.info("****LOADED CRYPTO KEY****");
+ LOGGER.info("****LOADED CRYPTO KEY****");
} catch (IOException e) {
LOGGER.error("****CRYPTO KEY NOT FOUND****", e);
}
}
- public static byte[] encryptData(byte[] plaintextBytes) {
- return encryptData(DEFAULT_KEY_NAME, plaintextBytes);
+ /**
+ * Encrypts the given plaintext using the default encryption key.
+ *
+ * If encryption is disabled, the plaintext is returned as-is.
+ * This method delegates encryption to {@link #encryptData(String, byte[])}
+ * using the default key.
+ *
+ * @param plaintext The byte array to be encrypted.
+ * @return The encrypted byte array, or the original plaintext if encryption is disabled.
+ */
+ public static byte[] encryptData(byte[] plaintext) {
+ return encryptData(DEFAULT_KEY_NAME, plaintext);
}
-
- public static byte[] encryptData(String keyName, byte[] plaintextBytes) {
+
+ /**
+ * Encrypts the given plaintext using the specified encryption key.
+ *
+ * This method uses AES/GCM/NoPadding encryption with a randomly generated IV.
+ * The IV is included in the output for decryption purposes.
+ *
+ * The method returns a byte array structured as follows:
+ * - First 4 bytes: The length of the IV.
+ * - Next IV-length bytes: The IV itself.
+ * - Remaining bytes: The encrypted ciphertext.
+ *
+ * If encryption is disabled, the plaintext is returned unmodified.
+ *
+ * @param keyName The name of the encryption key to use.
+ * @param plaintext The byte array containing the data to encrypt.
+ * @return The encrypted byte array, or the original plaintext if encryption is disabled.
+ * @throws RuntimeException If an error occurs during encryption.
+ */
+ public static byte[] encryptData(String keyName, byte[] plaintext) {
+ if (!ENCRYPTION_ENABLED) {
+ return plaintext;
+ }
+
byte[] key = keys.get(keyName);
SecureRandom secureRandom = new SecureRandom();
SecretKey secretKey = new SecretKeySpec(key, "AES");
- byte[] iv = new byte[12]; //NEVER REUSE THIS IV WITH SAME KEY
+ byte[] iv = new byte[12]; // NEVER REUSE THIS IV WITH SAME KEY
secureRandom.nextBytes(iv);
byte[] cipherText;
Cipher cipher;
try {
cipher = Cipher.getInstance("AES/GCM/NoPadding");
- GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); //128 bit auth tag length
+ GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); // 128-bit auth tag length
cipher.init(Cipher.ENCRYPT_MODE, secretKey, parameterSpec);
- cipherText = new byte[cipher.getOutputSize(plaintextBytes.length)];
- cipher.doFinal(plaintextBytes, 0, plaintextBytes.length, cipherText, 0);
+ cipherText = new byte[cipher.getOutputSize(plaintext.length)];
+ cipher.doFinal(plaintext, 0, plaintext.length, cipherText, 0);
LOGGER.debug("Length of cipherText : " + cipherText.length);
ByteBuffer byteBuffer = ByteBuffer.allocate(4 + iv.length + cipherText.length);
byteBuffer.putInt(iv.length);
@@ -81,13 +154,40 @@ public static byte[] encryptData(String keyName, byte[] plaintextBytes) {
}
}
- public static byte[] decryptData(byte[] encrypted) {
- return decryptData(DEFAULT_KEY_NAME, encrypted);
+ /**
+ * Decrypts the provided data using the default encryption key.
+ *
+ * If encryption is disabled, the method returns the input data as-is.
+ *
+ * @param data The byte array to be decrypted.
+ * @return The decrypted byte array, or the original data if encryption is disabled.
+ */
+ public static byte[] decryptData(byte[] data) {
+ return decryptData(DEFAULT_KEY_NAME, data);
}
- public static byte[] decryptData(String keyName, byte[] encrypted) {
+ /**
+ * Decrypts the provided data using the specified encryption key.
+ *
+ * If encryption is disabled, the method returns the input data as-is.
+ *
+ * The method assumes the input data is structured as follows:
+ * - First 4 bytes: The length of the IV (Initialization Vector).
+ * - Next IV-length bytes: The actual IV.
+ * - Remaining bytes: The ciphertext.
+ *
+ * @param keyName The name of the encryption key to use for decryption.
+ * @param data The byte array containing the encrypted data.
+ * @return The decrypted byte array, or the original data if encryption is disabled.
+ * @throws RuntimeException If an error occurs during decryption.
+ */
+ public static byte[] decryptData(String keyName, byte[] data) {
+ if (!ENCRYPTION_ENABLED) {
+ return data;
+ }
+
byte[] key = keys.get(keyName);
- ByteBuffer byteBuffer = ByteBuffer.wrap(encrypted);
+ ByteBuffer byteBuffer = ByteBuffer.wrap(data);
int ivLength = byteBuffer.getInt();
byte[] iv = new byte[ivLength];
byteBuffer.get(iv);
@@ -98,7 +198,8 @@ public static byte[] decryptData(String keyName, byte[] encrypted) {
cipher = Cipher.getInstance("AES/GCM/NoPadding");
cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key, "AES"), new GCMParameterSpec(128, iv));
return cipher.doFinal(cipherText);
- } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | InvalidAlgorithmParameterException | IllegalBlockSizeException | BadPaddingException e) {
+ } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | InvalidAlgorithmParameterException |
+ IllegalBlockSizeException | BadPaddingException e) {
throw new RuntimeException("Exception caught trying to decrypt data : " + e, e);
}
}
@@ -110,5 +211,4 @@ private static void setKey(String keyName, byte[] key) {
public static boolean hasKey(String keyName) {
return keys.containsKey(keyName);
}
-
-}
+}
\ No newline at end of file
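A quick illustration of the Crypto contract introduced above: the sketch below (not part of the patch) round-trips a payload and unpacks the [4-byte IV length][IV][ciphertext + tag] layout documented on encryptData. The key name "TEST", the /tmp/test_key path, and the example class are illustrative only; the key file must contain text whose trimmed UTF-8 bytes form a valid AES key length (16, 24, or 32 bytes).

import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;

import java.nio.ByteBuffer;
import java.util.Arrays;

public class CryptoLayoutExample {
    public static void main(String[] args) {
        // Load a key under an explicit name (assumes /tmp/test_key holds a valid AES key).
        Crypto.loadKey("TEST", "/tmp/test_key");

        byte[] plaintext = "hello hpds".getBytes();
        byte[] encrypted = Crypto.encryptData("TEST", plaintext);

        // Layout produced by encryptData: [4-byte IV length][IV][GCM ciphertext + auth tag]
        ByteBuffer buffer = ByteBuffer.wrap(encrypted);
        int ivLength = buffer.getInt();                   // first 4 bytes
        byte[] iv = new byte[ivLength];
        buffer.get(iv);                                   // next ivLength bytes (12 here)
        byte[] cipherText = new byte[buffer.remaining()]; // everything else: ciphertext + tag
        buffer.get(cipherText);

        // decryptData reverses the layout and authenticates the payload.
        byte[] roundTrip = Crypto.decryptData("TEST", encrypted);
        System.out.println(Arrays.equals(plaintext, roundTrip)); // true

        // With ENCRYPTION_ENABLED = false (encryption.enabled=false), both calls
        // return the input array unchanged.
    }
}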
diff --git a/docker/pic-sure-hpds-etl/Dockerfile b/docker/pic-sure-hpds-etl/Dockerfile
index 661b98a1..dc258f63 100644
--- a/docker/pic-sure-hpds-etl/Dockerfile
+++ b/docker/pic-sure-hpds-etl/Dockerfile
@@ -1,34 +1,19 @@
-FROM maven:3.9.4-amazoncorretto-21 AS build
-
-RUN yum update -y && yum install -y git && yum clean all
-
-WORKDIR /app
-
-COPY .m2 /root/.m2
-
-COPY . .
-
-RUN mvn clean install -DskipTests
-
-FROM eclipse-temurin:21-alpine
-
-RUN apk add --no-cache --purge -uU bash curl wget unzip gnupg openssl && \
- rm -rf /var/cache/apk/* /tmp/*
-
-WORKDIR /app
-COPY --from=build /app/docker/pic-sure-hpds-etl/SQLLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVLoaderNewSearch-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/CSVDumper-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/VCFLocalLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/VariantMetadataLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/UnifiedVCFLocalLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/MultialleleCounter-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/RekeyDataset-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/RemoveConceptFromMetadata-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/HideAnnotationCategoryValue-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/SequentialLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/LowRAMMultiCSVLoader-jar-with-dependencies.jar .
-COPY --from=build /app/docker/pic-sure-hpds-etl/create_key.sh .
-
-ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -Xmx${HEAPSIZE:-2048}m -jar ${LOADER_NAME:-CSVLoader}-jar-with-dependencies.jar"]
+FROM openjdk:21-jdk-slim AS build
+
+RUN apt-get update -y && apt-get install -y gnupg openssl && rm -rf /var/lib/apt/lists/*
+
+ADD create_key.sh .
+ADD SQLLoader-jar-with-dependencies.jar .
+ADD CSVLoader-jar-with-dependencies.jar .
+ADD CSVLoaderNewSearch-jar-with-dependencies.jar .
+ADD CSVDumper-jar-with-dependencies.jar .
+ADD VCFLocalLoader-jar-with-dependencies.jar .
+ADD VariantMetadataLoader-jar-with-dependencies.jar .
+ADD UnifiedVCFLocalLoader-jar-with-dependencies.jar .
+ADD MultialleleCounter-jar-with-dependencies.jar .
+ADD RekeyDataset-jar-with-dependencies.jar .
+ADD RemoveConceptFromMetadata-jar-with-dependencies.jar .
+ADD HideAnnotationCategoryValue-jar-with-dependencies.jar .
+ADD SequentialLoader-jar-with-dependencies.jar .
+
+ENTRYPOINT java $JAVA_OPTS -Xmx${HEAPSIZE:-2048}m -jar ${LOADER_NAME:-CSVLoader}-jar-with-dependencies.jar
\ No newline at end of file
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
index 62b5b8e9..0f394e15 100644
--- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/LoadingStore.java
@@ -29,14 +29,14 @@ public class LoadingStore {
TreeMap metadataMap = new TreeMap<>();
private static Logger log = LoggerFactory.getLogger(LoadingStore.class);
-
+
public LoadingCache store = CacheBuilder.newBuilder()
- .maximumSize(16)
+ .maximumSize(2048)
.removalListener(new RemovalListener() {
@Override
public void onRemoval(RemovalNotification arg0) {
- log.info("removing " + arg0.getKey());
+ //log.debug("Cache removal and writing to disk: " + arg0.getKey());
if(arg0.getValue().getLoadingMap()!=null) {
complete(arg0.getValue());
}
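The cache-size bump from 16 to 2048 relies on Guava's size-based eviction: once the cap is exceeded, the evicted concept is handed to the removal listener, which is where complete(...) persists it to disk, so memory stays bounded regardless of concept count. A minimal sketch of that behavior (illustrative names, not part of the patch):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;

public class EvictionWriteSketch {
    public static void main(String[] args) {
        // Size-bounded cache: exceeding maximumSize evicts an entry and fires the listener,
        // mirroring where LoadingStore writes the evicted concept out.
        Cache<String, String> cache = CacheBuilder.newBuilder()
                .maximumSize(2)
                .removalListener((RemovalListener<String, String>) n ->
                        System.out.println("persisting evicted concept: " + n.getKey()))
                .build();

        cache.put("concept-1", "a");
        cache.put("concept-2", "b");
        cache.put("concept-3", "c"); // capacity exceeded -> an earlier entry is evicted
        cache.cleanUp();             // drain any removal notifications still pending
    }
}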
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
index 4b86232d..c681cec4 100644
--- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderNewSearch.java
@@ -1,85 +1,31 @@
package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
-import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.PhenoCube;
-import edu.harvard.hms.dbmi.avillach.hpds.etl.LoadingStore;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVRecord;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.util.Date;
-
-@SuppressWarnings({"unchecked", "rawtypes"})
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.FilterType;
+
+@SpringBootApplication
+@ComponentScan(
+ basePackages = "edu.harvard.hms.dbmi.avillach.hpds",
+ includeFilters = @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = Crypto.class)
+)
public class CSVLoaderNewSearch {
- private static final LoadingStore store = new LoadingStore();
-
private static final Logger log = LoggerFactory.getLogger(CSVLoaderNewSearch.class);
- private static boolean DO_VARNAME_ROLLUP = false;
-
- private static final String HPDS_DIRECTORY = "/opt/local/hpds/";
-
- public static void main(String[] args) throws IOException {
- if (args.length > 1) {
- if (args[0].equalsIgnoreCase("NO_ROLLUP")) {
- log.info("NO_ROLLUP SET.");
- DO_VARNAME_ROLLUP = false;
- }
- }
- store.allObservationsStore = new RandomAccessFile(HPDS_DIRECTORY + "allObservationsStore.javabin", "rw");
- initialLoad();
- store.saveStore(HPDS_DIRECTORY);
- }
-
- private static void initialLoad() throws IOException {
- Crypto.loadDefaultKey();
- Reader in = new FileReader(HPDS_DIRECTORY + "allConcepts.csv");
- Iterable records = CSVFormat.DEFAULT.withSkipHeaderRecord().withFirstRecordAsHeader().parse(new BufferedReader(in, 1024 * 1024));
-
- final PhenoCube[] currentConcept = new PhenoCube[1];
- for (CSVRecord record : records) {
- processRecord(currentConcept, record);
- }
+ public static void main(String[] args) {
+ SpringApplication.run(CSVLoaderNewSearch.class, args);
}
- private static void processRecord(final PhenoCube[] currentConcept, CSVRecord record) {
- if (record.size() < 4) {
- log.info("Record number {} had less records than we exgpected so we are skipping it.", record.getRecordNumber());
- return;
- }
-
- String conceptPath = CSVParserUtil.parseConceptPath(record, DO_VARNAME_ROLLUP);
- String numericValue = record.get(CSVParserUtil.NUMERIC_VALUE);
- boolean isAlpha = (numericValue == null || numericValue.isEmpty());
- String value = isAlpha ? record.get(CSVParserUtil.TEXT_VALUE) : numericValue;
- currentConcept[0] = getPhenoCube(currentConcept[0], conceptPath, isAlpha);
-
- if (value != null && !value.trim().isEmpty() &&
- ((isAlpha && currentConcept[0].vType == String.class) || (!isAlpha && currentConcept[0].vType == Double.class))) {
- value = value.trim();
- currentConcept[0].setColumnWidth(isAlpha ? Math.max(currentConcept[0].getColumnWidth(), value.getBytes().length) : Double.BYTES);
- int patientId = Integer.parseInt(record.get(CSVParserUtil.PATIENT_NUM));
- Date date = null;
- if (record.size() > 4 && record.get(CSVParserUtil.DATETIME) != null && !record.get(CSVParserUtil.DATETIME).isEmpty()) {
- date = new Date(Long.parseLong(record.get(CSVParserUtil.DATETIME)));
- }
- currentConcept[0].add(patientId, isAlpha ? value : Double.parseDouble(value), date);
- store.allIds.add(patientId);
- }
- }
-
- private static PhenoCube getPhenoCube(PhenoCube currentConcept, String conceptPath, boolean isAlpha) {
- if (currentConcept == null || !currentConcept.name.equals(conceptPath)) {
- currentConcept = store.store.getIfPresent(conceptPath);
- if (currentConcept == null) {
- currentConcept = new PhenoCube(conceptPath, isAlpha ? String.class : Double.class);
- store.store.put(conceptPath, currentConcept);
- }
- }
-
- return currentConcept;
+ @Bean
+ ApplicationRunner runCSVLoader(CSVLoaderService csvLoaderService) {
+ return args -> csvLoaderService.runEtlProcess();
}
}
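For context on the new entry point: Spring Boot invokes the @Bean ApplicationRunner above once the application context has fully started, which is what kicks off the ETL. An equivalent, more explicit form is a component implementing ApplicationRunner; the class name below is illustrative and this is a sketch, not part of the patch.

import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

@Component
public class EtlStartupRunner implements ApplicationRunner {

    private final CSVLoaderService csvLoaderService;

    public EtlStartupRunner(CSVLoaderService csvLoaderService) {
        this.csvLoaderService = csvLoaderService; // constructor injection, same wiring as the @Bean lambda
    }

    @Override
    public void run(ApplicationArguments args) throws Exception {
        // Runs only after the context is initialized (Crypto's @PostConstruct has already
        // executed), so the encryption flag and key state are settled before the ETL starts.
        csvLoaderService.runEtlProcess();
    }
}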
diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java
new file mode 100644
index 00000000..5f9d6061
--- /dev/null
+++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CSVLoaderService.java
@@ -0,0 +1,93 @@
+package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
+
+import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
+import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.PhenoCube;
+import edu.harvard.hms.dbmi.avillach.hpds.etl.LoadingStore;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.io.*;
+import java.util.Date;
+
+@Service
+public class CSVLoaderService {
+
+ private static final Logger log = LoggerFactory.getLogger(CSVLoaderService.class);
+ private final LoadingStore store = new LoadingStore();
+
+ @Value("${etl.hpds.directory:/opt/local/hpds/}")
+ private String hpdsDirectory;
+
+ @Value("${etl.rollup.enabled:true}")
+ private boolean rollupEnabled;
+
+ public void runEtlProcess() throws IOException {
+ log.info("Starting ETL process... Rollup Enabled: {}", rollupEnabled);
+
+ store.allObservationsStore = new RandomAccessFile(hpdsDirectory + "allObservationsStore.javabin", "rw");
+ initialLoad();
+ store.saveStore(hpdsDirectory);
+
+ log.info("ETL process completed.");
+ }
+
+ private void initialLoad() throws IOException {
+ Crypto.loadDefaultKey();
+ Reader in = new FileReader(hpdsDirectory + "allConcepts.csv");
+ Iterable records = CSVFormat.DEFAULT
+ .withFirstRecordAsHeader()
+ .withSkipHeaderRecord(true)
+ .parse(new BufferedReader(in, 256 * 1024));
+
+ final PhenoCube[] currentConcept = new PhenoCube[1];
+ for (CSVRecord record : records) {
+ processRecord(currentConcept, record);
+ }
+
+ }
+
+ private void processRecord(final PhenoCube[] currentConcept, CSVRecord record) {
+ if (record.size() < 4) {
+ log.warn("Skipping record #{} due to missing fields.", record.getRecordNumber());
+ return;
+ }
+
+ String conceptPath = CSVParserUtil.parseConceptPath(record, rollupEnabled);
+ String numericValue = record.get(CSVParserUtil.NUMERIC_VALUE);
+ boolean isAlpha = (numericValue == null || numericValue.isEmpty());
+ String value = isAlpha ? record.get(CSVParserUtil.TEXT_VALUE) : numericValue;
+ currentConcept[0] = getPhenoCube(currentConcept[0], conceptPath, isAlpha);
+
+ if (value != null && !value.trim().isEmpty() &&
+ ((isAlpha && currentConcept[0].vType == String.class) || (!isAlpha && currentConcept[0].vType == Double.class))) {
+ value = value.trim();
+ currentConcept[0].setColumnWidth(isAlpha ? Math.max(currentConcept[0].getColumnWidth(), value.getBytes().length) : Double.BYTES);
+ int patientId = Integer.parseInt(record.get(CSVParserUtil.PATIENT_NUM));
+ Date date = null;
+ if (record.size() > 4 && record.get(CSVParserUtil.DATETIME) != null && !record.get(CSVParserUtil.DATETIME).isEmpty()) {
+ date = new Date(Long.parseLong(record.get(CSVParserUtil.DATETIME)));
+ }
+ currentConcept[0].add(patientId, isAlpha ? value : Double.parseDouble(value), date);
+ store.allIds.add(patientId);
+ }
+ }
+
+ private PhenoCube getPhenoCube(PhenoCube currentConcept, String conceptPath, boolean isAlpha) {
+ if (currentConcept == null || !currentConcept.name.equals(conceptPath)) {
+ currentConcept = store.store.getIfPresent(conceptPath);
+ if (currentConcept == null) {
+ log.info("Writing - " + conceptPath);
+ // safe to invalidate and write store?
+ store.store.invalidateAll(); // force onremoval to free up cache per concept
+ store.store.cleanUp();
+ currentConcept = new PhenoCube(conceptPath, isAlpha ? String.class : Double.class);
+ store.store.put(conceptPath, currentConcept);
+ }
+ }
+ return currentConcept;
+ }
+}
\ No newline at end of file
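The getPhenoCube logic above flushes the cache each time a new concept path is encountered (see the author's own "safe to invalidate and write store?" note): invalidateAll() removes every entry and fires the removal listener for each, and cleanUp() flushes anything still pending on the calling thread, which is what triggers the disk write in LoadingStore's removal listener. A tiny sketch of just that mechanism (illustrative names, not part of the patch):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;

public class ForcedFlushSketch {
    public static void main(String[] args) {
        Cache<String, String> cache = CacheBuilder.newBuilder()
                .removalListener((RemovalListener<String, String>) n ->
                        System.out.println("flushing " + n.getKey()))
                .build();

        cache.put("\\demographics\\age\\", "previous concept");

        cache.invalidateAll(); // every entry is removed; the listener fires for each one
        cache.cleanUp();       // deliver any notifications still pending before moving on
        // prints: flushing \demographics\age\
    }
}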
diff --git a/etl/src/main/resources/application-local-dev.properties b/etl/src/main/resources/application-local-dev.properties
new file mode 100644
index 00000000..36d954f7
--- /dev/null
+++ b/etl/src/main/resources/application-local-dev.properties
@@ -0,0 +1,3 @@
+encryption.enabled=false
+etl.hpds.directory=./
+etl.rollup.enabled=false
\ No newline at end of file
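These overrides only take effect when the local-dev profile is active, e.g. by passing --spring.profiles.active=local-dev or setting SPRING_PROFILES_ACTIVE. A sketch of activating it programmatically (not part of the patch; the launcher class is illustrative and assumed to sit alongside CSVLoaderNewSearch):

import org.springframework.boot.SpringApplication;

public class LocalDevEtlLauncher {
    public static void main(String[] args) {
        SpringApplication app = new SpringApplication(CSVLoaderNewSearch.class);
        app.setAdditionalProfiles("local-dev"); // picks up application-local-dev.properties
        app.run(args);                          // individual keys can still be overridden, e.g. --etl.rollup.enabled=true
    }
}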
diff --git a/etl/src/main/resources/application.properties b/etl/src/main/resources/application.properties
new file mode 100644
index 00000000..3b40e3a5
--- /dev/null
+++ b/etl/src/main/resources/application.properties
@@ -0,0 +1,3 @@
+encryption.enabled=false
+etl.hpds.directory=/opt/local/hpds/
+etl.rollup.enabled=false
\ No newline at end of file
diff --git a/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java b/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java
new file mode 100644
index 00000000..2070ef5d
--- /dev/null
+++ b/etl/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/etl/phenotype/csv/CryptoBenchmarkTest.java
@@ -0,0 +1,92 @@
+package edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import java.security.SecureRandom;
+import java.util.Arrays;
+import javax.crypto.Cipher;
+import javax.crypto.KeyGenerator;
+import javax.crypto.SecretKey;
+import javax.crypto.spec.IvParameterSpec;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class CryptoBenchmarkTest {
+ private byte[] testData;
+ private SecretKey secretKey;
+ private byte[] iv;
+
+ @BeforeEach
+ void setUp() throws Exception {
+ SecureRandom random = new SecureRandom();
+ testData = new byte[100 * 1024 * 1024]; // 100MB of random data
+ random.nextBytes(testData);
+
+ // Generate AES key
+ KeyGenerator keyGen = KeyGenerator.getInstance("AES");
+ keyGen.init(128);
+ secretKey = keyGen.generateKey();
+
+ // Generate IV
+ iv = new byte[16];
+ random.nextBytes(iv);
+ }
+
+ private byte[] encryptData(byte[] data) throws Exception {
+ Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
+ cipher.init(Cipher.ENCRYPT_MODE, secretKey, new IvParameterSpec(iv));
+ return cipher.doFinal(data);
+ }
+
+ private byte[] decryptData(byte[] data) throws Exception {
+ Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
+ cipher.init(Cipher.DECRYPT_MODE, secretKey, new IvParameterSpec(iv));
+ return cipher.doFinal(data);
+ }
+
+ @Test
+ void testEncryptionPerformance() throws Exception {
+ long startTime = System.nanoTime();
+ byte[] encryptedData = encryptData(testData);
+ long endTime = System.nanoTime();
+ double durationMs = (endTime - startTime) / 1_000_000.0;
+
+ System.out.println("================ Encryption Performance ================");
+ System.out.printf("Encryption Time: %.3f ms\n", durationMs);
+ System.out.println("Description: Measures the time required to encrypt 100MB of data using AES-128 CBC.");
+ System.out.println("========================================================\n");
+
+ assertNotNull(encryptedData);
+ }
+
+ @Test
+ void testDecryptionPerformance() throws Exception {
+ byte[] encryptedData = encryptData(testData);
+ long startTime = System.nanoTime();
+ byte[] decryptedData = decryptData(encryptedData);
+ long endTime = System.nanoTime();
+ double durationMs = (endTime - startTime) / 1_000_000.0;
+
+ System.out.println("================ Decryption Performance ================");
+ System.out.printf("Decryption Time: %.3f ms\n", durationMs);
+ System.out.println("Description: Measures the time required to decrypt the previously encrypted 100MB of data.");
+ System.out.println("========================================================\n");
+
+ assertArrayEquals(testData, decryptedData);
+ }
+
+ @Test
+ void testBypassPerformance() {
+ long startTime = System.nanoTime();
+ byte[] bypassedData = testData; // Just returning the data as is
+ long endTime = System.nanoTime();
+ double durationMs = (endTime - startTime) / 1_000_000.0;
+
+ System.out.println("================ Bypass Performance ==================");
+ System.out.printf("Bypass Time: %.6f ms\n", durationMs);
+ System.out.println("Description: Measures the time taken to return the original data without encryption.");
+ System.out.println("======================================================\n");
+
+ assertSame(testData, bypassedData);
+ }
+}
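One caveat on the benchmark above: it times AES/CBC with PKCS5 padding, while the production Crypto class uses AES/GCM/NoPadding, so the numbers are indicative rather than a direct measurement of the HPDS path. A standalone GCM timing of the same 100MB workload might look like the sketch below (all names illustrative, not part of the patch):

import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.crypto.spec.GCMParameterSpec;
import java.security.SecureRandom;

public class GcmBenchmarkSketch {
    public static void main(String[] args) throws Exception {
        SecureRandom random = new SecureRandom();
        byte[] data = new byte[100 * 1024 * 1024]; // same 100MB workload as the test
        random.nextBytes(data);

        KeyGenerator keyGen = KeyGenerator.getInstance("AES");
        keyGen.init(128);
        SecretKey key = keyGen.generateKey();

        byte[] iv = new byte[12];                  // GCM convention: 96-bit IV
        random.nextBytes(iv);

        Cipher cipher = Cipher.getInstance("AES/GCM/NoPadding");
        cipher.init(Cipher.ENCRYPT_MODE, key, new GCMParameterSpec(128, iv));

        long start = System.nanoTime();
        byte[] encrypted = cipher.doFinal(data);   // ciphertext plus 16-byte auth tag
        double ms = (System.nanoTime() - start) / 1_000_000.0;
        System.out.printf("AES/GCM encryption of 100MB: %.3f ms (%d bytes out)%n", ms, encrypted.length);
    }
}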
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
index 0ad97972..8db0b567 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java
@@ -37,8 +37,8 @@ public class AbstractProcessor {
private final int ID_BATCH_SIZE;
private final int CACHE_SIZE;
- private final String hpdsDataDirectory;
-
+ @Value("${HPDS_DATA_DIRECTORY:/opt/local/hpds/}")
+ private String hpdsDataDirectory;
@Value("${HPDS_GENOMIC_DATA_DIRECTORY:/opt/local/hpds/all/}")
private String hpdsGenomicDataDirectory;
@@ -53,10 +53,9 @@ public class AbstractProcessor {
@Autowired
public AbstractProcessor(
PhenotypeMetaStore phenotypeMetaStore,
- GenomicProcessor genomicProcessor, @Value("${HPDS_DATA_DIRECTORY:/opt/local/hpds/}") String hpdsDataDirectory
+ GenomicProcessor genomicProcessor
) throws ClassNotFoundException, IOException, InterruptedException {
- this.hpdsDataDirectory = hpdsDataDirectory;
this.phenotypeMetaStore = phenotypeMetaStore;
this.genomicProcessor = genomicProcessor;
@@ -66,27 +65,23 @@ public AbstractProcessor(
store = initializeCache();
- if(Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
- List<String> cubes = new ArrayList(phenotypeMetaStore.getColumnNames());
- int conceptsToCache = Math.min(cubes.size(), CACHE_SIZE);
- for(int x = 0;x < conceptsToCache;x++){
+ List<String> cubes = new ArrayList(phenotypeMetaStore.getColumnNames());
+ int conceptsToCache = Math.min(cubes.size(), CACHE_SIZE);
+ for(int x = 0;x < conceptsToCache;x++){
public ResponseEntity<QueryStatus> query(@RequestBody QueryRequest queryJson) {
- if (Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
- try {
- Query query = convertIncomingQuery(queryJson);
- return ResponseEntity.ok(convertToQueryStatus(queryService.runQuery(query)));
- } catch (IOException e) {
- log.error("IOException caught in query processing:", e);
- return ResponseEntity.status(500).build();
- }
- } else {
- QueryStatus status = new QueryStatus();
- status.setResourceStatus("Resource is locked.");
- return ResponseEntity.ok(status);
+ try {
+ Query query = convertIncomingQuery(queryJson);
+ return ResponseEntity.ok(convertToQueryStatus(queryService.runQuery(query)));
+ } catch (IOException e) {
+ log.error("IOException caught in query processing:", e);
+ return ResponseEntity.status(500).build();
}
}
@@ -349,15 +343,11 @@ public ResponseEntity queryFormat(@RequestBody QueryRequest resultRequest) {
@PostMapping(value = "/query/sync", produces = MediaType.TEXT_PLAIN_VALUE)
public ResponseEntity querySync(@RequestBody QueryRequest resultRequest) {
- if (Crypto.hasKey(Crypto.DEFAULT_KEY_NAME)) {
- try {
- return _querySync(resultRequest);
- } catch (IOException e) {
- log.error("IOException caught: ", e);
- return ResponseEntity.status(500).build();
- }
- } else {
- return ResponseEntity.status(403).body("Resource is locked");
+ try {
+ return _querySync(resultRequest);
+ } catch (IOException e) {
+ log.error("IOException caught: ", e);
+ return ResponseEntity.status(500).build();
}
}
diff --git a/service/src/main/resources/application-local-dev.properties b/service/src/main/resources/application-local-dev.properties
new file mode 100644
index 00000000..12b68492
--- /dev/null
+++ b/service/src/main/resources/application-local-dev.properties
@@ -0,0 +1,12 @@
+SMALL_JOB_LIMIT = 100
+SMALL_TASK_THREADS = 1
+LARGE_TASK_THREADS = 1
+ID_BATCH_SIZE=1000
+VCF_EXCERPT_ENABLED=true
+
+HPDS_GENOMIC_DATA_DIRECTORY=./all
+HPDS_DATA_DIRECTORY=./
+
+encryption.enabled=false
+etl.hpds.directory=./
+etl.rollup.enabled=false
\ No newline at end of file
diff --git a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
index 71a75424..8a625044 100644
--- a/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
+++ b/service/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/test/util/BuildIntegrationTestEnvironment.java
@@ -5,6 +5,7 @@
import edu.harvard.hms.dbmi.avillach.hpds.etl.genotype.NewVCFLoader;
import edu.harvard.hms.dbmi.avillach.hpds.etl.genotype.VariantMetadataLoader;
import edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv.CSVLoader;
+import edu.harvard.hms.dbmi.avillach.hpds.etl.phenotype.csv.CSVLoaderNewSearch;
import java.io.IOException;
@@ -21,7 +22,7 @@ public enum BuildIntegrationTestEnvironment {
BuildIntegrationTestEnvironment() {
try {
NewVCFLoader.main(new String[] {VCF_INDEX_FILE, STORAGE_DIR, MERGED_DIR});
- CSVLoader.main(new String[] {PHENOTYPIC_DATA_DIRECTORY});
+ CSVLoaderNewSearch.main(new String[] {PHENOTYPIC_DATA_DIRECTORY}); // the legacy CSVLoader is not the loader used by the current version of the application
VariantMetadataLoader.main(new String[] {"./src/test/resources/test_vcfIndex.tsv", binFile, "target/VariantMetadataStorage.bin"});
VariantStore variantStore = VariantStore.readInstance(STORAGE_DIR);
BucketIndexBySample bucketIndexBySample = new BucketIndexBySample(variantStore, STORAGE_DIR);