Skip to content

Commit 04421cd

Browse files
ramari16 authored and Luke-Sikina committed
[ALS-4947] Add anyRecordOfMulti field to the query object (#77)
1 parent 73d1759 commit 04421cd

File tree

2 files changed

+39
-26
lines changed
  • client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query
  • processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing

2 files changed

+39
-26
lines changed

client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java

+14-2
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ public Query(Query query) {
3737
private List<String> fields = new ArrayList<>();
3838
private List<String> requiredFields = new ArrayList<>();
3939
private List<String> anyRecordOf = new ArrayList<>();
40+
private List<List<String>> anyRecordOfMulti = new ArrayList<>();
4041
private Map<String, DoubleFilter> numericFilters = new HashMap<>();
4142
private Map<String, String[]> categoryFilters = new HashMap<>();
4243
private List<VariantInfoFilter> variantInfoFilters = new ArrayList<>();
@@ -62,6 +63,14 @@ public List<String> getRequiredFields() {
6263
/**
 * Returns this query's {@code anyRecordOf} list.
 * The internal list itself is returned, not a copy.
 *
 * @return the list currently stored in the {@code anyRecordOf} field
 */
public List<String> getAnyRecordOf() {
6364
return anyRecordOf;
6465
}
66+
/**
 * Returns this query's {@code anyRecordOfMulti} list of lists.
 * The internal list itself is returned, not a copy.
 *
 * @return the list currently stored in the {@code anyRecordOfMulti} field
 */
public List<List<String>> getAnyRecordOfMulti() {
67+
return anyRecordOfMulti;
68+
}
69+
/**
 * Returns every any-record-of list held by this query: the entries of
 * {@code anyRecordOfMulti} followed by the single {@code anyRecordOf} list.
 * A new outer list is built on each call, so adding to or removing from the
 * returned list does not change this query's fields. The inner lists are the
 * same instances the query holds.
 *
 * @return a new list containing the {@code anyRecordOfMulti} entries plus {@code anyRecordOf} as the last element
 */
public List<List<String>> getAllAnyRecordOf() {
70+
// Copies only the outer list; the inner lists are not duplicated.
List<List<String>> anyRecordOfMultiCopy = new ArrayList<>(anyRecordOfMulti);
71+
// anyRecordOf is appended unconditionally, so an empty list is included too.
anyRecordOfMultiCopy.add(anyRecordOf);
72+
return anyRecordOfMultiCopy;
73+
}
6574

6675
public Map<String, DoubleFilter> getNumericFilters() {
6776
return numericFilters;
@@ -98,6 +107,9 @@ public void setRequiredFields(Collection<String> requiredFields) {
98107
/**
 * Replaces the {@code anyRecordOf} list with a new {@code ArrayList} copy of the given collection.
 *
 * @param anyRecordOf the new entries; {@code null} results in an empty list
 */
public void setAnyRecordOf(Collection<String> anyRecordOf) {
99108
this.anyRecordOf = anyRecordOf != null ? new ArrayList<>(anyRecordOf) : new ArrayList<>();
100109
}
110+
/**
 * Replaces the {@code anyRecordOfMulti} list with a new {@code ArrayList} copy of the given collection.
 * Only the outer collection is copied; each inner list is stored as the same instance that was passed in.
 *
 * @param anyRecordOfMulti the new lists; {@code null} results in an empty list
 */
public void setAnyRecordOfMulti(Collection<List<String>> anyRecordOfMulti) {
111+
// NOTE(review): inner lists are not copied and null elements are not rejected here — confirm callers never pass either.
this.anyRecordOfMulti = anyRecordOfMulti != null ? new ArrayList<>(anyRecordOfMulti) : new ArrayList<>();
112+
}
101113

102114
public void setNumericFilters(Map<String, DoubleFilter> numericFilters) {
103115
this.numericFilters = numericFilters != null ? new HashMap<>(numericFilters) : new HashMap<>();
@@ -191,7 +203,7 @@ public String toString() {
191203
writePartFormat("Numeric filters", numericFilters, builder);
192204
writePartFormat("Category filters", categoryFilters, builder);
193205
writePartFormat("Variant Info filters", variantInfoFilters, builder, false);
194-
writePartFormat("Any-Record-Of filters", anyRecordOf, builder, true);
206+
writePartFormat("Any-Record-Of filters", getAllAnyRecordOf(), builder, true);
195207

196208
return builder.toString();
197209
}
@@ -234,7 +246,7 @@ private static void showTopLevelValues(Collection varList, StringBuilder builder
234246

235247
Integer count = countMap.get(firstLevel);
236248
if(count == null) {
237-
count = new Integer(1);
249+
count = 1;
238250
} else {
239251
count = count + 1;
240252
}

processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java

+25-24
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import java.util.Map.Entry;
77
import java.util.concurrent.*;
88
import java.util.stream.Collectors;
9+
import java.util.stream.Stream;
910
import java.util.zip.GZIPInputStream;
1011

1112
import com.google.common.util.concurrent.UncheckedExecutionException;
@@ -195,24 +196,27 @@ protected Set<Integer> applyBooleanLogic(List<Set<Integer>> filteredIdSets) {
195196
* @return
196197
*/
197198
protected List<Set<Integer>> idSetsForEachFilter(Query query) {
198-
ArrayList<Set<Integer>> filteredIdSets = new ArrayList<Set<Integer>>();
199+
final ArrayList<Set<Integer>> filteredIdSets = new ArrayList<>();
199200

200201
try {
201-
addIdSetsForAnyRecordOf(query, filteredIdSets);
202+
query.getAllAnyRecordOf().forEach(anyRecordOfFilterList -> {
203+
addIdSetsForAnyRecordOf(anyRecordOfFilterList, filteredIdSets);
204+
});
202205
addIdSetsForRequiredFields(query, filteredIdSets);
203206
addIdSetsForNumericFilters(query, filteredIdSets);
204207
addIdSetsForCategoryFilters(query, filteredIdSets);
205208
} catch (InvalidCacheLoadException e) {
206209
log.warn("Invalid query supplied: " + e.getLocalizedMessage());
207-
filteredIdSets.add(new HashSet<Integer>()); // if an invalid path is supplied, no patients should match.
210+
filteredIdSets.add(new HashSet<>()); // if an invalid path is supplied, no patients should match.
208211
}
209212

210213
//AND logic to make sure all patients match each filter
211214
if(filteredIdSets.size()>1) {
212-
filteredIdSets = new ArrayList<Set<Integer>>(List.of(applyBooleanLogic(filteredIdSets)));
215+
List<Set<Integer>> processedFilteredIdSets = new ArrayList<>(List.of(applyBooleanLogic(filteredIdSets)));
216+
return addIdSetsForVariantInfoFilters(query, processedFilteredIdSets);
217+
} else {
218+
return addIdSetsForVariantInfoFilters(query, filteredIdSets);
213219
}
214-
215-
return addIdSetsForVariantInfoFilters(query, filteredIdSets);
216220
}
217221

218222
/**
@@ -260,22 +264,19 @@ private void addIdSetsForRequiredFields(Query query, ArrayList<Set<Integer>> fil
260264
}
261265
}
262266

263-
private void addIdSetsForAnyRecordOf(Query query, ArrayList<Set<Integer>> filteredIdSets) {
264-
if(!query.getAnyRecordOf().isEmpty()) {
265-
Set<Integer> patientsInScope = new ConcurrentSkipListSet<Integer>();
266-
VariantBucketHolder<VariantMasks> bucketCache = new VariantBucketHolder<VariantMasks>();
267-
query.getAnyRecordOf().parallelStream().forEach(path->{
268-
if(patientsInScope.size()<Math.max(
269-
phenotypeMetaStore.getPatientIds().size(),
270-
variantService.getPatientIds().length)) {
271-
if(VariantUtils.pathIsVariantSpec(path)) {
272-
addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1","1/1"}, path, patientsInScope, bucketCache);
273-
} else {
274-
patientsInScope.addAll(getCube(path).keyBasedIndex());
275-
}
267+
private void addIdSetsForAnyRecordOf(List<String> anyRecordOfFilters, ArrayList<Set<Integer>> filteredIdSets) {
268+
if(!anyRecordOfFilters.isEmpty()) {
269+
VariantBucketHolder<VariantMasks> bucketCache = new VariantBucketHolder<>();
270+
Set<Integer> anyRecordOfPatientSet = anyRecordOfFilters.parallelStream().flatMap(path -> {
271+
if (VariantUtils.pathIsVariantSpec(path)) {
272+
TreeSet<Integer> patientsInScope = new TreeSet<>();
273+
addIdSetsForVariantSpecCategoryFilters(new String[]{"0/1", "1/1"}, path, patientsInScope, bucketCache);
274+
return patientsInScope.stream();
275+
} else {
276+
return (Stream<Integer>) getCube(path).keyBasedIndex().stream();
276277
}
277-
});
278-
filteredIdSets.add(patientsInScope);
278+
}).collect(Collectors.toSet());
279+
filteredIdSets.add(anyRecordOfPatientSet);
279280
}
280281
}
281282

@@ -289,9 +290,9 @@ private void addIdSetsForNumericFilters(Query query, ArrayList<Set<Integer>> fil
289290

290291
private void addIdSetsForCategoryFilters(Query query, ArrayList<Set<Integer>> filteredIdSets) {
291292
if(!query.getCategoryFilters().isEmpty()) {
292-
VariantBucketHolder<VariantMasks> bucketCache = new VariantBucketHolder<VariantMasks>();
293-
Set<Set<Integer>> idsThatMatchFilters = (Set<Set<Integer>>)query.getCategoryFilters().entrySet().parallelStream().map(entry->{
294-
Set<Integer> ids = new TreeSet<Integer>();
293+
VariantBucketHolder<VariantMasks> bucketCache = new VariantBucketHolder<>();
294+
Set<Set<Integer>> idsThatMatchFilters = query.getCategoryFilters().entrySet().parallelStream().map(entry->{
295+
Set<Integer> ids = new TreeSet<>();
295296
if(VariantUtils.pathIsVariantSpec(entry.getKey())) {
296297
addIdSetsForVariantSpecCategoryFilters(entry.getValue(), entry.getKey(), ids, bucketCache);
297298
} else {

0 commit comments

Comments
 (0)