Skip to content

Commit 476ed76

Browse files
Overflow prevention
Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
1 parent 98dbc4a commit 476ed76

File tree

6 files changed

+242
-3
lines changed

6 files changed

+242
-3
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2121
- Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271))
2222
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
2323
- Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
24+
- Clamp extreme date values to the valid epoch-millisecond range during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812))
2425

2526
### Dependencies
2627
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

server/src/main/java/org/opensearch/common/time/DateUtils.java

+21
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,27 @@ public static Instant clampToNanosRange(Instant instant) {
272272
return instant;
273273
}
274274

275+
/**
276+
* Clamps the given {@link Instant} to the valid epoch millisecond range.
277+
*
278+
* - If the input is before {@code Long.MIN_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MIN_VALUE)}.
279+
* - If the input is after {@code Long.MAX_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MAX_VALUE)}.
280+
* - Otherwise, it returns the input as-is.
281+
*
282+
* @param instant the {@link Instant} to clamp
283+
* @return the clamped {@link Instant}
284+
* @throws NullPointerException if the input is {@code null}
285+
*/
286+
public static Instant clampToMillisRange(Instant instant) {
287+
if (instant.isBefore(Instant.ofEpochMilli(Long.MIN_VALUE))) {
288+
return Instant.ofEpochMilli(Long.MIN_VALUE);
289+
}
290+
if (instant.isAfter(Instant.ofEpochMilli(Long.MAX_VALUE))) {
291+
return Instant.ofEpochMilli(Long.MAX_VALUE);
292+
}
293+
return instant;
294+
}
295+
275296
/**
276297
* convert a long value to a java time instant
277298
* the long value resembles the nanoseconds since the epoch

server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ public enum Resolution {
122122
MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
123123
@Override
124124
public long convert(Instant instant) {
125-
return instant.toEpochMilli();
125+
return clampToValidRange(instant).toEpochMilli();
126126
}
127127

128128
@Override
@@ -132,7 +132,7 @@ public Instant toInstant(long value) {
132132

133133
@Override
134134
public Instant clampToValidRange(Instant instant) {
135-
return instant;
135+
return DateUtils.clampToMillisRange(instant);
136136
}
137137

138138
@Override

server/src/test/java/org/opensearch/common/time/DateUtilsTests.java

+19
Original file line numberDiff line numberDiff line change
@@ -260,4 +260,23 @@ public void testRoundYear() {
260260
long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
261261
assertThat(DateUtils.roundYear(endOf1996), is(startOf1996));
262262
}
263+
264+
public void testClampToMillisRange() {
265+
Instant normalInstant = Instant.now();
266+
assertEquals(normalInstant, DateUtils.clampToMillisRange(normalInstant));
267+
268+
Instant beforeMinInstant = Instant.ofEpochMilli(Long.MIN_VALUE).minusMillis(1);
269+
assertEquals(Instant.ofEpochMilli(Long.MIN_VALUE), DateUtils.clampToMillisRange(beforeMinInstant));
270+
271+
Instant afterMaxInstant = Instant.ofEpochMilli(Long.MAX_VALUE).plusMillis(1);
272+
assertEquals(Instant.ofEpochMilli(Long.MAX_VALUE), DateUtils.clampToMillisRange(afterMaxInstant));
273+
274+
Instant minInstant = Instant.ofEpochMilli(Long.MIN_VALUE);
275+
assertEquals(minInstant, DateUtils.clampToMillisRange(minInstant));
276+
277+
Instant maxInstant = Instant.ofEpochMilli(Long.MAX_VALUE);
278+
assertEquals(maxInstant, DateUtils.clampToMillisRange(maxInstant));
279+
280+
assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null));
281+
}
263282
}

server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java

-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,6 @@ public void testIgnoreMalformed() throws IOException {
170170
"failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]"
171171
);
172172
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
173-
testIgnoreMalformedForValue("-522000000", "long overflow");
174173
}
175174

176175
private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException {

server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java

+199
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,32 @@
3131

3232
package org.opensearch.index.mapper;
3333

34+
import org.apache.lucene.document.Field;
3435
import org.apache.lucene.document.LongPoint;
3536
import org.apache.lucene.document.NumericDocValuesField;
3637
import org.apache.lucene.document.SortedNumericDocValuesField;
38+
import org.apache.lucene.document.StoredField;
39+
import org.apache.lucene.document.StringField;
3740
import org.apache.lucene.index.DirectoryReader;
3841
import org.apache.lucene.index.IndexReader;
3942
import org.apache.lucene.index.IndexWriter;
4043
import org.apache.lucene.index.IndexWriterConfig;
44+
import org.apache.lucene.index.IndexableField;
4145
import org.apache.lucene.index.MultiReader;
4246
import org.apache.lucene.index.SortedNumericDocValues;
47+
import org.apache.lucene.index.Term;
48+
import org.apache.lucene.search.BooleanClause;
49+
import org.apache.lucene.search.BooleanQuery;
4350
import org.apache.lucene.search.DocIdSetIterator;
4451
import org.apache.lucene.search.IndexOrDocValuesQuery;
4552
import org.apache.lucene.search.IndexSearcher;
4653
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
4754
import org.apache.lucene.search.Query;
55+
import org.apache.lucene.search.ScoreDoc;
56+
import org.apache.lucene.search.Sort;
57+
import org.apache.lucene.search.SortField;
58+
import org.apache.lucene.search.TermQuery;
59+
import org.apache.lucene.search.TopDocs;
4860
import org.apache.lucene.store.Directory;
4961
import org.opensearch.Version;
5062
import org.opensearch.cluster.metadata.IndexMetadata;
@@ -71,8 +83,12 @@
7183
import org.joda.time.DateTimeZone;
7284

7385
import java.io.IOException;
86+
import java.time.Instant;
7487
import java.time.ZoneOffset;
88+
import java.util.Arrays;
7589
import java.util.Collections;
90+
import java.util.List;
91+
import java.util.Locale;
7692

7793
import static org.hamcrest.CoreMatchers.is;
7894
import static org.apache.lucene.document.LongPoint.pack;
@@ -490,4 +506,187 @@ public void testParseSourceValueNanos() throws IOException {
490506
MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
491507
assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
492508
}
509+
510+
public void testDateResolutionForOverflow() throws IOException {
511+
Directory dir = newDirectory();
512+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
513+
514+
DateFieldType ft = new DateFieldType(
515+
"test_date",
516+
true,
517+
true,
518+
true,
519+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
520+
Resolution.MILLISECONDS,
521+
null,
522+
Collections.emptyMap()
523+
);
524+
525+
List<String> dates = Arrays.asList(
526+
null,
527+
"2020-01-01T00:00:00Z",
528+
null,
529+
"2021-01-01T00:00:00Z",
530+
"+292278994-08-17T07:12:55.807Z",
531+
null,
532+
"-292275055-05-16T16:47:04.192Z"
533+
);
534+
535+
int numNullDates = 0;
536+
long minDateValue = Long.MAX_VALUE;
537+
long maxDateValue = Long.MIN_VALUE;
538+
539+
for (int i = 0; i < dates.size(); i++) {
540+
ParseContext.Document doc = new ParseContext.Document();
541+
String dateStr = dates.get(i);
542+
543+
if (dateStr != null) {
544+
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
545+
doc.add(new LongPoint(ft.name(), timestamp));
546+
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
547+
doc.add(new StoredField(ft.name(), timestamp));
548+
doc.add(new StoredField("id", i));
549+
minDateValue = Math.min(minDateValue, timestamp);
550+
maxDateValue = Math.max(maxDateValue, timestamp);
551+
} else {
552+
numNullDates++;
553+
doc.add(new StoredField("id", i));
554+
}
555+
w.addDocument(doc);
556+
}
557+
558+
DirectoryReader reader = DirectoryReader.open(w);
559+
IndexSearcher searcher = new IndexSearcher(reader);
560+
561+
Settings indexSettings = Settings.builder()
562+
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
563+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
564+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
565+
.build();
566+
QueryShardContext context = new QueryShardContext(
567+
0,
568+
new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
569+
BigArrays.NON_RECYCLING_INSTANCE,
570+
null,
571+
null,
572+
null,
573+
null,
574+
null,
575+
xContentRegistry(),
576+
writableRegistry(),
577+
null,
578+
null,
579+
() -> nowInMillis,
580+
null,
581+
null,
582+
() -> true,
583+
null
584+
);
585+
586+
Query rangeQuery = ft.rangeQuery(
587+
"-292275055-05-16T16:47:04.192Z",
588+
"+292278994-08-17T07:12:55.807Z",
589+
true,
590+
true,
591+
null,
592+
null,
593+
null,
594+
context
595+
);
596+
597+
TopDocs topDocs = searcher.search(rangeQuery, dates.size());
598+
assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);
599+
600+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
601+
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
602+
IndexableField dateField = doc.getField(ft.name());
603+
if (dateField != null) {
604+
long dateValue = dateField.numericValue().longValue();
605+
assertTrue(
606+
"Date value " + dateValue + " should be within valid range",
607+
dateValue >= minDateValue && dateValue <= maxDateValue
608+
);
609+
}
610+
}
611+
612+
DateFieldType ftWithNullValue = new DateFieldType(
613+
"test_date",
614+
true,
615+
true,
616+
true,
617+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
618+
Resolution.MILLISECONDS,
619+
"2020-01-01T00:00:00Z",
620+
Collections.emptyMap()
621+
);
622+
623+
Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
624+
topDocs = searcher.search(nullValueQuery, dates.size());
625+
assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);
626+
627+
IOUtils.close(reader, w, dir);
628+
}
629+
630+
public void testDateFieldTypeWithNulls() throws IOException {
631+
DateFieldType ft = new DateFieldType(
632+
"domainAttributes.dueDate",
633+
true,
634+
true,
635+
true,
636+
DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
637+
Resolution.MILLISECONDS,
638+
null,
639+
Collections.emptyMap()
640+
);
641+
642+
Directory dir = newDirectory();
643+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
644+
645+
int nullDocs = 3500;
646+
int datedDocs = 50;
647+
648+
for (int i = 0; i < nullDocs; i++) {
649+
ParseContext.Document doc = new ParseContext.Document();
650+
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
651+
w.addDocument(doc);
652+
}
653+
654+
for (int i = 1; i <= datedDocs; i++) {
655+
ParseContext.Document doc = new ParseContext.Document();
656+
String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
657+
long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
658+
doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
659+
doc.add(new LongPoint(ft.name(), timestamp));
660+
doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
661+
doc.add(new StoredField(ft.name(), timestamp));
662+
w.addDocument(doc);
663+
}
664+
665+
DirectoryReader reader = DirectoryReader.open(w);
666+
IndexSearcher searcher = new IndexSearcher(reader);
667+
668+
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
669+
queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);
670+
671+
Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));
672+
673+
for (int i = 0; i < 100; i++) {
674+
TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort);
675+
assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
676+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
677+
org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
678+
IndexableField dateField = doc.getField(ft.name());
679+
if (dateField != null) {
680+
long dateValue = dateField.numericValue().longValue();
681+
Instant dateInstant = Instant.ofEpochMilli(dateValue);
682+
assertTrue(
683+
"Date should be in March 2022",
684+
dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z"))
685+
&& dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z"))
686+
);
687+
}
688+
}
689+
}
690+
IOUtils.close(reader, w, dir);
691+
}
493692
}

0 commit comments

Comments
 (0)