@@ -622,6 +622,71 @@ gdv_date64 castDATE_utf8(int64_t context, const char* input, gdv_int32 length) {
622
622
.count ();
623
623
}
624
624
625
+ // This function Will set result to be null if input is invalid, instead of throwing error.
626
+ gdv_date64 castDATE_nullsafe_utf8 (int64_t context, const char * input, gdv_int32 length,
627
+ bool in_valid, bool * out_valid) {
628
+ if (!in_valid) {
629
+ *out_valid = false ;
630
+ return 0 ;
631
+ }
632
+ using arrow_vendored::date::day;
633
+ using arrow_vendored::date::month;
634
+ using arrow_vendored::date::sys_days;
635
+ using arrow_vendored::date::year;
636
+ using arrow_vendored::date::year_month_day;
637
+ using gandiva::TimeFields;
638
+ // format : 0 is year, 1 is month and 2 is day.
639
+ int dateFields[3 ];
640
+ int dateIndex = 0 , index = 0 , value = 0 ;
641
+ int year_str_len = 0 ;
642
+ while (dateIndex < 3 && index < length) {
643
+ if (!isdigit (input[index ])) {
644
+ dateFields[dateIndex++] = value;
645
+ value = 0 ;
646
+ } else {
647
+ value = (value * 10 ) + (input[index ] - ' 0' );
648
+ if (dateIndex == TimeFields::kYear ) {
649
+ year_str_len++;
650
+ }
651
+ }
652
+ index ++;
653
+ }
654
+
655
+ if (dateIndex < 3 ) {
656
+ // If we reached the end of input, we would have not encountered a separator
657
+ // store the last value
658
+ dateFields[dateIndex++] = value;
659
+ }
660
+ const char * msg = " Not a valid date value " ;
661
+ if (dateIndex != 3 ) {
662
+ *out_valid = false ;
663
+ return 0 ;
664
+ }
665
+
666
+ /* Handle two digit years
667
+ * If range of two digits is between 70 - 99 then year = 1970 - 1999
668
+ * Else if two digits is between 00 - 69 = 2000 - 2069
669
+ */
670
+ if (dateFields[TimeFields::kYear ] < 100 && year_str_len < 4 ) {
671
+ if (dateFields[TimeFields::kYear ] < 70 ) {
672
+ dateFields[TimeFields::kYear ] += 2000 ;
673
+ } else {
674
+ dateFields[TimeFields::kYear ] += 1900 ;
675
+ }
676
+ }
677
+ year_month_day date = year (dateFields[TimeFields::kYear ]) /
678
+ month (dateFields[TimeFields::kMonth ]) /
679
+ day (dateFields[TimeFields::kDay ]);
680
+ if (!date.ok ()) {
681
+ *out_valid = false ;
682
+ return 0 ;
683
+ }
684
+ *out_valid = true ;
685
+ return std::chrono::time_point_cast<std::chrono::milliseconds>(sys_days (date))
686
+ .time_since_epoch ()
687
+ .count ();
688
+ }
689
+
625
690
const char * castVARCHAR_date32_int64 (gdv_int64 context, gdv_date32 in_day,
626
691
gdv_int64 length, gdv_int32* out_len) {
627
692
gdv_timestamp in = castDATE_date32 (in_day);
@@ -798,6 +863,142 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l
798
863
return std::chrono::time_point_cast<milliseconds>(date_time).time_since_epoch ().count ();
799
864
}
800
865
866
+ /*
867
+ * Input consists of mandatory and optional fields.
868
+ * Mandatory fields are year, month and day.
869
+ * Optional fields are time, displacement and zone.
870
+ * Format is <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone]
871
+ * This function will conduct carrying when the length of ms is greater than 3.
872
+ */
873
+ gdv_timestamp castTIMESTAMP_withCarrying_utf8 (int64_t context, const char * input,
874
+ gdv_int32 length, bool in_valid,
875
+ bool * out_valid) {
876
+ if (!in_valid) {
877
+ *out_valid = false ;
878
+ return 0 ;
879
+ }
880
+ *out_valid = true ;
881
+ using arrow_vendored::date::day;
882
+ using arrow_vendored::date::month;
883
+ using arrow_vendored::date::sys_days;
884
+ using arrow_vendored::date::year;
885
+ using arrow_vendored::date::year_month_day;
886
+ using gandiva::TimeFields;
887
+ using std::chrono::hours;
888
+ using std::chrono::milliseconds;
889
+ using std::chrono::minutes;
890
+ using std::chrono::seconds;
891
+
892
+ int ts_fields[9 ] = {0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
893
+ gdv_boolean add_displacement = true ;
894
+ gdv_boolean encountered_zone = false ;
895
+ int year_str_len = 0 , sub_seconds_len = 0 ;
896
+ int ts_field_index = TimeFields::kYear , index = 0 , value = 0 ;
897
+ while (ts_field_index < TimeFields::kMax && index < length) {
898
+ if (isdigit (input[index ])) {
899
+ value = (value * 10 ) + (input[index ] - ' 0' );
900
+ if (ts_field_index == TimeFields::kYear ) {
901
+ year_str_len++;
902
+ }
903
+ if (ts_field_index == TimeFields::kSubSeconds ) {
904
+ sub_seconds_len++;
905
+ }
906
+ } else {
907
+ ts_fields[ts_field_index] = value;
908
+ value = 0 ;
909
+
910
+ switch (input[index ]) {
911
+ case ' .' :
912
+ case ' :' :
913
+ case ' ' :
914
+ ts_field_index++;
915
+ break ;
916
+ case ' +' :
917
+ // +08:00, means time zone is 8 hours ahead. Need to subtract.
918
+ add_displacement = false ;
919
+ ts_field_index = TimeFields::kDisplacementHours ;
920
+ break ;
921
+ case ' -' :
922
+ // Overloaded as date separator and negative displacement.
923
+ ts_field_index = (ts_field_index < 3 ) ? (ts_field_index + 1 )
924
+ : TimeFields::kDisplacementHours ;
925
+ break ;
926
+ default :
927
+ encountered_zone = true ;
928
+ break ;
929
+ }
930
+ }
931
+ if (encountered_zone) {
932
+ break ;
933
+ }
934
+ index ++;
935
+ }
936
+
937
+ // Store the last value
938
+ if (ts_field_index < TimeFields::kMax ) {
939
+ ts_fields[ts_field_index++] = value;
940
+ }
941
+
942
+ // adjust the year
943
+ if (ts_fields[TimeFields::kYear ] < 100 && year_str_len < 4 ) {
944
+ if (ts_fields[TimeFields::kYear ] < 70 ) {
945
+ ts_fields[TimeFields::kYear ] += 2000 ;
946
+ } else {
947
+ ts_fields[TimeFields::kYear ] += 1900 ;
948
+ }
949
+ }
950
+
951
+ // adjust the milliseconds
952
+ if (sub_seconds_len > 0 ) {
953
+ if (ts_fields[TimeFields::kSubSeconds ] < 1000 ) {
954
+ while (sub_seconds_len < 3 ) {
955
+ ts_fields[TimeFields::kSubSeconds ] *= 10 ;
956
+ sub_seconds_len++;
957
+ }
958
+ }
959
+ }
960
+ // handle timezone
961
+ if (encountered_zone) {
962
+ int err = 0 ;
963
+ gdv_timestamp ret_time = 0 ;
964
+ err = gdv_fn_time_with_zone (&ts_fields[0 ], (input + index ), (length - index ),
965
+ &ret_time);
966
+ if (err) {
967
+ const char * msg = " Invalid timestamp or unknown zone for timestamp value " ;
968
+ set_error_for_date (length, input, msg, context);
969
+ return 0 ;
970
+ }
971
+ return ret_time;
972
+ }
973
+
974
+ year_month_day date = year (ts_fields[TimeFields::kYear ]) /
975
+ month (ts_fields[TimeFields::kMonth ]) /
976
+ day (ts_fields[TimeFields::kDay ]);
977
+ if (!date.ok ()) {
978
+ *out_valid = false ;
979
+ return 0 ;
980
+ }
981
+
982
+ if (!is_valid_time (ts_fields[TimeFields::kHours ], ts_fields[TimeFields::kMinutes ],
983
+ ts_fields[TimeFields::kSeconds ])) {
984
+ *out_valid = false ;
985
+ return 0 ;
986
+ }
987
+
988
+ auto date_time = sys_days (date) + hours (ts_fields[TimeFields::kHours ]) +
989
+ minutes (ts_fields[TimeFields::kMinutes ]) +
990
+ seconds (ts_fields[TimeFields::kSeconds ]) +
991
+ milliseconds (ts_fields[TimeFields::kSubSeconds ]);
992
+ if (ts_fields[TimeFields::kDisplacementHours ] ||
993
+ ts_fields[TimeFields::kDisplacementMinutes ]) {
994
+ auto displacement_time = hours (ts_fields[TimeFields::kDisplacementHours ]) +
995
+ minutes (ts_fields[TimeFields::kDisplacementMinutes ]);
996
+ date_time = (add_displacement) ? (date_time + displacement_time)
997
+ : (date_time - displacement_time);
998
+ }
999
+ return std::chrono::time_point_cast<milliseconds>(date_time).time_since_epoch ().count ();
1000
+ }
1001
+
801
1002
// Casts a date64 value to a timestamp; the argument is returned unchanged.
gdv_timestamp castTIMESTAMP_date64(gdv_date64 date_in_millis) {
  return date_in_millis;
}
802
1003
803
1004
// Casts a 64-bit integer to a timestamp; the argument is returned unchanged.
gdv_timestamp castTIMESTAMP_int64(gdv_int64 in) {
  return in;
}
@@ -917,7 +1118,8 @@ gdv_date32 castDATE32_date64(gdv_date64 date_in_millis) {
917
1118
}
918
1119
919
1120
// Casts a date32 value to a timestamp by multiplying it with MILLIS_IN_DAY.
// The operand is widened to 64 bits first so the product is computed in 64 bits.
gdv_timestamp castTIMESTAMP_date32(gdv_date32 in_day) {
  const auto days = static_cast<int64_t>(in_day);
  return days * MILLIS_IN_DAY;
}
922
1124
923
1125
gdv_date32 castDATE32_timestamp (gdv_timestamp timestamp_in_millis) {
@@ -1067,6 +1269,12 @@ gdv_int32 unix_date_date32(gdv_date32 in) {
1067
1269
return in;
1068
1270
}
1069
1271
1272
+ FORCE_INLINE
1273
+ gdv_int64 unix_date_seconds_date32 (gdv_date32 in) {
1274
+ gdv_int64 in_day = (gdv_int64)in;
1275
+ return in_day * SECONDS_IN_HOUR * 24 ;
1276
+ }
1277
+
1070
1278
FORCE_INLINE
1071
1279
gdv_int64 unix_seconds_timestampusutc (gdv_timestamp in) {
1072
1280
return in / 1000000 ;
0 commit comments