Skip to content

Commit 5f36fef

Browse files
committed
Add function castBIGINT_timestamp (apache#22)
* Add function castBIGINT_timestamp * fix * wip
1 parent b0b1d2d commit 5f36fef

File tree

12 files changed

+70
-9
lines changed

12 files changed

+70
-9
lines changed

cpp/src/arrow/compute/kernels/scalar_cast_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,17 @@ TEST(Cast, TimestampToTimestamp) {
11011101
options.allow_time_truncate = true;
11021102
CheckCast(will_be_truncated, coarse, options);
11031103
}
1104+
1105+
for (auto types : {
1106+
TimestampTypePair{timestamp(TimeUnit::MILLI, "UTC+8"), timestamp(TimeUnit::MILLI)}
1107+
}) {
1108+
auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200000000000, 1000000000, 2000000000]");
1109+
auto promoted =
1110+
ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]");
1111+
1112+
// multiply/promote
1113+
CheckCast(coarse, promoted);
1114+
}
11041115
}
11051116

11061117
TEST(Cast, TimestampZeroCopy) {

cpp/src/gandiva/function_registry_common.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
5555
// Shorthand for the Arrow time64 type with microsecond resolution.
inline DataTypePtr time64() {
  return arrow::time64(arrow::TimeUnit::MICRO);
}
5656

5757
// Shorthand for the Arrow timestamp type with millisecond resolution and no
// timezone argument.
inline DataTypePtr timestamp() {
  return arrow::timestamp(arrow::TimeUnit::MILLI);
}
58+
59+
// Shorthand for the Arrow timestamp type with microsecond resolution and the
// timezone string "UTC".
inline DataTypePtr timestampusutc() {
  return arrow::timestamp(arrow::TimeUnit::MICRO, "UTC");
}
60+
5861
// Shorthand for the Arrow decimal type created with arguments (38, 0).
inline DataTypePtr decimal128() {
  return arrow::decimal(38, 0);
}
5962

6063
struct KeyHash {
@@ -268,7 +271,7 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
268271

269272
// Iterate the inner macro over all date types
270273
// Expands INNER once each for date64, timestamp and timestampusutc.
#define DATE_TYPES(INNER, NAME, ALIASES)                          \
  INNER(NAME, ALIASES, date64), INNER(NAME, ALIASES, timestamp), \
      INNER(NAME, ALIASES, timestampusutc)
272275

273276
// Iterate the inner macro over all time types
274277
#define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32)

cpp/src/gandiva/function_registry_datetime.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
9696
NativeFunction("castTIME", {}, DataTypeVector{timestamp()}, time32(),
9797
kResultNullIfNull, "castTIME_timestamp"),
9898

99+
NativeFunction("castBIGINT", {}, DataTypeVector{timestamp()}, int64(),
100+
kResultNullIfNull, "castBIGINT_timestamp"),
101+
99102
NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()}, int64(),
100103
kResultNullIfNull, "castBIGINT_daytimeinterval"),
101104

@@ -139,6 +142,12 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
139142

140143
NativeFunction("castDATE", {}, DataTypeVector{date64()}, date32(),
141144
kResultNullIfNull, "castDATE_date64"),
145+
146+
NativeFunction("castTIMESTAMP", {}, DataTypeVector{date32()}, timestamp(),
147+
kResultNullIfNull, "castTIMESTAMP_date32"),
148+
149+
NativeFunction("castDATE", {}, DataTypeVector{timestamp()}, date32(),
150+
kResultNullIfNull, "castDATE32_timestamp"),
142151
DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {});
143152

144153
return date_time_fn_registry_;

cpp/src/gandiva/function_signature.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) {
4545
return (dleft != NULL) && (dright != NULL) &&
4646
(dleft->byte_width() == dright->byte_width());
4747
}
48+
case arrow::Type::TIMESTAMP: {
  // Two timestamp types are treated as the same signature when their units
  // are equal and they agree on zone presence (both carry a timezone string,
  // or both carry none). The timezone text itself is not compared.
  auto tleft = checked_cast<arrow::TimestampType *>(left.get());
  auto tright = checked_cast<arrow::TimestampType *>(right.get());
  return (tleft != NULL) && (tright != NULL) &&
         (tleft->unit() == tright->unit()) &&
         // The right-hand operand must come from `tright`; comparing the left
         // type with itself would make this check always true.
         (tleft->timezone().empty() == tright->timezone().empty());
}
4856
default:
4957
return left->Equals(right);
5058
}

cpp/src/gandiva/gdv_function_stubs.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ CRC_FUNCTION(binary)
311311
INNER(date64) \
312312
INNER(date32) \
313313
INNER(time32) \
314-
INNER(timestamp)
314+
INNER(timestamp) \
315+
INNER(timestampusutc)
315316

316317
// Expand inner macro for all numeric types.
317318
#define SHA_VAR_LEN_PARAMS(INNER) \

cpp/src/gandiva/gdv_function_stubs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ using gdv_date64 = int64_t;
3939
using gdv_date32 = int32_t;
4040
using gdv_time32 = int32_t;
4141
using gdv_timestamp = int64_t;
42+
using gdv_timestampusutc = int64_t;
4243
using gdv_utf8 = char*;
4344
using gdv_binary = char*;
4445
using gdv_day_time_interval = int64_t;

cpp/src/gandiva/jni/jni_common.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,19 +155,26 @@ DataTypePtr ProtoTypeToTime64(const types::ExtGandivaType& ext_type) {
155155
}
156156

157157
// Builds an Arrow timestamp type from the protobuf type description.
//
// The protobuf time unit is mapped to the matching arrow::TimeUnit and the
// timezone string from `ext_type` is forwarded to arrow::timestamp. An
// unrecognised time unit is reported on stderr and yields nullptr.
DataTypePtr ProtoTypeToTimestamp(const types::ExtGandivaType& ext_type) {
  const auto proto_unit = ext_type.timeunit();

  arrow::TimeUnit::type resolved_unit;
  if (proto_unit == types::SEC) {
    resolved_unit = arrow::TimeUnit::SECOND;
  } else if (proto_unit == types::MILLISEC) {
    resolved_unit = arrow::TimeUnit::MILLI;
  } else if (proto_unit == types::MICROSEC) {
    resolved_unit = arrow::TimeUnit::MICRO;
  } else if (proto_unit == types::NANOSEC) {
    resolved_unit = arrow::TimeUnit::NANO;
  } else {
    std::cerr << "Unknown time unit: " << proto_unit << " for timestamp\n";
    return nullptr;
  }

  return arrow::timestamp(resolved_unit, ext_type.timezone());
}
172179

173180
DataTypePtr ProtoTypeToInterval(const types::ExtGandivaType& ext_type) {

cpp/src/gandiva/precompiled/arithmetic_ops.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ extern "C" {
4141
INNER(NAME, date64, OP) \
4242
INNER(NAME, date32, OP) \
4343
INNER(NAME, timestamp, OP) \
44+
INNER(NAME, timestampusutc, OP) \
4445
INNER(NAME, time32, OP)
4546

4647
#define NUMERIC_DATE_TYPES(INNER, NAME, OP) \
@@ -362,6 +363,7 @@ NUMERIC_TYPES(IS_TRUE_OR_FALSE_NUMERIC, isfalse, !)
362363
INNER(date32) \
363364
INNER(date64) \
364365
INNER(timestamp) \
366+
INNER(timestampusutc) \
365367
INNER(time32)
366368

367369
#define NUMERIC_BOOL_DATE_FUNCTION(INNER) \

cpp/src/gandiva/precompiled/hash.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,8 @@ FORCE_INLINE gdv_int32 hash64_spark_int64_int32(gdv_int64 val, gdv_boolean is_va
307307
INNER(NAME, date64) \
308308
INNER(NAME, date32) \
309309
INNER(NAME, time32) \
310-
INNER(NAME, timestamp)
310+
INNER(NAME, timestamp) \
311+
INNER(NAME, timestampusutc)
311312

312313
NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash)
313314
NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32)

cpp/src/gandiva/precompiled/time.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -826,10 +826,18 @@ gdv_timestamp convertTimestampUnit_us(gdv_timestamp timestamp_in_micro) {
826826
return timestamp_in_micro / 1000;
827827
}
828828

829-
gdv_date32 castDATE_date64(gdv_date64 date_in_millis) {
829+
// Converts a date64 value in milliseconds to a date32 day count by dividing
// by MILLIS_IN_DAY. Integer division truncates toward zero.
//
// NOTE(review): the castDATE(date64) registry entry shown alongside this
// change still names "castDATE_date64" as the precompiled symbol; confirm
// that the registry and this function name agree.
gdv_date32 castDATE32_date64(gdv_date64 date_in_millis) {
  const auto whole_days = date_in_millis / (MILLIS_IN_DAY);
  return static_cast<gdv_date32>(whole_days);
}
832832

833+
// Converts a date32 day count to a timestamp by scaling with MILLIS_IN_DAY.
//
// The day count is widened to gdv_timestamp before the multiplication so the
// product is computed and returned at the timestamp's width. Multiplying
// first and casting the product to gdv_date32 would truncate the result to
// the narrower date type.
gdv_timestamp castTIMESTAMP_date32(gdv_date32 in_day) {
  return static_cast<gdv_timestamp>(in_day) * (MILLIS_IN_DAY);
}
836+
837+
// Converts a millisecond timestamp to a date32 day count by dividing by
// MILLIS_IN_DAY. Integer division truncates toward zero, so any sub-day
// remainder is discarded.
gdv_date32 castDATE32_timestamp(gdv_timestamp timestamp_in_millis) {
  const auto whole_days = timestamp_in_millis / (MILLIS_IN_DAY);
  return static_cast<gdv_date32>(whole_days);
}
840+
833841
const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
834842
gdv_int64 length, gdv_int32* out_len) {
835843
gdv_int64 year = extractYear_timestamp(in);
@@ -896,6 +904,11 @@ gdv_int64 extractMillis_daytimeinterval(gdv_day_time_interval in) {
896904
return static_cast<gdv_int64>(millis);
897905
}
898906

907+
// Returns the timestamp's stored integer value unchanged as a gdv_int64.
FORCE_INLINE
gdv_int64 castBIGINT_timestamp(gdv_timestamp in) {
  return static_cast<gdv_int64>(in);
}
911+
899912
FORCE_INLINE
900913
gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
901914
return extractMillis_daytimeinterval(in) +

0 commit comments

Comments
 (0)