-
Notifications
You must be signed in to change notification settings - Fork 1k
Open
Labels
Milestone
Description
Suggestions
Add trunc.IDate
(may be replace round.IDate
) to consists with lubridate::floor_date
. trunc/floor
for the week should return first day of the week and not relate with the day of year.
For the "weeks" unit add additional option for the start.on.monday
(like cut.Date
) or something like this.
Possible implementation:
trunc_week <- function(x, start.on.monday = TRUE) {
if (isTRUE(start.on.monday)) {
# lubridate::floor_date(x, "week", week_start = 1)
as.IDate(7 * ((unclass(x) - 4L) %/% 7) + 4L)
} else {
# lubridate::floor_date(x, "week", week_start = 0)
as.IDate(7 * ((unclass(x) - 3L) %/% 7) + 3L)
}
}
Improvements
For the existing round.IDate
implementation.
Utility function to print bench results:
print.bench = function(x, ...) {
cols = c("expression", "n", "min", "median", "mem_alloc", "n_itr", "n_gc")
r = as.data.table(x)[, ..cols]
r[, expression := sapply(expression, deparse)]
r[, relative := round(as.numeric(median / min(median)), 3), by = c("n")]
r[]
}
Data to process.
set.seed(42)
x = seq.Date(as.Date("1900-01-01"), as.Date("2020-03-01"), by = "day")
x = as.IDate(x)
Weeks
trunc_week_old = function(x) {
# IDateTime.R#81
# bug: first week of year have 6 days instead 7
# fix: should be yday(x) - 1L
round(x, "year") + 7L * ((yday(x) - 1L) %/% 7L)
}
trunc_week_new = function(x) {
l = as.POSIXlt(x)
w = (l$yday %/% 7L) * 7L
x - l$yday + w
}
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_week_old(d), trunc_week_new(d))
}
)
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_week_old(d) 1e+02 225.62µs 232.11µs 146.73KB 1950 2 7.050
#> 2: trunc_week_new(d) 1e+02 30.57µs 32.92µs 40.53KB 9996 4 1.000
#> 3: trunc_week_old(d) 1e+03 1.39ms 1.47ms 213KB 318 2 13.605
#> 4: trunc_week_new(d) 1e+03 106.56µs 108.29µs 59.2KB 4151 4 1.000
#> 5: trunc_week_old(d) 1e+04 13.44ms 13.88ms 2.06MB 33 1 15.994
#> 6: trunc_week_new(d) 1e+04 859.88µs 867.72µs 586.55KB 534 5 1.000
#> 7: trunc_week_old(d) 1e+05 134.9ms 137.59ms 20.6MB 2 2 15.515
#> 8: trunc_week_new(d) 1e+05 8.39ms 8.87ms 5.72MB 41 9 1.000
#> 9: trunc_week_old(d) 1e+06 1.45s 1.45s 206MB 1 6 15.516
#> 10: trunc_week_new(d) 1e+06 88.27ms 93.29ms 57.22MB 6 6 1.000
Months
trunc_month_old = function(x) {
# IDateTime.R#81
as.IDate(ISOdate(year(x), month(x), 1L))
}
trunc_month_new = function(x) {
x - as.POSIXlt(x)$mday + 1L
}
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_month_old(d), trunc_month_new(d))
}
)
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_month_old(d) 1e+02 198.49µs 219.99µs 27.5KB 1947 2 7.332
#> 2: trunc_month_new(d) 1e+02 28.26µs 30µs 6.03KB 9996 4 1.000
#> 3: trunc_month_old(d) 1e+03 1.47ms 1.5ms 216.91KB 329 1 15.449
#> 4: trunc_month_new(d) 1e+03 94.05µs 96.94µs 55.25KB 4683 3 1.000
#> 5: trunc_month_old(d) 1e+04 13.93ms 14.39ms 2.1MB 35 0 17.223
#> 6: trunc_month_new(d) 1e+04 794.89µs 835.49µs 547.44KB 571 4 1.000
#> 7: trunc_month_old(d) 1e+05 139.84ms 141.22ms 20.98MB 3 1 17.648
#> 8: trunc_month_new(d) 1e+05 7.93ms 8ms 5.34MB 58 4 1.000
#> 9: trunc_month_old(d) 1e+06 1.42s 1.42s 209.81MB 1 4 17.221
#> 10: trunc_month_new(d) 1e+06 80.22ms 82.72ms 53.41MB 6 7 1.000
Qurters
trunc_quarter_old = function(x) {
# IDateTime.R#82
as.IDate(ISOdate(year(x), 3L * (quarter(x) - 1L) + 1L, 1L))
}
trunc_quarter_new = function(x) {
l = as.POSIXlt(x)
l$mon = (l$mon %/% 3L) * 3L
l$mday = 1L
as.IDate(l)
}
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_quarter_old(d), trunc_quarter_new(d))
}
)
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_quarter_old(d) 1e+02 199µs 217.63µs 28.12KB 2213 2 6.289
#> 2: trunc_quarter_new(d) 1e+02 32.26µs 34.61µs 18.62KB 9995 5 1.000
#> 3: trunc_quarter_old(d) 1e+03 1.55ms 1.64ms 216.91KB 294 1 8.413
#> 4: trunc_quarter_new(d) 1e+03 177.79µs 194.43µs 98.64KB 2308 3 1.000
#> 5: trunc_quarter_old(d) 1e+04 15.15ms 15.46ms 2.1MB 31 1 8.443
#> 6: trunc_quarter_new(d) 1e+04 1.73ms 1.83ms 977.55KB 258 4 1.000
#> 7: trunc_quarter_old(d) 1e+05 153.45ms 153.83ms 20.98MB 2 2 8.313
#> 8: trunc_quarter_new(d) 1e+05 18.02ms 18.5ms 9.54MB 19 6 1.000
#> 9: trunc_quarter_old(d) 1e+06 1.59s 1.59s 209.81MB 1 5 6.672
#> 10: trunc_quarter_new(d) 1e+06 187.06ms 238.51ms 95.37MB 3 6 1.000
Years
trunc_year_old = function(x) {
# IDateTime.R#83
as.IDate(ISOdate(year(x), 1L, 1L))
}
trunc_year_new = function(x) {
x - as.POSIXlt(x)$yday
}
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_year_old(d), trunc_year_new(d))
}
)
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_year_old(d) 1e+02 178.84µs 184.22µs 17.56KB 2405 2 8.662
#> 2: trunc_year_new(d) 1e+02 19.81µs 21.27µs 5.59KB 9997 3 1.000
#> 3: trunc_year_old(d) 1e+03 1.26ms 1.3ms 161.7KB 375 0 14.864
#> 4: trunc_year_new(d) 1e+03 85.66µs 87.45µs 51.3KB 5431 3 1.000
#> 5: trunc_year_old(d) 1e+04 12.01ms 12.62ms 1.57MB 40 0 16.732
#> 6: trunc_year_new(d) 1e+04 743.28µs 754.02µs 508.33KB 578 3 1.000
#> 7: trunc_year_old(d) 1e+05 125.14ms 125.57ms 15.64MB 3 1 16.846
#> 8: trunc_year_new(d) 1e+05 7.38ms 7.45ms 4.96MB 64 3 1.000
#> 9: trunc_year_old(d) 1e+06 1.27s 1.27s 156.4MB 1 4 17.129
#> 10: trunc_year_new(d) 1e+06 73.92ms 74.31ms 49.59MB 7 3 1.000
Final trunc.IDate
implementation:
trunc.IDate = function (x, units = c("weeks", "months", "quarters", "years"), start.on.monday = TRUE, ...) {
units = match.arg(units)
offset = if (isTRUE(start.on.monday)) 4L else 3L
as.IDate(switch(units,
weeks = 7L * ((unclass(x) - offset) %/% 7L) + offset,
months = unclass(x) - as.POSIXlt(x)$mday + 1L,
quarters = {
l = as.POSIXlt(x)
l$mon = (l$mon %/% 3L) * 3L # trunc to quarter start month
l$mday = 1L # trunc month day
l
},
years = unclass(x) - as.POSIXlt(x)$yday))
}
Can I make the PR with this changes?
jangorecki, ColeMiller1 and mmeierer