Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 23 additions & 42 deletions src/transforms/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

# name of a target column, given either as a symbol or as a string
const TargetName = Union{Symbol,AbstractString}
# pair of the form `cols => fun => target` (explicit target column name)
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}
# pair of the form `cols => fun` (no target column name)
const PairWithoutTarget = Pair{<:Any,<:Function}
# any pair accepted by the `Map(pairs...)` constructor
const MapPair = Union{PairWithTarget,PairWithoutTarget}

"""
Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ)

Expand All @@ -13,8 +18,6 @@ a collection of identifiers or a regular expression (regex).

Passing a target column name is optional and when omitted a new name
is generated by joining the function name with the selected column names.
If the target column already exists in the table, the original
column will be replaced.

## Examples

Expand Down Expand Up @@ -44,16 +47,6 @@ end

# a Map transform needs at least one pair, so the zero-argument call is rejected
function Map()
  throw(ArgumentError("cannot create Map transform without arguments"))
end

# utility types
# name of a target column, given either as a symbol or as a string
const TargetName = Union{Symbol,AbstractString}
# pair of the form `cols => fun => target` (explicit target column name)
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}
# pair of the form `cols => fun` (no target column name)
const PairWithoutTarget = Pair{<:Any,<:Function}
# any pair accepted by the `Map(pairs...)` constructor
const MapPair = Union{PairWithTarget,PairWithoutTarget}

# utility functions
# split `cols => fun => target` into (selector, function, target as Symbol)
function _extract(p::PairWithTarget)
  spec = p.second
  selector(p.first), spec.first, Symbol(spec.second)
end

# split `cols => fun` into (selector, function, nothing); `nothing` marks
# the absence of a user-provided target name
function _extract(p::PairWithoutTarget)
  selector(p.first), p.second, nothing
end

function Map(pairs::MapPair...)
tuples = map(_extract, pairs)
selectors = [t[1] for t in tuples]
Expand All @@ -62,32 +55,14 @@ function Map(pairs::MapPair...)
Map(selectors, funs, targets)
end

# textual name of a function, used to build generated column names

# partially applied functions are prefixed with "fix1_" / "fix2_"
function _funname(fun::Base.Fix1)
  string("fix1_", _funname(fun.f))
end

function _funname(fun::Base.Fix2)
  string("fix2_", _funname(fun.f))
end

# composed functions join the outer and inner names with "_"
function _funname(fun::ComposedFunction)
  string(_funname(fun.outer), "_", _funname(fun.inner))
end

# fallback: the printed representation of the function
function _funname(fun)
  string(fun)
end

# build a column name of the form `<function name>_<col1>_<col2>_...`
function _makename(snames, fun)
  # anonymous functions are printed with "#", which is swapped for "f";
  # names without "#" pass through `replace` unchanged
  prefix = replace(_funname(fun), "#" => "f")
  suffix = join(snames, "_")
  Symbol(prefix, :_, suffix)
end

function applyfeat(transform::Map, feat, prep)
cols = Tables.columns(feat)
onames = Tables.columnnames(cols)
names = Tables.columnnames(cols)

selectors = transform.selectors
funs = transform.funs
targets = transform.targets

# new names and columns
names = collect(onames)
columns = Any[Tables.getcolumn(cols, nm) for nm in onames]

# mapped columns
mapped = map(selectors, funs, targets) do selector, fun, target
snames = selector(names)
newname = isnothing(target) ? _makename(snames, fun) : target
Expand All @@ -96,17 +71,23 @@ function applyfeat(transform::Map, feat, prep)
newname => newcolumn
end

for (name, column) in mapped
if name ∈ onames
i = findfirst(==(name), onames)
columns[i] = column
else
push!(names, name)
push!(columns, column)
end
end
newfeat = (; mapped...) |> Tables.materializer(feat)

𝒯 = (; zip(names, columns)...)
newfeat = 𝒯 |> Tables.materializer(feat)
newfeat, nothing
end

# split `cols => fun => target` into (selector, function, target as Symbol)
function _extract(p::PairWithTarget)
  cols, (fun, target) = p
  selector(cols), fun, Symbol(target)
end

# split `cols => fun` into (selector, function, nothing); `nothing` marks
# the absence of a user-provided target name
function _extract(p::PairWithoutTarget)
  cols, fun = p
  selector(cols), fun, nothing
end

# textual name of a function, used to build generated column names;
# partially applied functions get a "fix1_" / "fix2_" prefix and
# composed functions join the outer and inner names with "_"
_funname(fun::Base.Fix1) = "fix1_$(_funname(fun.f))"
_funname(fun::Base.Fix2) = "fix2_$(_funname(fun.f))"
_funname(fun::ComposedFunction) = join((_funname(fun.outer), _funname(fun.inner)), "_")
# fallback: the printed representation of the function
_funname(fun) = string(fun)

# build a column name of the form `<function name>_<col1>_<col2>_...`
function _makename(snames, fun)
  rawname = _funname(fun)
  # anonymous functions are printed with "#", which is swapped for "f"
  cleanname = occursin("#", rawname) ? replace(rawname, "#" => "f") : rawname
  Symbol(cleanname, :_, join(snames, "_"))
end
30 changes: 15 additions & 15 deletions test/transforms/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,55 +9,55 @@

T = Map(1 => sin)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_a)
@test Tables.schema(n).names == (:sin_a,)
@test n.sin_a == sin.(t.a)

T = Map(:b => cos)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :cos_b)
@test Tables.schema(n).names == (:cos_b,)
@test n.cos_b == cos.(t.b)

T = Map("c" => tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :tan_c)
@test Tables.schema(n).names == (:tan_c,)
@test n.tan_c == tan.(t.c)

T = Map(:a => sin => :a)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d)
@test Tables.schema(n).names == (:a,)
@test n.a == sin.(t.a)

T = Map(:a => sin => "a")
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d)
@test Tables.schema(n).names == (:a,)
@test n.a == sin.(t.a)

T = Map([2, 3] => ((b, c) -> 2b + c) => :op1)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test Tables.schema(n).names == (:op1,)
@test n.op1 == @. 2 * t.b + t.c

T = Map([:a, :c] => ((a, c) -> 2a * 3c) => :op1)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test Tables.schema(n).names == (:op1,)
@test n.op1 == @. 2 * t.a * 3 * t.c

T = Map(["c", "a"] => ((c, a) -> 3c / a) => :op1, "c" => tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1, :tan_c)
@test Tables.schema(n).names == (:op1, :tan_c)
@test n.op1 == @. 3 * t.c / t.a
@test n.tan_c == tan.(t.c)

T = Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "op1")
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :op1)
@test Tables.schema(n).names == (:op1,)
@test n.op1 == @. t.a^2 - 2 * t.b + t.c

# generated names
# normal function
T = Map([:c, :d] => hypot)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :hypot_c_d)
@test Tables.schema(n).names == (:hypot_c_d,)
@test n.hypot_c_d == hypot.(t.c, t.d)

# anonymous function
Expand All @@ -66,34 +66,34 @@
colname = Symbol(fname, :_a)
T = Map(:a => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, colname)
@test Tables.schema(n).names == (colname,)
@test Tables.getcolumn(n, colname) == f.(t.a)

# composed function
f = sin ∘ cos
T = Map(:b => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_b)
@test Tables.schema(n).names == (:sin_cos_b,)
@test n.sin_cos_b == f.(t.b)

f = sin ∘ cos ∘ tan
T = Map(:c => sin ∘ cos ∘ tan)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :sin_cos_tan_c)
@test Tables.schema(n).names == (:sin_cos_tan_c,)
@test n.sin_cos_tan_c == f.(t.c)

# Base.Fix1
f = Base.Fix1(hypot, 2)
T = Map(:d => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :fix1_hypot_d)
@test Tables.schema(n).names == (:fix1_hypot_d,)
@test n.fix1_hypot_d == f.(t.d)

# Base.Fix2
f = Base.Fix2(hypot, 2)
T = Map(:a => f)
n, c = apply(T, t)
@test Tables.schema(n).names == (:a, :b, :c, :d, :fix2_hypot_a)
@test Tables.schema(n).names == (:fix2_hypot_a,)
@test n.fix2_hypot_a == f.(t.a)

# error: cannot create Map transform without arguments
Expand Down
Loading