Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
MatrixDepot = "b51810bb-c9f3-55da-ae3c-350fc1fbce05"
OpenAI = "e9f21f70-7185-4079-aca2-91159181367c"
Expand All @@ -21,15 +23,20 @@ ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[sources]
Dagger = {rev = "jps/lu-ldiv3", url = "https://github.com/JuliaParallel/Dagger.jl"}

[compat]
BSON = "0.3"
BenchmarkTools = "1"
CSV = "0.10"
BSON = "0.3"
CUDA = "5.9.2"
CairoMakie = "0.15"
DecisionTree = "0.12"
Dagger = "0.19.2"
DataFrames = "1"
DecisionTree = "0.12"
LinearAlgebra = "1.12.0"
Logging = "1.11.0"
MKL = "0.9"
MatrixDepot = "1.0.13"
OpenAI = "0.12.0"
Expand Down
134 changes: 134 additions & 0 deletions examples/agentic/generate-cpu-linear-solver/benchmark1.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
using LinearAlgebra
using SparseArrays
using CUDA
using BenchmarkTools
using OrderedCollections
using Plots

println("CPU benchmark with error-vs-time plot:\n")

include("solver.jl")

# Configuration
N = 15_000
sparsity_levels = [0.1, 0.5, 0.9]

umfpack_control = SparseArrays.UMFPACK.get_umfpack_control(Float64, Int64) # read Julia default configuration for a Float64 sparse matrix
#SparseArrays.UMFPACK.show_umf_ctrl(umfpack_control) # optional - display values
umfpack_control[SparseArrays.UMFPACK.JL_UMFPACK_IRSTEP] = 2.0 # reenable iterative refinement (2 is UMFPACK default max iterative refinement steps)

solvers = OrderedDict(
"UMFPACK (Baseline Direct Solve)" => (A, b) -> (A \ b),
"UMFPACK + Iterative Refinement" => (A, b) -> lu(A; control = umfpack_control) \ b,
"SmartSolve-Generated Solver" => (A, b) -> proposed_fn(A, b)
)

# Store results for plotting
results = Dict()

for sparsity in sparsity_levels
println("\n=== Sparsity: $sparsity ===")

# Generate problem
A = sprand(N, N, sparsity)
b = rand(N)

results[sparsity] = Dict()

for (solver_name, solver_fn) in solvers
println(" $solver_name...")

# Warm-up
b_warm = copy(b)
try
x_warm = solver_fn(A, b_warm)
catch e
println(" Warning: solver failed during warm-up: $e")
continue
end

# Benchmark
b_bench = copy(b)
try
bench = @benchmark begin
x = $(solver_fn)($A, $b_bench)
end

time_ms = median(bench.times) / 1e9 # Convert to s

# Compute error
b_err = copy(b)
x_sol = solver_fn(A, b_err)
error = norm(A*x_sol - b_err) / norm(b_err)

results[sparsity][solver_name] = (time=time_ms, error=error)
println(" Time: $(round(time_ms, digits=3)) s, Error: $(round(error, sigdigits=3))")
catch e
println(" Error during benchmark: $e")
end
end

GC.gc()
end

# Create error-vs-time plot
p = plot(
size=(800, 800),
#legend=:topright,
legend=:topleft,
xlabel="Time (s)",
ylabel="Relative residual: ||Ax - b||₂ / ||b||₂",
# xscale=:log10,
yscale=:log10,
guidefontsize=22,#18,
tickfontsize=20, #16,
legendfontsize=14, #14,
margin=5*Plots.mm,
framestyle=:box,
title="Random Matrices of Size $(N)x$(N),\n Varying Sparsity Levels (ρ) and\n CPU Solvers",
titlefontsize=22,
)

# Symbols encode sparsity levels; colors encode solvers.
# Define marker for each sparsity and a color for each solver.
## Sparsity shapes
marker_map_sparsity = OrderedDict(0.1=>:circle, 0.5=>:square, 0.9=>:utriangle)
## Solver color shades
colors = [:red, :blue, :green]

# Plot each point individually so marker shape shows sparsity and color shows solver.
for (i, solver_name) in enumerate(keys(solvers))
for sparsity in sparsity_levels
if sparsity in keys(results) && solver_name in keys(results[sparsity])
t = results[sparsity][solver_name].time
e = results[sparsity][solver_name].error
scatter!(p, [t], [e];
label="",
marker=marker_map_sparsity[sparsity],
markersize=15,
color=colors[i],
markerstrokecolor=:black,
markerstrokewidth=0.0,#0.8,
alpha=0.45)
end
end
end

# Create a combined legend
for (i, solver_name) in enumerate(keys(solvers))
for s in sparsity_levels
lbl = "$(solver_name), ρ:$(s)"
scatter!(p, [NaN], [NaN]; label=lbl,
marker=marker_map_sparsity[s],
markersize=15,
color=colors[i],
markerstrokecolor=:black,
markerstrokewidth=0.0,
alpha=0.45)
end
end

savefig(p, "error_vs_time.pdf")
println("\n✓ Plot saved as error_vs_time.pdf")

display(p)
37 changes: 37 additions & 0 deletions examples/agentic/generate-cpu-linear-solver/benchmark2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using LinearAlgebra
using SparseArrays
using BenchmarkTools

include("solver.jl")

N = 15_000
A = sprand(N, N, 0.5)
b = rand(N)

# See https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/#LinearAlgebra.lu
umfpack_control = SparseArrays.UMFPACK.get_umfpack_control(Float64, Int64) # read Julia default configuration for a Float64 sparse matrix
#SparseArrays.UMFPACK.show_umf_ctrl(umfpack_control) # optional - display values
umfpack_control[SparseArrays.UMFPACK.JL_UMFPACK_IRSTEP] = 2.0 # reenable iterative refinement (2 is UMFPACK default max iterative refinement steps)

# warm-up
x1 = A \ b # solve Ax = b
x2 = lu(A; control = umfpack_control) \ b # solve Ax = b, including UMFPACK iterative refinement
x3 = proposed_fn(A, b)

@benchmark begin
x = $A \ $b # solve Ax = b
end

@benchmark begin
x = lu($A; control = $umfpack_control) \ $b # solve Ax = b, including UMFPACK iterative refinement
end

@benchmark begin
x = proposed_fn($A, $b)
end

error1 = norm(A*x1 - b) / norm(b)

error2 = norm(A*x2 - b) / norm(b)

error3 = norm(A*x3 - b) / norm(b)
Binary file not shown.
91 changes: 35 additions & 56 deletions examples/agentic/generate-cpu-linear-solver/solver.jl
Original file line number Diff line number Diff line change
@@ -1,65 +1,44 @@
function proposed_fn(A::SparseMatrixCSC, b::AbstractVector)
@assert size(A,1) == size(A,2) "A must be square"
n = length(b)
@assert size(A,2) == n "Dimensions of A and b must agree"

niters = 4

# Convert sparse matrix to dense double for accurate residual computation
# and to dense single for fast factorization/solves with multithreaded BLAS.
Ad64 = Array(A) # dense Float64
Ad32 = Array{Float32}(undef, n, n)
@inbounds for j in 1:n
for i in 1:n
Ad32[i,j] = Float32(Ad64[i,j])
end
function proposed_fn(A, b)
# Cache Float32 factorizations and work buffers per matrix identity
if !isdefined(@__MODULE__, :LU32_CACHE)
global LU32_CACHE = IdDict{UInt64, Tuple{Any, Vector{Float64}, Vector{Float32}, Vector{Float32}}}()
end

# Convert rhs to Float32 once
b32 = Vector{Float32}(undef, n)
@inbounds @simd for i in 1:n
b32[i] = Float32(b[i])
end

# Factorize dense single-precision matrix (uses LAPACK/BLAS and is multithreaded)
F32 = lu(Ad32)

# Initial solve in single precision, in-place if possible
x32 = copy(b32)
try
LinearAlgebra.ldiv!(F32, x32) # in-place: x32 <- Ad32 \ b32
catch
x32 = F32 \ b32 # fallback
# Ensure b as Float64 vector (avoid copy if already Float64)
b64 = eltype(b) === Float64 ? b : Vector{Float64}(b)
n = length(b64)

key = objectid(A)
F32, r64, work32, bf32 = get!(LU32_CACHE, key) do
# Build a single-precision copy of the numeric values (structure reuse)
nz32 = Float32.(A.nzval)
Af = SparseMatrixCSC{Float32, Int}(size(A,1), size(A,2),
copy(A.colptr), copy(A.rowval),
nz32)
F32_local = lu(Af) # single-precision sparse LU
r64_local = Vector{Float64}(undef, n) # residual buffer (double)
work32_local = Vector{Float32}(undef, n) # temp residual in single
bf32_local = Vector{Float32}(undef, n) # temp right-hand side in single
return (F32_local, r64_local, work32_local, bf32_local)
end

# Promote to double precision for accumulation and residual computation
x = Vector{Float64}(undef, n)
@inbounds @simd for i in 1:n
x[i] = Float64(x32[i])
# Initial solve in single precision, accumulate in double
@inbounds for i = 1:n
bf32[i] = Float32(b64[i])
end

# Preallocate working vectors
r = similar(b) # Float64 residual
r32 = Vector{Float32}(undef, n) # single-precision correction (in-place)

for iter in 1:niters
# r = b - Ad64 * x (use BLAS for dense matvec)
mul!(r, Ad64, x) # r = Ad64 * x
@inbounds @simd for i in 1:n
r[i] = b[i] - r[i]
r32[i] = Float32(r[i])
xf32 = F32 \ bf32
x = Float64.(xf32)

# Iterative refinement: compute residual in double, solve correction in single, update double solution
for _ = 1:5
mul!(r64, A, x) # r64 = A * x (double)
@inbounds for i = 1:n
r64[i] = b64[i] - r64[i] # r64 = b - A*x
work32[i] = Float32(r64[i]) # convert residual to single
end

# Solve correction in single precision using the LU factorization
try
LinearAlgebra.ldiv!(F32, r32) # r32 <- Ad32 \ r32 (in-place)
catch
r32 = F32 \ r32 # fallback
end

# Update double-precision solution
@inbounds @simd for i in 1:n
x[i] += Float64(r32[i])
d32 = F32 \ work32
@inbounds for i = 1:n
x[i] += Float64(d32[i]) # update solution in double
end
end

Expand Down
12 changes: 12 additions & 0 deletions examples/agentic/generate-dagger-linear-solver/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[sources]
Dagger = {rev = "jps/lu-ldiv3", url = "https://github.com/JuliaParallel/Dagger.jl"}
SmartSolve = {path = "../../.."}
Loading
Loading