Skip to content

Commit b3bc748

Browse files
authored
Merge branch 'main' into jm/tracedtotypes
2 parents f92e8d5 + e28e663 commit b3bc748

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+2645
-1270
lines changed

.github/workflows/CI-localjll.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
with:
4848
version: ${{ matrix.version }}
4949
- uses: julia-actions/cache@v2
50-
- uses: bazel-contrib/setup-bazel@0.14.0
50+
- uses: bazel-contrib/setup-bazel@0.15.0
5151
name: Set up Bazel
5252
with:
5353
# Avoid downloading Bazel every time.

Project.toml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Reactant"
22
uuid = "3c362404-f566-11ee-1572-e11a4b42c853"
33
authors = ["William Moses <[email protected]>", "Valentin Churavy <[email protected]>", "Sergio Sánchez Ramírez <[email protected]>", "Paul Berg <[email protected]>", "Avik Pal <[email protected]>", "Mosè Giordano <[email protected]>"]
4-
version = "0.2.113"
4+
version = "0.2.127"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -36,6 +36,7 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
3636
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
3737
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
3838
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
39+
OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
3940
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
4041
Random123 = "74087812-796a-5b5d-8853-05524746bad3"
4142
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
@@ -53,6 +54,7 @@ ReactantKernelAbstractionsExt = "KernelAbstractions"
5354
ReactantMPIExt = "MPI"
5455
ReactantNNlibExt = "NNlib"
5556
ReactantOffsetArraysExt = "OffsetArrays"
57+
ReactantOneHotArraysExt = "OneHotArrays"
5658
ReactantPythonCallExt = "PythonCall"
5759
ReactantRandom123Ext = "Random123"
5860
ReactantSpecialFunctionsExt = "SpecialFunctions"
@@ -67,8 +69,8 @@ CEnum = "0.5"
6769
CUDA = "5.6"
6870
Downloads = "1.6"
6971
EnumX = "1"
70-
Enzyme = "0.13.35"
71-
EnzymeCore = "0.8.8"
72+
Enzyme = "0.13.49"
73+
EnzymeCore = "0.8.11"
7274
Functors = "0.5"
7375
GPUArraysCore = "0.2"
7476
GPUCompiler = "1.3"
@@ -80,14 +82,15 @@ LinearAlgebra = "1.10"
8082
MPI = "0.20"
8183
NNlib = "0.9.26"
8284
OffsetArrays = "1"
85+
OneHotArrays = "0.2.10"
8386
OrderedCollections = "1"
8487
PrecompileTools = "1.2"
8588
Preferences = "1.4"
8689
PythonCall = "0.9"
8790
Random = "1.10"
8891
Random123 = "1.7"
89-
ReactantCore = "0.1.9"
90-
Reactant_jll = "0.0.187"
92+
ReactantCore = "0.1.11"
93+
Reactant_jll = "0.0.196"
9194
ScopedValues = "1.3.0"
9295
Scratch = "1.2"
9396
Sockets = "1.10"

deps/ReactantExtra/.bazelrc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ build -c opt
1919
build:cuda --repo_env TF_NEED_CUDA=1
2020
build:cuda --repo_env TF_NVCC_CLANG=1
2121
build:cuda --repo_env TF_NCCL_USE_STUB=1
22-
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.6.2"
23-
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.4.0"
22+
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.8.1"
23+
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="9.8.0"
2424
# "sm" means we emit only cubin, which is forward compatible within a GPU generation.
2525
# "compute" means we emit both cubin and PTX, which is larger but also forward compatible to future GPU generations.
2626
build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_80,compute_90"

deps/ReactantExtra/API.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2628,3 +2628,15 @@ extern "C" void addSdyPropagationPipeline(
26282628
0};
26292629
mlir::sdy::addPropagationPipeline(pm, options);
26302630
}
2631+
2632+
// C-linkage entry point that copies an IFRT array and returns the copy
// wrapped via reactant::capture().
//
// `array` is dereferenced unconditionally, so it must be non-null.
// Only arrays whose underlying object dyn_casts to ifrt::PjRtArray are
// handled; any other array type falls through to the ReactantThrowError
// call at the end of the function.
extern "C" HeldIfrtArray *ifrt_copy_array(HeldIfrtArray *array) {
2633+
// dyn_cast yields a null pointer when the held object is not a PjRtArray.
auto pjrtArray = dyn_cast<ifrt::PjRtArray>(array->obj().get());
2634+
if (pjrtArray) {
2635+
// Both optionals are left empty (default-constructed) when passed to Copy().
// NOTE(review): presumably an empty value means "keep the source array's
// devices / memory kind" -- confirm against the IFRT Array::Copy contract.
std::optional<ifrt::DeviceListRef> devices;
2636+
std::optional<ifrt::MemoryKind> memory_kind;
2637+
// MyValueOrThrow unwraps the result of Copy(); judging by its name it
// throws on an error status -- its definition is not visible here.
// NOTE(review): the copy semantics are passed as the raw enum value 0,
// presumably ArrayCopySemantics::kAlwaysCopy -- confirm and prefer the
// named enumerator over the cast.
auto res = MyValueOrThrow(pjrtArray->Copy(
2638+
devices, memory_kind, static_cast<ifrt::ArrayCopySemantics>(0)));
2639+
return reactant::capture(res);
2640+
}
2641+
// Reached only for non-PjRt arrays. There is no return statement after this
// call, so ReactantThrowError is presumably non-returning -- TODO confirm.
ReactantThrowError("Only ifrt-pjrt arrays are supported for now");
2642+
}

deps/ReactantExtra/WORKSPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ http_archive(
99
urls = ["https://github.com/wsmoses/nsync/archive/{commit}.tar.gz".format(commit = NSYNC_COMMIT)],
1010
)
1111

12-
ENZYMEXLA_COMMIT = "1db6a9efb41e49148f5c9b6abac798e81f318f06"
12+
ENZYMEXLA_COMMIT = "d9a0f3dde79f94e56b0a0570f37c4d14121c888d"
1313
ENZYMEXLA_SHA256 = ""
1414

1515
http_archive(

docs/src/.vitepress/config.mts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ export default defineConfig({
7676
items: [
7777
{ text: "Introduction", link: "/introduction" },
7878
{ text: "Configuration", link: "/introduction/configuration" },
79+
{ text: "FAQs", link: "/introduction/FAQs" },
7980
],
8081
},
8182
{ text: "Benchmarks", link: "https://enzymead.github.io/Reactant.jl/benchmarks/" },
@@ -140,6 +141,7 @@ export default defineConfig({
140141
items: [
141142
{ text: "Introduction", link: "/introduction" },
142143
{ text: "Configuration", link: "/introduction/configuration" },
144+
{ text: "FAQs", link: "/introduction/FAQs" },
143145
],
144146
}
145147
],

docs/src/api/config.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Reactant.with_config
2929

3030
```@docs
3131
Reactant.DotGeneralAlgorithmPreset
32-
Reactant.DotGeneralPrecision
32+
Reactant.PrecisionConfig
3333
Reactant.DotGeneralAlgorithm
3434
```
3535

docs/src/introduction/FAQs.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# FAQs
2+
3+
## XLA auto-tuner: Results do not match the reference. This is likely a bug/unexpected loss of precision
4+
5+
If you see this error with the CUDA backend, use a scoped value to increase the precision
6+
of the dot-general algorithm.
7+
8+
```julia
9+
Reactant.with_config(; dot_general_precision=PrecisionConfig.HIGH) do
10+
@compile ...
11+
end
12+
```
13+
14+
For more information, see [this XLA issue](https://github.com/openxla/xla/issues/23934).
15+
16+
## Emptying the cache to avoid OOM issues
17+
18+
When you encounter OOM (Out of Memory) errors, you can try to clear the cache by using
19+
Julia's built-in `GC.gc()` between memory-intensive operations.
20+
21+
!!! note
22+
This will only free memory which is not currently live. If the result of a compiled
23+
function was stored in a vector, it will still be alive and `GC.gc()` won't free it.
24+
25+
```julia
26+
using Reactant
27+
n = 500_000_000
28+
input1 = Reactant.ConcreteRArray(ones(n))
29+
input2 = Reactant.ConcreteRArray(ones(n))
30+
31+
function sin_add(x, y)
32+
return sin.(x) .+ y
33+
end
34+
35+
f = @compile sin_add(input1,input2)
36+
37+
for i = 1:10
38+
GC.gc()
39+
@info "gc... $i"
40+
f(input1, input2) # May cause OOM here for a 24GB GPU if GC is not used
41+
end
42+
```
43+
44+
If you **don't** use `GC.gc()` here, this may cause an OOM:
45+
46+
```bash
47+
[ Info: gc... 1
48+
[ Info: gc... 2
49+
[ Info: gc... 3
50+
...
51+
E0105 09:48:28.755177 110350 pjrt_stream_executor_client.cc:3088] Execution of replica 0 failed: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 4000000000 bytes.
52+
ERROR: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 4000000000 bytes.
53+
54+
Stacktrace:
55+
[1] reactant_err(msg::Cstring)
56+
@ Reactant.XLA ~/.julia/packages/Reactant/7m11i/src/XLA.jl:104
57+
[2] macro expansion
58+
@ ~/.julia/packages/Reactant/7m11i/src/XLA.jl:357 [inlined]
59+
[3] ExecutableCall
60+
@ ~/.julia/packages/Reactant/7m11i/src/XLA.jl:334 [inlined]
61+
[4] macro expansion
62+
@ ~/.julia/packages/Reactant/7m11i/src/Compiler.jl:798 [inlined]
63+
[5] (::Reactant.Compiler.Thunk{…})(::ConcreteRArray{…}, ::ConcreteRArray{…})
64+
@ Reactant.Compiler ~/.julia/packages/Reactant/7m11i/src/Compiler.jl:909
65+
[6] top-level scope
66+
@ ./REPL[7]:4
67+
Some type information was truncated. Use `show(err)` to see complete types.
68+
```
69+
70+
After using Julia's built-in `GC.gc()`:
71+
72+
```bash
73+
[ Info: gc... 1
74+
[ Info: gc... 2
75+
[ Info: gc... 3
76+
[ Info: gc... 4
77+
[ Info: gc... 5
78+
[ Info: gc... 6
79+
[ Info: gc... 7
80+
[ Info: gc... 8
81+
[ Info: gc... 9
82+
[ Info: gc... 10
83+
```

docs/src/introduction/index.md

Lines changed: 0 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -53,83 +53,3 @@ f = @compile sinsum_add(input1,input2)
5353
# one can now run the program
5454
f(input1, input2)
5555
```
56-
57-
58-
## Tips
59-
60-
### Empty Cache
61-
62-
When you encounter OOM (Out of Memory) errors, you can try to clear the cache by using Julia's builtin `GC.gc()` between memory-intensive operations.
63-
64-
!!! note
65-
This will only free memory which is not currently live. If the result of compiled function was stored in a vector, it will still be alive and `GC.gc()` won't free it.
66-
67-
```julia
68-
using Reactant
69-
n = 500_000_000
70-
input1 = Reactant.ConcreteRArray(ones(n))
71-
input2 = Reactant.ConcreteRArray(ones(n))
72-
73-
function sin_add(x, y)
74-
return sin.(x) .+ y
75-
end
76-
77-
f = @compile sin_add(input1,input2)
78-
79-
for i = 1:10
80-
GC.gc()
81-
@info "gc... $i"
82-
f(input1, input2) # May cause OOM here for a 24GB GPU if GC is not used
83-
end
84-
```
85-
86-
If you **don't** use `GC.gc()` here, this may cause an OOM:
87-
88-
89-
90-
```bash
91-
[ Info: gc... 1
92-
[ Info: gc... 2
93-
[ Info: gc... 3
94-
...
95-
E0105 09:48:28.755177 110350 pjrt_stream_executor_client.cc:3088] Execution of replica 0 failed: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 4000000000 bytes.
96-
ERROR: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 4000000000 bytes.
97-
98-
Stacktrace:
99-
[1] reactant_err(msg::Cstring)
100-
@ Reactant.XLA ~/.julia/packages/Reactant/7m11i/src/XLA.jl:104
101-
[2] macro expansion
102-
@ ~/.julia/packages/Reactant/7m11i/src/XLA.jl:357 [inlined]
103-
[3] ExecutableCall
104-
@ ~/.julia/packages/Reactant/7m11i/src/XLA.jl:334 [inlined]
105-
[4] macro expansion
106-
@ ~/.julia/packages/Reactant/7m11i/src/Compiler.jl:798 [inlined]
107-
[5] (::Reactant.Compiler.Thunk{…})(::ConcreteRArray{…}, ::ConcreteRArray{…})
108-
@ Reactant.Compiler ~/.julia/packages/Reactant/7m11i/src/Compiler.jl:909
109-
[6] top-level scope
110-
@ ./REPL[7]:4
111-
Some type information was truncated. Use `show(err)` to see complete types.
112-
```
113-
114-
115-
After using Julia's built-in `GC.gc()`:
116-
117-
118-
119-
```bash
120-
[ Info: gc... 1
121-
[ Info: gc... 2
122-
[ Info: gc... 3
123-
[ Info: gc... 4
124-
[ Info: gc... 5
125-
[ Info: gc... 6
126-
[ Info: gc... 7
127-
[ Info: gc... 8
128-
[ Info: gc... 9
129-
[ Info: gc... 10
130-
```
131-
132-
133-
134-
135-

0 commit comments

Comments
 (0)