|
17 | 17 | }, |
18 | 18 | { |
19 | 19 | "cell_type": "code", |
20 | | - "execution_count": 1, |
| 20 | + "execution_count": null, |
21 | 21 | "metadata": { |
22 | 22 | "collapsed": true, |
23 | 23 | "id": "AoHkvSPMC5Fs", |
|
29 | 29 | "source": [ |
30 | 30 | "import os\n", |
31 | 31 | "\n", |
32 | | - "if os.getenv(\"COLAB_RELEASE_TAG\"): # If running in Google Colab:\n", |
33 | | - " !curl -s -L -O https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2025_3/NsightSystems-linux-cli-public-2025.3.1.90-3582212.deb\n", |
34 | | - " !sudo dpkg -i NsightSystems-linux-cli-public-2025.3.1.90-3582212.deb > /dev/null\n", |
35 | | - " !pip install \"numba-cuda\" \"cupy-cuda12x\" \"nsightful[notebook] @ git+https://github.com/brycelelbach/nsightful.git\" > /dev/null 2>&1\n", |
36 | | - " os.environ[\"NUMBA_CUDA_ENABLE_PYNVJITLINK\"] = \"1\"" |
| 32 | + "if os.getenv(\"COLAB_RELEASE_TAG\") and not os.path.exists(\"/accelerated-computing-hub-installed\"): # If running in Google Colab and the setup marker file does not exist yet:\n", |
| 33 | + " print(\"Downloading NCU package.\")\n", |
| 34 | + " !curl -s -L -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/nsight-compute-2025.2.1_2025.2.1.3-1_amd64.deb\n", |
| 35 | + " print(\"Installing NCU package.\")\n", |
| 36 | + " !dpkg -i nsight-compute-2025.2.1_2025.2.1.3-1_amd64.deb > /dev/null\n", |
| 37 | + " !update-alternatives --install /opt/bin/ncu ncu /opt/nvidia/nsight-compute/2025.2.1/ncu 20250201 > /dev/null\n", |
| 38 | + " print(\"Installing PIP packages.\")\n", |
| 39 | + " !pip uninstall \"cuda-python\" --yes > /dev/null\n", |
| 40 | + " !pip install \"numba-cuda\" \"cuda-cccl[test-cu12]\" \"nvtx\" \"nsightful[notebook] @ git+https://github.com/brycelelbach/nsightful.git\" > /dev/null 2>&1\n", |
| 41 | + " open(\"/accelerated-computing-hub-installed\", \"a\").close()" |
37 | 42 | ] |
38 | 43 | }, |
39 | 44 | { |
|
203 | 208 | "outputs": [ |
204 | 209 | { |
205 | 210 | "data": { |
206 | | - "application/javascript": [ |
207 | | - "window[\"5ecc93f0-740d-11f0-abc7-0242ac1c000c\"] = google.colab.output.setIframeHeight(-1, true, {\"interactive\": true, \"maxHeight\": 99999});\n", |
208 | | - "//# sourceURL=js_bae5335f4f" |
209 | | - ], |
| 211 | + "application/javascript": "window[\"5ecc93f0-740d-11f0-abc7-0242ac1c000c\"] = google.colab.output.setIframeHeight(-1, true, {\"interactive\": true, \"maxHeight\": 99999});\n//# sourceURL=js_bae5335f4f", |
210 | 212 | "text/plain": [ |
211 | 213 | "<IPython.core.display.Javascript object>" |
212 | 214 | ] |
|
516 | 518 | "outputs": [ |
517 | 519 | { |
518 | 520 | "data": { |
519 | | - "application/javascript": [ |
520 | | - "window[\"6a0b3852-740d-11f0-abc7-0242ac1c000c\"] = google.colab.output.setIframeHeight(-1, true, {\"interactive\": true, \"maxHeight\": 99999});\n", |
521 | | - "//# sourceURL=js_b772b58d4e" |
522 | | - ], |
| 521 | + "application/javascript": "window[\"6a0b3852-740d-11f0-abc7-0242ac1c000c\"] = google.colab.output.setIframeHeight(-1, true, {\"interactive\": true, \"maxHeight\": 99999});\n//# sourceURL=js_b772b58d4e", |
523 | 522 | "text/plain": [ |
524 | 523 | "<IPython.core.display.Javascript object>" |
525 | 524 | ] |
|
2429 | 2428 | "_titles": { |
2430 | 2429 | "0": "Summary", |
2431 | 2430 | "1": "Speed Of Light", |
2432 | | - "10": "Occupancy", |
2433 | | - "11": "Source Counters", |
2434 | 2431 | "2": "Memory Workload", |
2435 | 2432 | "3": "Compute Workload", |
2436 | 2433 | "4": "Compute & Memory Distribution", |
2437 | 2434 | "5": "Scheduler", |
2438 | 2435 | "6": "Warp State", |
2439 | 2436 | "7": "Instruction", |
2440 | 2437 | "8": "Launch", |
2441 | | - "9": "PM Sampling" |
| 2438 | + "9": "PM Sampling", |
| 2439 | + "10": "Occupancy", |
| 2440 | + "11": "Source Counters" |
2442 | 2441 | }, |
2443 | 2442 | "_view_count": null, |
2444 | 2443 | "_view_module": "@jupyter-widgets/controls", |
|
2633 | 2632 | "_titles": { |
2634 | 2633 | "0": "Summary", |
2635 | 2634 | "1": "Speed Of Light", |
2636 | | - "10": "Occupancy", |
2637 | | - "11": "Source Counters", |
2638 | 2635 | "2": "Memory Workload", |
2639 | 2636 | "3": "Compute Workload", |
2640 | 2637 | "4": "Compute & Memory Distribution", |
2641 | 2638 | "5": "Scheduler", |
2642 | 2639 | "6": "Warp State", |
2643 | 2640 | "7": "Instruction", |
2644 | 2641 | "8": "Launch", |
2645 | | - "9": "PM Sampling" |
| 2642 | + "9": "PM Sampling", |
| 2643 | + "10": "Occupancy", |
| 2644 | + "11": "Source Counters" |
2646 | 2645 | }, |
2647 | 2646 | "_view_count": null, |
2648 | 2647 | "_view_module": "@jupyter-widgets/controls", |
|
0 commit comments