Commits (30)
a5e0116  Update mysteries day (Alxmrphi, Aug 25, 2025)
1fffb02  finish W1D1 changes (Alxmrphi, Aug 26, 2025)
c79c218  W2D2 changes (Alxmrphi, Aug 26, 2025)
d24dceb  W1D3 changes (Alxmrphi, Aug 26, 2025)
3b2808b  W1D5 changes (Alxmrphi, Aug 26, 2025)
6ca069f  W2D1 changes (Alxmrphi, Aug 26, 2025)
21757ae  W2D3 final (Alxmrphi, Aug 26, 2025)
a95b3f1  W2D4 changes (Alxmrphi, Aug 26, 2025)
33275d1  W2D4 final (Alxmrphi, Aug 26, 2025)
779297b  W2D5 small tweak (Alxmrphi, Aug 26, 2025)
152ef74  Update requirements.txt (iamzoltan, Aug 27, 2025)
0dacefa  Update requirements.txt (iamzoltan, Aug 29, 2025)
d7b5766  Update requirements.txt (iamzoltan, Aug 29, 2025)
a741d6b  Update requirements.txt (iamzoltan, Sep 1, 2025)
f978d01  Update notebook-pr.yaml (iamzoltan, Sep 1, 2025)
1e0d337  Update notebook-pr.yaml (iamzoltan, Sep 1, 2025)
253c879  Update requirements.txt (iamzoltan, Sep 1, 2025)
3f9f113  Update requirements.txt (iamzoltan, Jan 18, 2026)
1698736  Refactor workflow pipelines (iamzoltan, Jan 18, 2026)
f721f02  Update requirements.txt (iamzoltan, Jan 18, 2026)
dd22567  Update requirements.txt (iamzoltan, Jan 20, 2026)
abb489e  Update requirements.txt (iamzoltan, Jan 20, 2026)
82960b9  Update requirements.txt (iamzoltan, Jan 20, 2026)
04d6ae1  Update requirements.txt (iamzoltan, Jan 20, 2026)
eb25e3e  Update requirements.txt (iamzoltan, Jan 20, 2026)
573396f  Update requirements.txt (iamzoltan, Jan 20, 2026)
b8873af  Merge branch 'main' into staging (iamzoltan, Jan 20, 2026)
24470a4  Fix workflow artifacts generation and restore (iamzoltan, Jan 22, 2026)
de08e34  Fix workflow artifact naming conflict and fix warning for cache (iamzoltan, Jan 22, 2026)
2042043  Fix artifact uploads to only upload processed material (iamzoltan, Jan 23, 2026)
11 changes: 4 additions & 7 deletions .github/actions/setup-rendering-deps/action.yml
@@ -18,15 +18,12 @@ inputs:
 runs:
   using: 'composite'
   steps:
-    - name: Cache APT packages
-      id: cache-apt
+    - name: Cache fonts
+      id: cache-fonts
       uses: actions/cache@v3
       with:
-        path: |
-          /var/cache/apt/archives
-          /usr/share/fonts/truetype/humor-sans
-        key: apt-packages-${{ runner.os }}-fonts-backend-graphviz-v2
-        restore-keys: apt-packages-${{ runner.os }}-
+        path: /usr/share/fonts/truetype/humor-sans
+        key: fonts-${{ runner.os }}-humor-sans-v1

     - name: Install XKCD fonts
       if: ${{ inputs.skip-fonts != 'true' }}
93 changes: 27 additions & 66 deletions .github/workflows/notebook-pr.yaml
@@ -148,33 +148,23 @@ jobs:
         run: |
           nb="${{ matrix.notebook }}"
           dir=$(dirname "$nb")
+          nb_name=$(basename "$nb" .ipynb)
           echo "dir=$dir" >> $GITHUB_OUTPUT
-          # Create a safe artifact name from the notebook path
-          safe_name=$(echo "$nb" | tr '/' '_' | tr ' ' '_')
-          echo "artifact_name=$safe_name" >> $GITHUB_OUTPUT
+          echo "nb_name=$nb_name" >> $GITHUB_OUTPUT
+          # Use ___ as delimiter (won't appear in paths) so we can restore later
+          safe_dir=$(echo "$dir" | sed 's|/|___|g')
+          echo "artifact_name=${safe_dir}___${nb_name}" >> $GITHUB_OUTPUT

-      - name: Upload processed notebook
+      - name: Upload processed tutorial directory
         uses: actions/upload-artifact@v4
         with:
-          name: notebook-${{ steps.get-dir.outputs.artifact_name }}
-          path: ${{ matrix.notebook }}
-          retention-days: 1
-
-      - name: Upload static files
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: static-${{ steps.get-dir.outputs.artifact_name }}
-          path: ${{ steps.get-dir.outputs.dir }}/static/
-          if-no-files-found: ignore
-          retention-days: 1
-
-      - name: Upload solutions
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: solutions-${{ steps.get-dir.outputs.artifact_name }}
-          path: ${{ steps.get-dir.outputs.dir }}/solutions/
+          name: tutorial-${{ steps.get-dir.outputs.artifact_name }}
+          path: |
+            ${{ matrix.notebook }}
+            ${{ steps.get-dir.outputs.dir }}/static/${{ steps.get-dir.outputs.nb_name }}*
+            ${{ steps.get-dir.outputs.dir }}/solutions/${{ steps.get-dir.outputs.nb_name }}*
+            ${{ steps.get-dir.outputs.dir }}/student/${{ steps.get-dir.outputs.nb_name }}*
+            ${{ steps.get-dir.outputs.dir }}/instructor/${{ steps.get-dir.outputs.nb_name }}*
         if-no-files-found: ignore
         retention-days: 1
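
For intuition about the naming scheme in this hunk, here is a minimal Python sketch (not part of the PR) of why the `___` delimiter makes the artifact name reversible; the notebook path is a hypothetical example:

```python
# Sketch: the old scheme (tr '/' '_') was ambiguous because directory and file
# names themselves contain single underscores; '___' never appears in paths,
# so the new mapping can be decoded after download.
import os

nb = "tutorials/W2D4_Macrolearning/W2D4_Tutorial1.ipynb"  # hypothetical path

dir_part = os.path.dirname(nb)                       # tutorials/W2D4_Macrolearning
nb_name = os.path.splitext(os.path.basename(nb))[0]  # W2D4_Tutorial1
artifact = dir_part.replace("/", "___") + "___" + nb_name
print(artifact)  # tutorials___W2D4_Macrolearning___W2D4_Tutorial1

head, tail = artifact.rsplit("___", 1)
print(head.replace("___", "/") + "/" + tail + ".ipynb")  # round-trips to the original path
```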

@@ -212,53 +202,19 @@ jobs:
         run: |
           echo "Restoring processed files from artifacts..."

-          # Process notebook artifacts
-          for dir in artifacts/notebook-*; do
+          # Each artifact preserves the full path structure (e.g., tutorials/W2D4_Macrolearning/...)
+          # We just need to copy everything from each artifact to the repo root
+          for dir in artifacts/tutorial-*; do
             if [ -d "$dir" ]; then
-              echo "Processing $dir"
-              cp -v "$dir"/*.ipynb tutorials/ 2>/dev/null || true
-              # Find the actual notebook and copy to correct location
-              find "$dir" -name "*.ipynb" -exec sh -c '
-                for f; do
-                  # Extract original path from artifact structure
-                  rel_path=$(basename "$f")
-                  # Find where this notebook should go based on its name
-                  original=$(find tutorials -name "$rel_path" -type f 2>/dev/null | head -1)
-                  if [ -n "$original" ]; then
-                    cp -v "$f" "$original"
-                  fi
-                done
-              ' sh {} +
+              echo "Restoring from artifact: $(basename "$dir")"
+              # Copy all contents preserving directory structure
+              # The artifact contains files at their original paths (e.g., tutorials/W2D4_Macrolearning/solutions/...)
+              cp -rv "$dir"/* . 2>/dev/null || true
             fi
           done

-          # Process static artifacts
-          for dir in artifacts/static-*; do
-            if [ -d "$dir" ]; then
-              echo "Processing static: $dir"
-              # Extract tutorial path from artifact name
-              artifact_name=$(basename "$dir" | sed 's/^static-//')
-              # Convert back: tutorials_W1D1_xxx.ipynb -> tutorials/W1D1_xxx
-              tutorial_dir=$(echo "$artifact_name" | sed 's/_/\//g' | sed 's/\.ipynb$//' | xargs dirname)
-              if [ -d "$tutorial_dir" ]; then
-                mkdir -p "$tutorial_dir/static"
-                cp -rv "$dir"/* "$tutorial_dir/static/" 2>/dev/null || true
-              fi
-            fi
-          done
-
-          # Process solutions artifacts
-          for dir in artifacts/solutions-*; do
-            if [ -d "$dir" ]; then
-              echo "Processing solutions: $dir"
-              artifact_name=$(basename "$dir" | sed 's/^solutions-//')
-              tutorial_dir=$(echo "$artifact_name" | sed 's/_/\//g' | sed 's/\.ipynb$//' | xargs dirname)
-              if [ -d "$tutorial_dir" ]; then
-                mkdir -p "$tutorial_dir/solutions"
-                cp -rv "$dir"/* "$tutorial_dir/solutions/" 2>/dev/null || true
-              fi
-            fi
-          done
+          echo "Restore complete. Listing processed tutorials:"
+          find tutorials -name "*.ipynb" -newer .git/index 2>/dev/null | head -20 || true

       - name: Verify exercises
         env:
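
Since each artifact now stores files at their repo-relative paths, the restore step above needs no name decoding at all; here is a rough Python analogue (an illustration, assuming the artifact layout sketched in the hunk's comments) of what `cp -rv "$dir"/* .` accomplishes:

```python
# Sketch: merge every downloaded tutorial-* artifact tree into the repo root.
import shutil
from pathlib import Path

def restore(artifacts_root: str = "artifacts", repo_root: str = ".") -> None:
    for artifact in Path(artifacts_root).glob("tutorial-*"):
        if not artifact.is_dir():
            continue
        for entry in artifact.iterdir():
            dest = Path(repo_root) / entry.name
            if entry.is_dir():
                # recursive merge; processed files overwrite their stale copies
                shutil.copytree(entry, dest, dirs_exist_ok=True)
            else:
                shutil.copy2(entry, dest)

restore()
```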
@@ -285,13 +241,18 @@ jobs:
           python ci/find_unreferenced_content.py > to_remove.txt
           if [ -s to_remove.txt ]; then git rm --pathspec-from-file=to_remove.txt; fi

+      - name: Clean up artifacts directory
+        run: rm -rf artifacts/
+
       - name: Commit post-processed files
         run: |
           git config --local user.email "action@github.com"
           git config --local user.name "GitHub Action"
           git add '**/*.ipynb'
           git add '**/static/*.png'
           git add '**/solutions/*.py'
+          git add '**/student/*.ipynb'
+          git add '**/instructor/*.ipynb'
           git add '**/README.md'
           git diff-index --quiet HEAD || git commit -m "Process tutorial notebooks"
2 changes: 1 addition & 1 deletion requirements.txt
@@ -32,4 +32,4 @@ git+https://github.com/neuromatch/GNS-Modeling#egg=gns
 git+https://github.com/neuromatch/pyBPL#egg=pybpl
 git+https://github.com/neuromatch/MotorNet#egg=motornet
 git+https://github.com/ctn-waterloo/sspspace@neuromatch#egg=sspspace
-git+https://github.com/mitchellostrow/DSA#egg=DSA
+git+https://github.com/mitchellostrow/DSA#egg=dsa-metric
2 changes: 1 addition & 1 deletion tutorials/W1D1_Generalization/W1D1_Tutorial1.ipynb
@@ -1563,7 +1563,7 @@
    "# Section 3: Dissecting TrOCR\n",
    "\n",
    "TrOCR is a model that performs printed optical character recognition and handwriting transcription using the transformer model. But what's inside of it?\n",
-   "It's important to note here that the original transformer model consisted of an encoder step, following by a decoder step. Taken together, this was the initial Transformer model of Vaswani et al. However, subsequent research into transformers led researchers to find applications of the encoding step specifically (encoding models like BERT) and also specific applications of the decoder step (autoregressive models like GPT). This meant that the terminology then changed to be *encoder transformers* and *decoder/causal/autoregressive transformers*. TrOCR is an example of the original transformer setup (both an encoder step and decoder step joined together). The image below outlines this setup. This also matches the transformer architecture given in the video above."
+   "It's important to note here that the original transformer model consisted of an encoder step, followed by a decoder step. Taken together, this was the initial Transformer model of Vaswani et al. However, subsequent research into transformers led researchers to find applications of the encoding step specifically (encoding models like BERT) and also specific applications of the decoder step (autoregressive models like GPT). This meant that the terminology then changed to be *encoder transformers* and *decoder/causal/autoregressive transformers*. TrOCR is an example of the original transformer setup (both an encoder step and decoder step joined together). The image below outlines this setup. This also matches the transformer architecture given in the video above."
]
},
{
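
The encoder/decoder split described in that cell can be inspected directly; a minimal sketch (not part of this PR) using the Hugging Face transformers API, assuming one of the public TrOCR checkpoints:

```python
# Sketch: peek inside TrOCR's encoder-decoder composition
# (assumes the `transformers` library is installed).
from transformers import VisionEncoderDecoderModel

model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# The encoder ingests image patches (BERT-style encoding step);
# the decoder generates text autoregressively (GPT-style decoding step).
print(type(model.encoder).__name__)
print(type(model.decoder).__name__)
```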
8 changes: 4 additions & 4 deletions tutorials/W1D1_Generalization/W1D1_Tutorial3.ipynb
@@ -187,7 +187,7 @@
    "logging.getLogger('matplotlib.font_manager').disabled = True\n",
    "\n",
    "%matplotlib inline\n",
-   "%config InlineBackend.figure_format = 'retina' # perfrom high definition rendering for images and plots\n",
+   "%config InlineBackend.figure_format = 'retina' # perform high-definition rendering for images and plots\n",
    "plt.style.use(\"https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle\")"
   ]
  },
@@ -620,7 +620,7 @@
    "\n",
    "Let's put ourselves in the mindset of a cognitive scientist studying handwriting. We're interested in how people learn to recognize new characters. Indeed, humans display low **sample complexity** when learning new visual concepts: they seem to grasp new concepts with very few presentations, generalizing effortlessly. In AI, learning from $k$ labeled examples is known as $k$-shot learning; one-shot and few-shot learning refer to learning from one or a few labeled examples.\n",
    "\n",
-   "A good dataset to investigate one-shot learning is the Omniglot dataset. Omniglot has sometimes been described as *MNIST, transposed*. Instead of **thousands** of examples from **10** digit classes, Omniglot consists of **20** instances from **1623** character classes. These character classes are sourced from 50 alphabets, both natural (e.g. Cherokee or Greek) and constructed (e.g. the alien alphabet from the TV show Futurama). \n",
+   "A good dataset to investigate one-shot learning is the Omniglot dataset. Omniglot has sometimes been described as *MNIST, transposed*. Instead of **thousands** of examples from **10** digit classes (many examples, few classes), Omniglot consists of **20** instances from **1623** character classes (few examples, many classes). These character classes are sourced from 50 alphabets, both natural (e.g. Cherokee or Greek) and constructed (e.g. the alien alphabet from the TV show Futurama). \n",
    "\n",
    "![Sample characters from the Omniglot dataset](https://github.com/brendenlake/omniglot/raw/master/omniglot_grid.jpg)\n",
    "\n",
@@ -992,7 +992,7 @@
    "name": "python3"
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -1006,7 +1006,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.22"
+   "version": "3.13.7"
   }
  },
  "nbformat": 4,
2 changes: 1 addition & 1 deletion tutorials/W1D2_ComparingTasks/W1D2_Tutorial1.ipynb
@@ -39,7 +39,7 @@
    "\n",
    "# Tutorial Objectives\n",
    "\n",
-   "*Estimated timing of tutorial: 90 minutes*\n",
+   "*Estimated time of tutorial: 90 minutes*\n",
    "\n",
    "In this tutorial, we'll explore how task specification affects generalization in networks. We will use the same base architecture (a convolutional neural network / CNN) to perform multiple different tasks. We will explore the number of training points and number of epochs needed to train these networks up to a specific accuracy value. Additionally, we will explore how well representations learned for a given task generalize, and whether these representations can be used to solve the other tasks.\n",
    "\n",
4 changes: 2 additions & 2 deletions tutorials/W1D2_ComparingTasks/W1D2_Tutorial2.ipynb
@@ -796,7 +796,7 @@
    "\n",
    "Here $\tau$ is a temperature parameter that controls the sharpness of the distribution. You can think of it as a cross-entropy loss with a single pseudo-class corresponding to similar labels and the negative pairs corresponding to different labels. \n",
    "\n",
-   "### Decoupled constrastive learning\n",
+   "### Decoupled contrastive learning\n",
    "\n",
    "InfoNCE typically requires substantial batch sizes—commonly 128 or larger—to perform optimally. The need for large batch sizes stems from the necessity for diverse negative samples in the batch to effectively learn the contrasts. However, large batch sizes can be impractical in resource-constrained settings or when data availability is limited.\n",
"\n",
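For reference alongside this hunk, a minimal PyTorch sketch of an InfoNCE-style loss with the temperature $\tau$ described above (an illustration, not the tutorial's implementation):

```python
import torch
import torch.nn.functional as F

def info_nce(z1: torch.Tensor, z2: torch.Tensor, tau: float = 0.1) -> torch.Tensor:
    """InfoNCE over a batch: row i of z1 and row i of z2 form the positive pair."""
    z1 = F.normalize(z1, dim=1)
    z2 = F.normalize(z2, dim=1)
    logits = z1 @ z2.T / tau            # pairwise similarities, sharpened by tau
    targets = torch.arange(z1.size(0))  # positives lie on the diagonal
    # cross-entropy with one "pseudo-class" per positive pair
    return F.cross_entropy(logits, targets)

loss = info_nce(torch.randn(128, 64), torch.randn(128, 64))  # batch of 128, as in the text
```
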
@@ -1658,7 +1658,7 @@
    "\n",
    "Through practical exercises with the MNIST dataset, we've seen how contrastive learning can be implemented. The session highlighted the intuitive appeal of contrastive learning: learning by comparison, which is a natural way for both humans and machines to understand the world.\n",
    "\n",
-   "Let's bring this back to the overall goal of the overall theme of the course, **generalization**. So far, we've looked at how tasks are defined by cost functions, specifications of different losses. Some tasks require models to learn representatinos that are not task-specific but can be very task general in large parts, and specialised (task-specific) in other parts. For example, most of a CNN architecture might specialise in learning features present in the real world: shapes, colors, lines. Contrastive learning is a way we can efficently make use of the large amounts of unlabeled data in the world. It's also a task that, over large datasets, results in models learning very rich, general representations. \n",
+   "Let's bring this back to the overall goal of the overall theme of the course, **generalization**. So far, we've looked at how tasks are defined by cost functions, specifications of different losses. Some tasks require models to learn representations that are not task-specific but can be very task general in large parts, and specialised (task-specific) in other parts. For example, most of a CNN architecture might specialise in learning features present in the real world: shapes, colors, lines. Contrastive learning is a way we can efficiently make use of the large amounts of unlabeled data in the world. It's also a task that, over large datasets, results in models learning very rich, general representations. \n",
    "\n",
    "Take a moment to think about all the ways that learning via contrastive learning might give a network (human or artificial) a rich set of representations. We'll soon get to studying how to measure representations and across multiple systems and their geometry. But first, let's address another method to learn tasks that's a little bit different to what we've seen before: Reinforcement Learning."
]
2 changes: 1 addition & 1 deletion tutorials/W1D2_ComparingTasks/W1D2_Tutorial3.ipynb
@@ -515,7 +515,7 @@
    "\n",
    "$$R(T) = \sum_{t = 1}^T (p^* - \mathbb{E}(p_{a_t})),$$\n",
    "\n",
-   "where $p^*$ is the probability of the reward for the best arm, i.e., max($p_L$, $p_r$). $\mathbb{E}(p_{a_t})$ corresponds to the expected probability of reward for the action that was chosen at the previous time $t$."
+   "where $p^*$ is the probability of the reward for the best arm, i.e., max($p_L$, $p_R$). $\mathbb{E}(p_{a_t})$ corresponds to the expected probability of reward for the action that was chosen at the previous time $t$."
]
},
{
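
A quick numeric sketch of the regret formula in this hunk (probabilities and action sequences are hypothetical):

```python
import numpy as np

def regret(p, actions):
    """R(T) = sum_t (p* - E[p_{a_t}]) for a two-armed bandit.

    p: reward probabilities [p_L, p_R]; actions: arm index chosen at each trial t.
    """
    p = np.asarray(p)
    return float(np.sum(p.max() - p[np.asarray(actions)]))

print(regret([0.3, 0.7], [0, 0, 0]))  # always the worse arm: 3 * (0.7 - 0.3) = 1.2
print(regret([0.3, 0.7], [1, 1, 1]))  # always the best arm: 0.0 (no regret)
```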
@@ -1120,7 +1120,7 @@
   },
   "outputs": [],
   "source": [
-   "# @title Train adversirally robust model\n",
+   "# @title Train adversirially robust model\n",
    "\n",
    "# model_robust = Net().to(args.device)\n",
    "# optimizer = optim.Adadelta(model_robust.parameters(), lr=args.lr)\n",
@@ -1532,7 +1532,7 @@
    "\n",
    "The color-coded matrix shows the dot product similarity between activations for the training images (rows) and test images (columns)--yellow means that the two images are highly similar based on the dot product of the activations for those images, and dark blue means the two images are highly dissimilar. The goal of this exercise is to explore how the predicted leaning direction of the test stimuli is determined by your leaning ratings of the training stimuli, and the similarity of the training and test stimuli.\n",
    "\n",
-   "1) Using the matrix, find training and test images that are highly similar and play around with the rating of the training image. How much does the predicted learning rating for the test image change? Try this for a few different pairs.\n",
+   "1) Using the matrix, find training and test images that are highly similar and play around with the rating of the training image. How much does the predicted leaning rating for the test image change? Try this for a few different pairs.\n",
    "\n",
    "2) Now find a highly dissimilar pair and play with the rating. How much does the predicted leaning of the test image change? Try this for a few different pairs.\n",
    "\n",
@@ -1698,7 +1698,7 @@
    "- Characterized the computation that happens across different layers of a network as a path, with each step changing the geometry of the representation to go from input pixels to target labels\n",
    "- Examined the representational geometry paths for different model architectures and different inputs and learned how to interpret them\n",
    "\n",
-   "We used this method to examine how models trained on adversarial stimulu (vs control) differentially treat inputs that are both normal and adversarial. We saw that the category / class level similarity structure, which was different for the standard model on adversarial stimuli, resulting in lower accuracies, actually has a divergent path during the conversion from input data to output labels. This is another link into the idea of **similarity** as a lens that helps us understand **generalization**."
+   "We used this method to examine how models trained on adversarial stimuli (vs control) differentially treat inputs that are both normal and adversarial. We saw that the category / class level similarity structure, which was different for the standard model on adversarial stimuli, resulting in lower accuracies, actually has a divergent path during the conversion from input data to output labels. This is another link into the idea of **similarity** as a lens that helps us understand **generalization**."
]
}
],