diff --git a/Chapter_4.ipynb b/Chapter_4.ipynb index 116e06e..ce16e25 100644 --- a/Chapter_4.ipynb +++ b/Chapter_4.ipynb @@ -1085,10 +1085,10 @@ "\n", "first_rnn = nn.Sequential(\n", " nn.Embedding(vocab_size, D), #(B, T) -> (B, T, D)\n", - " nn.RNN(D, hidden_nodes, batch_first=True), #(B, T, D) -> ( (B,T,D) , (S, B, D) )\n", + " nn.RNN(D, hidden_nodes, batch_first=True), #(B, T, D) -> ( (B,T,hidden_nodes) , (S, B, hidden_nodes) )\n", " #the tanh activation is built into the RNN object, so we don't need to do it here\n", - " LastTimeStep(), #We need to take the RNN output and reduce it to one item, (B, D)\n", - " nn.Linear(hidden_nodes, classes), #(B, D) -> (B, classes)\n", + " LastTimeStep(), #We need to take the RNN output and reduce it to one item, (B, hidden_nodes)\n", + " nn.Linear(hidden_nodes, classes), #(B, hidden_nodes) -> (B, classes)\n", ")" ] }, @@ -1468,9 +1468,9 @@ "source": [ "rnn_packed = nn.Sequential(\n", " EmbeddingPackable(nn.Embedding(vocab_size, D)), #(B, T) -> (B, T, D)\n", - " nn.RNN(D, hidden_nodes, batch_first=True), #(B, T, D) -> ( (B,T,D) , (S, B, D) )\n", - " LastTimeStep(), #We need to take the RNN output and reduce it to one item, (B, D)\n", - " nn.Linear(hidden_nodes, classes), #(B, D) -> (B, classes)\n", + " nn.RNN(D, hidden_nodes, batch_first=True), #(B, T, D) -> ( (B,T,hidden_nodes) , (S, B, hidden_nodes) )\n", + " LastTimeStep(), #We need to take the RNN output and reduce it to one item, (B, hidden_nodes)\n", + " nn.Linear(hidden_nodes, classes), #(B, hidden_nodes) -> (B, classes)\n", ")\n", "\n", "rnn_packed.to(device)" @@ -2795,9 +2795,9 @@ "source": [ "rnn_3layer = nn.Sequential(\n", " EmbeddingPackable(nn.Embedding(vocab_size, D)), #(B, T) -> (B, T, D)\n", - " nn.RNN(D, hidden_nodes, num_layers=3, batch_first=True), #(B, T, D) -> ( (B,T,D) , (S, B, D) )\n", - " LastTimeStep(rnn_layers=3), #We need to take the RNN output and reduce it to one item, (B, D)\n", - " nn.Linear(hidden_nodes, classes), #(B, D) -> (B, classes)\n", + " nn.RNN(D, hidden_nodes, num_layers=3, batch_first=True), #(B, T, D) -> ( (B,T,hidden_nodes) , (S, B, hidden_nodes) )\n", + " LastTimeStep(rnn_layers=3), #We need to take the RNN output and reduce it to one item, (B, hidden_nodes)\n", + " nn.Linear(hidden_nodes, classes), #(B, hidden_nodes) -> (B, classes)\n", ")\n", "\n", "rnn_3layer.to(device)\n", @@ -3441,9 +3441,9 @@ "source": [ "rnn_3layer_bidir = nn.Sequential(\n", " EmbeddingPackable(nn.Embedding(vocab_size, D)), #(B, T) -> (B, T, D)\n", - " nn.RNN(D, hidden_nodes, num_layers=3, batch_first=True, bidirectional=True), #(B, T, D) -> ( (B,T,D) , (S, B, D) )\n", - " LastTimeStep(rnn_layers=3, bidirectional=True), #We need to take the RNN output and reduce it to one item, (B, D)\n", - " nn.Linear(hidden_nodes*2, classes), #(B, D) -> (B, classes)\n", + " nn.RNN(D, hidden_nodes, num_layers=3, batch_first=True, bidirectional=True), #(B, T, D) -> ( (B,T,hidden_nodes*2) , (S, B, hidden_nodes) )\n", + " LastTimeStep(rnn_layers=3, bidirectional=True), #We need to take the RNN output and reduce it to one item, (B, hidden_nodes*2)\n", + " nn.Linear(hidden_nodes*2, classes), #(B, hidden_nodes*2) -> (B, classes)\n", ")\n", "\n", "rnn_3layer_bidir.to(device)\n",