@@ -62,7 +62,7 @@ pure module subroutine forward(self, input)
62
62
integer :: iws, iwe
63
63
64
64
input_channels = size (input, dim= 1 )
65
- input_width = size (input, dim= 2 )
65
+ input_width = size (input, dim= 2 )
66
66
67
67
! Loop over output positions.
68
68
do j = 1 , self % width
@@ -73,11 +73,11 @@ pure module subroutine forward(self, input)
73
73
74
74
! For each filter, compute the convolution (inner product over channels and kernel width).
75
75
do concurrent (n = 1 :self % filters)
76
- self % z(n, j) = sum (self % kernel(n, :, :) * input(:, iws:iwe))
76
+ self % z(n, j) = sum (self % kernel(n,:, :) * input(:,iws:iwe))
77
77
end do
78
78
79
79
! Add the bias for each filter.
80
- self % z(:, j) = self % z(:, j) + self % biases
80
+ self % z(:,j) = self % z(:,j) + self % biases
81
81
end do
82
82
83
83
! Apply the activation function.
@@ -103,18 +103,14 @@ pure module subroutine backward(self, input, gradient)
103
103
104
104
! Determine dimensions.
105
105
input_channels = size (input, dim= 1 )
106
- input_width = size (input, dim= 2 )
107
- output_width = self % width ! Note: output_width = input_width - kernel_size + 1
106
+ input_width = size (input, dim= 2 )
107
+ output_width = self % width ! Note: output_width = input_width - kernel_size + 1
108
108
109
109
!--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output.
110
- do j = 1 , output_width
111
- gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j))
112
- end do
110
+ gdz = gradient * self % activation % eval_prime(self % z)
113
111
114
112
!--- Compute bias gradients: db(n) = sum_j gdz(n, j)
115
- do n = 1 , self % filters
116
- db_local(n) = sum (gdz(n, :), dim= 1 )
117
- end do
113
+ db_local = sum (gdz, dim= 2 )
118
114
119
115
!--- Initialize weight gradient and input gradient accumulators.
120
116
dw_local = 0.0
@@ -124,16 +120,16 @@ pure module subroutine backward(self, input, gradient)
124
120
! In the forward pass the window for output index j was:
125
121
! iws = j, iwe = j + kernel_size - 1.
126
122
do n = 1 , self % filters
127
- do j = 1 , output_width
128
- iws = j
129
- iwe = j + self % kernel_size - 1
130
- do k = 1 , self % channels
131
- ! Weight gradient: accumulate contribution from the input window.
132
- dw_local(n, k, :) = dw_local(n, k, :) + input(k, iws:iwe) * gdz(n, j)
133
- ! Input gradient: propagate gradient back to the input window.
134
- self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, j)
135
- end do
136
- end do
123
+ do j = 1 , output_width
124
+ iws = j
125
+ iwe = j + self % kernel_size - 1
126
+ do k = 1 , self % channels
127
+ ! Weight gradient: accumulate contribution from the input window.
128
+ dw_local(n,k, :) = dw_local(n,k, :) + input(k,iws:iwe) * gdz(n,j)
129
+ ! Input gradient: propagate gradient back to the input window.
130
+ self % gradient(k,iws:iwe) = self % gradient(k,iws:iwe) + self % kernel(n,k, :) * gdz(n,j)
131
+ end do
132
+ end do
137
133
end do
138
134
139
135
!--- Update stored gradients.
0 commit comments