MOM_ANN.F90

1! This file is part of MOM6, the Modular Ocean Model version 6.
2! See the LICENSE file for licensing information.
3! SPDX-License-Identifier: Apache-2.0
4
5!> Implements the general purpose Artificial Neural Network (ANN).
6module mom_ann
7
8! This file is part of MOM6. See LICENSE.md for the license
9
10use mom_io, only : mom_read_data, field_exists
11use mom_error_handler, only : mom_error, fatal, mom_mesg
13
14implicit none ; private
15
16!#include <MOM_memory.h>
17
22
23!> Applies ANN to x, returning results in y
24interface ann_apply
25 module procedure ann_apply_vector_oi
26 module procedure ann_apply_array_sio
27end interface ann_apply
28
29!> Type for a single Linear layer of ANN,
30!! i.e. stores the matrix A and bias b
31!! for matrix-vector multiplication
32!! y = A*x + b.
33type, private :: layer_type ; private
34 integer :: output_width !< Number of rows in matrix A
35 integer :: input_width !< Number of columns in matrix A
36 logical :: activation = .true. !< If true, apply the default activation function
37
38 real, allocatable :: a(:,:) !< Matrix in column-major order
39 !! of size A(output_width, input_width) [nondim]
40 real, allocatable :: b(:) !< bias vector of size output_width [nondim]
41end type layer_type
42
43!> Control structure/type for ANN
44type, public :: ann_cs ; private
45 ! Parameters
46 integer :: num_layers !< Number of layers in the ANN, including the input and output.
47 !! For example, for ANN with one hidden layer, num_layers = 3.
48 integer, allocatable &
49 :: layer_sizes(:) !< Array of length num_layers, storing the number of neurons in
50 !! each layer.
51
52 type(layer_type), allocatable &
53 :: layers(:) !< Array of length num_layers-1, where each element is the Linear
54 !! transformation between layers defined by Matrix A and vias b.
55
56 real, allocatable :: &
57 input_means(:), & !< Array of length layer_sizes(1) containing the mean of each input feature
58 !! prior to normalization by input_norms [arbitrary].
59 input_norms(:), & !< Array of length layer_sizes(1) containing the *inverse* of the standard
60 !! deviation for each input feature used to normalize (multiply) before
61 !! feeding into the ANN [arbitrary]
62 output_means(:), & !< Array of length layer_sizes(num_layers) containing the mean of each
63 !! output prior to normalization by output_norms [arbitrary].
64 output_norms(:) !< Array of length layer_sizes(num_layers) containing the standard deviation
65 !! each output of the ANN will be multiplied [arbitrary]
66
67 integer, public :: parameters = 0 !< Count of number of parameters
68end type ann_cs
69
70contains
71
72!> Initialization of ANN. Allocates memory and reads ANN parameters from NetCDF file.
73!! The NetCDF file must contain:
74!! Integer num_layers.
75!! Integer arrays: layer_sizes, input_norms, output_norms
76!! Matrices and biases for Linear layers can be Real(4) or Real(8) and
77!! are named as: A0, b0 for the first layer; A1, b1 for the second layer and so on.
78subroutine ann_init(CS, NNfile)
79 type(ann_cs), intent(inout) :: cs !< ANN control structure.
80 character(*), intent(in) :: nnfile !< The name of NetCDF file having neural network parameters
81 ! Local variables
82 integer :: i
83 integer :: num_layers ! Number of layers, including input and output layers
84 integer, allocatable :: layer_sizes(:) ! Number of neurons in each layer
85 character(len=1) :: layer_num_str
86 character(len=3) :: fieldname
87
88 call mom_mesg('ANN: init from ' // trim(nnfile), 2)
89
90 ! Read the number of layers
91 call mom_read_data(nnfile, "num_layers", num_layers)
92
93 ! Read size of layers
94 allocate( layer_sizes(num_layers) )
95 call mom_read_data(nnfile, "layer_sizes", layer_sizes)
96
97 ! Allocates the memory for storing normalization, weights and biases
98 call ann_allocate(cs, num_layers, layer_sizes)
99 deallocate( layer_sizes )
100
101 ! Read normalization factors
102 if (field_exists(nnfile, 'input_means')) &
103 call mom_read_data(nnfile, 'input_means', cs%input_means)
104 if (field_exists(nnfile, 'input_norms')) then
105 call mom_read_data(nnfile, 'input_norms', cs%input_norms)
106 ! We calculate the reciprocal here to avoid repeated divisions later
107 cs%input_norms(:) = 1. / cs%input_norms(:)
108 endif
109 if (field_exists(nnfile, 'output_means')) &
110 call mom_read_data(nnfile, 'output_means', cs%output_means)
111 if (field_exists(nnfile, 'output_norms')) &
112 call mom_read_data(nnfile, 'output_norms', cs%output_norms)
113
114 ! Allocate and read matrix A and bias b for each layer
115 do i = 1,cs%num_layers-1
116 cs%layers(i)%input_width = cs%layer_sizes(i)
117 cs%layers(i)%output_width = cs%layer_sizes(i+1)
118
119 ! Reading matrix A
120 write(layer_num_str, '(I0)') i-1
121 fieldname = trim('A') // trim(layer_num_str)
122 call mom_read_data(nnfile, fieldname, cs%layers(i)%A, &
123 (/1,1,1,1/),(/cs%layers(i)%output_width,cs%layers(i)%input_width,1,1/))
124
125 ! Reading bias b
126 fieldname = trim('b') // trim(layer_num_str)
127 call mom_read_data(nnfile, fieldname, cs%layers(i)%b)
128 enddo
129
130 ! No activation function for the last layer
131 cs%layers(cs%num_layers-1)%activation = .false.
132
133 if (field_exists(nnfile, 'x_test') .and. field_exists(nnfile, 'y_test') ) &
134 call ann_test(cs, nnfile)
135
136 call mom_mesg('ANN: have been read from ' // trim(nnfile), 2)
137
138end subroutine ann_init
139
140!> Allocate an ANN
141!!
142!! This creates the memory for storing weights and intermediate work arrays, but does not set
143!! the values of weights or biases (not even initializing with zeros).
144subroutine ann_allocate(CS, num_layers, layer_sizes)
145 type(ann_cs), intent(inout) :: cs !< ANN control structure
146 integer, intent(in) :: num_layers !< The number of layers, including the input and output layer
147 integer, intent(in) :: layer_sizes(num_layers) !< The number of neurons in each layer
148 ! Local variables
149 integer :: l ! Layer number
150
151 ! Assert that there is always an input and output layer
152 if (num_layers < 2) call mom_error(fatal, "The number of layers in an ANN must be >=2")
153
154 cs%num_layers = num_layers
155
156 ! Layers
157 allocate( cs%layer_sizes(cs%num_layers) )
158 cs%layer_sizes(:) = layer_sizes(:)
159
160 ! Input and output normalization values
161 allocate( cs%input_means(cs%layer_sizes(1)), source=0. ) ! Assume zero mean by default
162 allocate( cs%input_norms(cs%layer_sizes(1)), source=1. ) ! Assume unit variance by default
163 allocate( cs%output_means(cs%layer_sizes(cs%num_layers)), source=0. ) ! Assume zero mean by default
164 allocate( cs%output_norms(cs%layer_sizes(cs%num_layers)), source=1. ) ! Assume unit variance by default
165
166 ! Allocate the Linear transformations between layers
167 allocate(cs%layers(cs%num_layers-1))
168 cs%parameters = 2 * cs%layer_sizes(1) ! For input normalization
169
170 ! Allocate matrix A and bias b for each layer
171 do l = 1, cs%num_layers-1
172 cs%layers(l)%input_width = cs%layer_sizes(l)
173 cs%layers(l)%output_width = cs%layer_sizes(l+1)
174
175 allocate( cs%layers(l)%A(cs%layers(l)%output_width, cs%layers(l)%input_width) )
176 allocate( cs%layers(l)%b(cs%layers(l)%output_width) )
177
178 cs%parameters = cs%parameters &
179 + cs%layer_sizes(l) * cs%layer_sizes(l+1) & ! For weights
180 + cs%layer_sizes(l+1) ! For bias
181 enddo
182 cs%parameters = cs%parameters &
183 + 2 * cs%layer_sizes(cs%num_layers) ! For output normalization
184
185end subroutine ann_allocate
186
187!> Test ANN by comparing the prediction with the test data.
188subroutine ann_test(CS, NNfile)
189 type(ann_cs), intent(inout) :: CS !< ANN control structure.
190 character(*), intent(in) :: NNfile !< The name of NetCDF file having neural network parameters
191 ! Local variables
192 real, dimension(:), allocatable :: x_test, y_test, y_pred ! [arbitrary]
193 real :: relative_error ! [arbitrary]
194 character(len=200) :: relative_error_str
195
196 ! Allocate data
197 allocate(x_test(cs%layer_sizes(1)))
198 allocate(y_test(cs%layer_sizes(cs%num_layers)))
199 allocate(y_pred(cs%layer_sizes(cs%num_layers)))
200
201 ! Read test vectors
202 call mom_read_data(nnfile, 'x_test', x_test)
203 call mom_read_data(nnfile, 'y_test', y_test)
204
205 ! Compute prediction
206 call ann_apply_vector_oi(x_test, y_pred, cs)
207
208 relative_error = maxval(abs(y_pred(:) - y_test(:))) / maxval(abs(y_test(:)))
209
210 if (relative_error > 1e-5) then
211 write(relative_error_str, '(ES12.4)') relative_error
212 call mom_error(fatal, 'Relative error in ANN prediction is too large: ' // trim(relative_error_str))
213 endif
214
215 deallocate(x_test)
216 deallocate(y_test)
217 deallocate(y_pred)
218end subroutine ann_test
219
220!> Deallocates memory of ANN
221subroutine ann_end(CS)
222 type(ann_cs), intent(inout) :: cs !< ANN control structure.
223 ! Local variables
224 integer :: i
225
226 deallocate(cs%layer_sizes)
227 deallocate(cs%input_means)
228 deallocate(cs%input_norms)
229 deallocate(cs%output_means)
230 deallocate(cs%output_norms)
231
232 do i = 1, cs%num_layers-1
233 deallocate(cs%layers(i)%A)
234 deallocate(cs%layers(i)%b)
235 enddo
236 deallocate(cs%layers)
237
238end subroutine ann_end
239
240!> The default activation function
241pure elemental function activation_fn(x) result (y)
242 real, intent(in) :: x !< Scalar input value [nondim]
243 real :: y !< Scalar output value [nondim]
244
245 y = max(x, 0.0) ! ReLU activation
246
247end function activation_fn
248
249!> Single application of ANN inference using vector input and output
250!!
251!! This implementation is the simplest using allocation and de-allocation
252!! of temporary arrays
253subroutine ann_apply_vector_orig(x, y, CS)
254 type(ann_cs), intent(in) :: cs !< ANN instance
255 real, intent(in) :: x(cs%layer_sizes(1)) !< Inputs [arbitrary]
256 real, intent(inout) :: y(cs%layer_sizes(cs%num_layers)) !< Outputs [arbitrary]
257 ! Local variables
258 real, allocatable :: x_1(:), x_2(:) ! intermediate states [nondim]
259 integer :: i, o ! Input, output indices
260
261 ! Normalize input
262 allocate(x_1(cs%layer_sizes(1)))
263 do i = 1,cs%layer_sizes(1)
264 x_1(i) = ( x(i) - cs%input_means(i) ) * cs%input_norms(i)
265 enddo
266
267 ! Apply Linear layers
268 do i = 1, cs%num_layers-1
269 allocate(x_2(cs%layer_sizes(i+1)))
270 call layer_apply_orig(x_1, x_2, cs%layers(i))
271 deallocate(x_1)
272 allocate(x_1(cs%layer_sizes(i+1)))
273 x_1(:) = x_2(:)
274 deallocate(x_2)
275 enddo
276
277 ! Un-normalize output
278 do o = 1, cs%layer_sizes(cs%num_layers)
279 y(o) = ( x_1(o) * cs%output_norms(o) ) + cs%output_means(o)
280 enddo
281
282 deallocate(x_1)
283
284 contains
285
286 !> Applies linear layer to input data x and stores the result in y with
287 !! y = A*x + b with optional application of the activation function so the
288 !! overall operations is ReLU(A*x + b)
289 subroutine layer_apply_orig(x, y, layer)
290 type(layer_type), intent(in) :: layer !< Linear layer
291 real, intent(in) :: x(layer%input_width) !< Input vector [nondim]
292 real, intent(inout) :: y(layer%output_width) !< Output vector [nondim]
293 ! Local variables
294 integer :: i, o ! Input, output indices
295
296 ! Add bias
297 y(:) = layer%b(:)
298 ! Multiply by kernel
299 do i=1,layer%input_width
300 do o=1,layer%output_width
301 y(o) = y(o) + x(i) * layer%A(o, i)
302 enddo
303 enddo
304 ! Apply activation function
305 if (layer%activation) y(:) = activation_fn(y(:))
306
307 end subroutine layer_apply_orig
308end subroutine ann_apply_vector_orig
309
310!> Single application of ANN inference using vector input and output
311!!
312!! This implementation avoids repeated reallocation of work arrays and uses the
313!! output index for the fastest (inner-most) loop in the layer matrix multiply.
314subroutine ann_apply_vector_oi(x, y, CS)
315 type(ann_cs), intent(in) :: CS !< ANN instance
316 real, intent(in) :: x(CS%layer_sizes(1)) !< Inputs [arbitrary]
317 real, intent(inout) :: y(CS%layer_sizes(CS%num_layers)) !< Outputs [arbitrary]
318 ! Local variables
319 real, allocatable :: x_1(:), x_2(:) ! intermediate states [nondim]
320 integer :: i, o ! Input, output indices
321
322 allocate( x_1( maxval( cs%layer_sizes(:) ) ) )
323 allocate( x_2( maxval( cs%layer_sizes(:) ) ) )
324
325 ! Normalize input
326 do i = 1,cs%layer_sizes(1)
327 x_1(i) = ( x(i) - cs%input_means(i) ) * cs%input_norms(i)
328 enddo
329
330 ! Apply Linear layers
331 do i = 1, cs%num_layers-2, 2
332 call layer_apply_oi(x_1, x_2, cs%layers(i))
333 call layer_apply_oi(x_2, x_1, cs%layers(i+1))
334 enddo
335 if (mod(cs%num_layers,2)==0) then
336 call layer_apply_oi(x_1, x_2, cs%layers(cs%num_layers-1))
337 ! Un-normalize output
338 do o = 1, cs%layer_sizes(cs%num_layers)
339 y(o) = x_2(o) * cs%output_norms(o) + cs%output_means(o)
340 enddo
341 else
342 ! Un-normalize output
343 do o = 1, cs%layer_sizes(cs%num_layers)
344 y(o) = x_1(o) * cs%output_norms(o) + cs%output_means(o)
345 enddo
346 endif
347
348 deallocate(x_1, x_2)
349
350 contains
351
352 !> Applies linear layer to input data x and stores the result in y with
353 !! y = A*x + b with optional application of the activation function so the
354 !! overall operations is ReLU(A*x + b)
355 subroutine layer_apply_oi(x, y, layer)
356 type(layer_type), intent(in) :: layer !< Linear layer
357 real, intent(in) :: x(layer%input_width) !< Input vector [nondim]
358 real, intent(inout) :: y(layer%output_width) !< Output vector [nondim]
359 ! Local variables
360 integer :: i, o ! Input, output indices
361
362 ! Add bias
363 y(:) = layer%b(:)
364 ! Multiply by kernel
365 do i=1,layer%input_width
366 do o=1,layer%output_width
367 y(o) = y(o) + x(i) * layer%A(o, i)
368 enddo
369 enddo
370 ! Apply activation function
371 if (layer%activation) y(:) = activation_fn(y(:))
372
373 end subroutine layer_apply_oi
374end subroutine ann_apply_vector_oi
375
376!> Single application of ANN inference using array input and output
377!! with (space,feature) indexing
378!!
379!! This implementation uses the space index for the fastest (inner-most) loop
380!! in the layer matrix multiply, with the input index as the next fastest loop,
381!! and uses the weights matrix A(output,index). It also applies the activation
382!! function within the outer loop of the matrix multiply.
383subroutine ann_apply_array_sio(nij, x, y, CS)
384 type(ann_cs), intent(in) :: CS !< ANN control structure
385 integer, intent(in) :: nij !< Size of spatial dimension
386 real, intent(in) :: x(nij, CS%layer_sizes(1)) !< input [arbitrary]
387 real, intent(inout) :: y(nij, CS%layer_sizes(CS%num_layers)) !< output [arbitrary]
388 ! Local variables
389 real, allocatable :: x_1(:,:), x_2(:,:) ! intermediate states [nondim]
390 integer :: l, i, o ! Layer, input, output index
391
392 allocate( x_1( nij, maxval( cs%layer_sizes(:) ) ) )
393 allocate( x_2( nij, maxval( cs%layer_sizes(:) ) ) )
394
395 ! Normalize input
396 do i = 1, cs%layer_sizes(1)
397 x_1(:,i) = ( x(:,i) - cs%input_means(i) ) * cs%input_norms(i)
398 enddo
399
400 ! Apply Linear layers
401 do l = 1, cs%num_layers-2, 2
402 call layer_apply_sio(nij, x_1, x_2, cs%layers(l))
403 call layer_apply_sio(nij, x_2, x_1, cs%layers(l+1))
404 enddo
405 if (mod(cs%num_layers,2)==0) then
406 call layer_apply_sio(nij, x_1, x_2, cs%layers(cs%num_layers-1))
407 ! Un-normalize output
408 do o = 1, cs%layer_sizes(cs%num_layers)
409 y(:,o) = x_2(:,o) * cs%output_norms(o) + cs%output_means(o)
410 enddo
411 else
412 ! Un-normalize output
413 do o = 1, cs%layer_sizes(cs%num_layers)
414 y(:,o) = x_1(:,o) * cs%output_norms(o) + cs%output_means(o)
415 enddo
416 endif
417
418 deallocate(x_1, x_2)
419
420 contains
421
422 !> Applies linear layer to input data x and stores the result in y with
423 !! y = A*x + b with optional application of the activation function so the
424 !! overall operations is ReLU(A*x + b)
425 subroutine layer_apply_sio(nij, x, y, layer)
426 type(layer_type), intent(in) :: layer !< Linear layer
427 integer, intent(in) :: nij !< Size of spatial dimension
428 real, intent(in) :: x(nij, layer%input_width) !< Input vector [nondim]
429 real, intent(inout) :: y(nij, layer%output_width) !< Output vector [nondim]
430 ! Local variables
431 integer :: i, o ! Input, output indices
432
433 do o = 1, layer%output_width
434 ! Add bias
435 y(:,o) = layer%b(o)
436 ! Multiply by kernel
437 do i = 1, layer%input_width
438 y(:,o) = y(:,o) + x(:,i) * layer%A(o, i)
439 enddo
440 ! Apply activation function
441 if (layer%activation) y(:,o) = activation_fn(y(:,o))
442 enddo
443
444 end subroutine layer_apply_sio
445end subroutine ann_apply_array_sio
446
447!> Sets weights and bias for a single layer
448subroutine set_layer(ANN, layer, weights, biases, activation)
449 type(ann_cs), intent(inout) :: ann !< ANN control structure
450 integer, intent(in) :: layer !< The number of the layer being adjusted
451 real, intent(in) :: weights(:,:) !< The weights to assign
452 real, intent(in) :: biases(:) !< The biases to assign
453 logical, intent(in) :: activation !< Turn on the activation function
454
455 if ( layer >= ann%num_layers ) &
456 call mom_error(fatal, "MOM_ANN, set_layer: layer is out of range")
457 if ( layer < 1 ) &
458 call mom_error(fatal, "MOM_ANN, set_layer: layer should be >= 1")
459
460 if ( size(biases) /= size(ann%layers(layer)%b) ) &
461 call mom_error(fatal, "MOM_ANN, set_layer: mismatch in size of biases")
462 ann%layers(layer)%b(:) = biases(:)
463
464 if ( size(weights,1) /= size(ann%layers(layer)%A,1) ) &
465 call mom_error(fatal, "MOM_ANN, set_layer: mismatch in size of weights (first dim)")
466 if ( size(weights,2) /= size(ann%layers(layer)%A,2) ) &
467 call mom_error(fatal, "MOM_ANN, set_layer: mismatch in size of weights (second dim)")
468 ann%layers(layer)%A(:,:) = weights(:,:)
469
470 ann%layers(layer)%activation = activation
471end subroutine set_layer
472
473!> Sets input normalization
474subroutine set_input_normalization(ANN, means, norms)
475 type(ann_cs), intent(inout) :: ann !< ANN control structure
476 real, optional, intent(in) :: means(:) !< The mean of each input
477 real, optional, intent(in) :: norms(:) !< The standard deviation of each input
478
479 if (present(means)) then
480 if ( size(means) /= size(ann%input_means) ) &
481 call mom_error(fatal, "MOM_ANN, set_input_normalization: mismatch in size of means")
482 ann%input_means(:) = means(:)
483 endif
484
485 if (present(norms)) then
486 if ( size(norms) /= size(ann%input_norms) ) &
487 call mom_error(fatal, "MOM_ANN, set_input_normalization: mismatch in size of norms")
488 ann%input_norms(:) = norms(:)
489 endif
490
491end subroutine set_input_normalization
492
493!> Sets output normalization
494subroutine set_output_normalization(ANN, means, norms)
495 type(ann_cs), intent(inout) :: ann !< ANN control structure
496 real, optional, intent(in) :: means(:) !< The mean of each output
497 real, optional, intent(in) :: norms(:) !< The standard deviation of each output
498
499 if (present(means)) then
500 if ( size(means) /= size(ann%output_means) ) &
501 call mom_error(fatal, "MOM_ANN, set_output_normalization: mismatch in size of means")
502 ann%output_means(:) = means(:)
503 endif
504
505 if (present(norms)) then
506 if ( size(norms) /= size(ann%output_norms) ) &
507 call mom_error(fatal, "MOM_ANN, set_output_normalization: mismatch in size of norms")
508 ann%output_norms(:) = norms(:)
509 endif
510
511end subroutine set_output_normalization
512
513!> Create a random ANN
514subroutine ann_random(ANN, nlayers, widths)
515 type(ann_cs), intent(inout) :: ann !< ANN control structure
516 integer, intent(in) :: nlayers !< Number of layers
517 integer, intent(in) :: widths(nlayers) !< Width of each layer
518 ! Local variables
519 integer :: l
520
521 call ann_allocate(ann, nlayers, widths)
522
523 do l = 1, nlayers-1
524 call randomize_layer(ann, nlayers, l, widths)
525 enddo
526
527end subroutine ann_random
528
529!> Fill a layer with random numbers
530subroutine randomize_layer(ANN, nlayers, layer, widths)
531 type(ann_cs), intent(inout) :: ann !< ANN control structure
532 integer, intent(in) :: nlayers !< Number of layers
533 integer, intent(in) :: layer !< Layer number to randomize
534 integer, intent(in) :: widths(nlayers) !< Width of each layer
535 ! Local variables
536 real :: weights(widths(layer+1),widths(layer)) ! Weights
537 real :: biases(widths(layer+1)) ! Biases
538
539 call random_number(weights)
540 weights(:,:) = 2. * weights(:,:) - 1.
541
542 call random_number(biases)
543 biases(:) = 2. * biases(:) - 1.
544
545 call set_layer(ann, layer, weights, biases, layer<nlayers-1)
546
547end subroutine randomize_layer
548
549!> Runs unit tests on ANN functions.
550!!
551!! Should only be called from a single/root thread.
552!! Returns True if a test fails, otherwise False.
553logical function ann_unit_tests(verbose)
554 logical, intent(in) :: verbose !< If true, write results to stdout
555 ! Local variables
556 type(ann_cs) :: ann ! An ANN
557 type(testing) :: test ! Manage tests
558 real, allocatable :: x(:), y(:), y_good(:), x2(:,:), y2(:,:) ! Inputs, outputs [arbitrary]
559 integer, parameter :: max_rand_nlay = 10 ! Deepest random ANN to generate
560 integer :: widths(max_rand_nlay) ! Number of layers for random ANN
561 integer :: nlay ! Number of layers for random ANN
562 integer :: i, iter ! Loop counters
563 logical :: rand_res ! Status of random tests
564
565 ann_unit_tests = .false. ! Start by assuming all is well
566 call test%set(verbose=verbose) ! Pass verbose mode to test
567
568 ! Identity ANN for one input
569 allocate( y(1) )
570 call ann_allocate(ann, 2, [1,1])
571 call set_layer(ann, 1, reshape([1.],[1,1]), [0.], .false.)
572 call ann_apply([1.], y, ann)
573 call test%real_scalar(y(1), 1., 'Scalar identity')
574 deallocate( y )
575 call ann_end(ann)
576
577 ! Summation ANN
578 allocate( y(1) )
579 call ann_allocate(ann, 2, [4,1])
580 call set_layer(ann, 1, reshape([1.,1.,1.,1.], [1,4]), [0.], .false.)
581 call ann_apply([-1.,0.,1.,2.], y, ann)
582 call test%real_scalar(y(1), 2., 'Summation')
583 deallocate( y )
584 call ann_end(ann)
585
586 ! Identity ANN for vector input/output
587 call ann_allocate(ann, 2, [3,3])
588 allocate( y(3) )
589 call set_layer(ann, 1, reshape([1.,0.,0., &
590 0.,1.,0., &
591 0.,0.,1.], [3,3]), [0.,0.,0.], .false.)
592 call ann_apply([-1.,0.,1.], y, ann)
593 call test%real_arr(3, y, [-1.,0.,1.], 'Vector identity')
594 deallocate( y )
595 call ann_end(ann)
596
597 ! Rectifying ANN for vector input/output
598 allocate( y(3) )
599 call ann_allocate(ann, 2, [3,3])
600 call set_layer(ann, 1, reshape([1.,0.,0., &
601 0.,1.,0., &
602 0.,0.,1.], [3,3]), [0.,0.,0.], .true.)
603 call ann_apply([-1.,0.,1.], y, ann)
604 call test%real_arr(3, y, [0.,0.,1.], 'Rectifier')
605 deallocate( y )
606 call ann_end(ann)
607
608 ! The next 3 tests re-use the same network with 4 inputs, a 4-wide hidden layer, and one output
609 allocate( y(1) )
610 call ann_allocate(ann, 3, [4,4,1])
611
612 ! 1 hidden layer: rectifier followed by summation
613 ! Inputs: [-1,0,1,2]
614 ! Rectified: [0,0,1,2]
615 ! Sum: 3
616 ! Outputs: 3
617 call set_layer(ann, 1, reshape([1.,0.,0.,0., &
618 0.,1.,0.,0., &
619 0.,0.,1.,0., &
620 0.,0.,0.,1.], [4,4]), [0.,0.,0.,0.], .true.)
621 call set_layer(ann, 2, reshape([1.,1.,1.,1.], [1,4]), [0.], .false.)
622 call ann_apply_vector_orig([-1.,0.,1.,2.], y, ann)
623 call test%real_scalar(y(1), 3., 'Rectifier+summation')
624
625 ! as above but with biases
626 ! Inputs: [-2,-1,0,1]
627 ! After bias: [-1,0,1,2] with b=1
628 ! Rectified: [0,0,1,2]
629 ! Sum: 3
630 ! After bias: 6 with b=3
631 ! Outputs: 6
632 call set_layer(ann, 1, reshape([1.,0.,0.,0., &
633 0.,1.,0.,0., &
634 0.,0.,1.,0., &
635 0.,0.,0.,1.], [4,4]), [1.,1.,1.,1.], .true.)
636 call set_layer(ann, 2, reshape([1.,1.,1.,1.], [1,4]), [3.], .false.)
637 call ann_apply_vector_orig([-2.,-1.,0.,1.], y, ann)
638 call test%real_scalar(y(1), 6., 'Rectifier+summation+bias')
639
640 ! as above but with normalization of inputs and outputs
641 ! Inputs: [0,2,4,6]
642 ! Normalized inputs: [-2,-1,0,1] (using mean=-4, norm=2)
643 ! Normalized outputs: 6
644 ! De-normalized output: 2 (using mean=-10, norm=2)
645 call set_input_normalization(ann, means=[4.,4.,4.,4.], norms=[0.5,0.5,0.5,0.5])
646 call set_output_normalization(ann, norms=[2.], means=[-10.])
647 call ann_apply_vector_orig([0.,2.,4.,6.], y, ann)
648 call test%real_scalar(y(1), 2., 'Rectifier+summation+bias+norms')
649
650 deallocate( y )
651 call ann_end(ann)
652
653 ! as above with a 1x1 4th identity layer (to check loop combinations)
654 allocate( y(1) )
655 call ann_allocate(ann, 4, [4,4,1,1])
656 call set_layer(ann, 1, reshape([1.,0.,0.,0., &
657 0.,1.,0.,0., &
658 0.,0.,1.,0., &
659 0.,0.,0.,1.], [4,4]), [1.,1.,1.,1.], .true.)
660 call set_layer(ann, 2, reshape([1.,1.,1.,1.], [1,4]), [3.], .false.)
661 call set_layer(ann, 3, reshape([1.],[1,1]), [0.], .false.)
662 call set_input_normalization(ann, means=[4.,4.,4.,4.], norms=[0.5,0.5,0.5,0.5])
663 call set_output_normalization(ann, norms=[2.], means=[-10.])
664 call ann_apply_vector_orig([0.,2.,4.,6.], y, ann)
665 call test%real_scalar(y(1), 2., 'Rectifier+summation+bias+norms 4-layer')
666
667 ! as above with v2 of ANN_apply
668 call ann_apply_vector_oi([0.,2.,4.,6.], y, ann)
669 call test%real_scalar(y(1), 2., 'Rectifier+summation+bias+norms 4-layer v2')
670 deallocate( y )
671
672 allocate( y2(1,2) )
673 ! as above with v5 of ANN_apply applied to 2d inputs, x(space,feature)
674 call ann_apply_array_sio(2, reshape([0.,1.,2.,3.,4.,5.,6.,7.],[2,4]), y2, ann)
675 call test%real_arr(2, y2, [2.,5.], 'Rectifier+summation+bias+norms 4-layer array v2')
676 deallocate( y2 )
677
678 call ann_end(ann)
679
680 ! The following block checks that for random ANN (weights and layers widths)
681 ! each of the various implementations of inference give identical results.
682 ! This helped catch loop and allocation errors.
683 rand_res = .false.
684 do iter = 1, 1000
685 allocate( y(max_rand_nlay+1) )
686 call random_number(y) ! Vector of random numbers 0..1
687 nlay = 2 + floor( y(max_rand_nlay+1) * ( max_rand_nlay - 1 ) ) ! 2 < nlay < max_rand_nlay
688 widths(:) = 1 + floor( y(1:nlay) * 8 ) ! 1 < layer width < 8
689 deallocate( y )
690 call ann_random(ann, nlay, widths)
691 allocate( x(widths(1)), y(widths(nlay)), y_good(widths(nlay)) )
692 call ann_apply_vector_orig(x, y_good, ann)
693 call ann_apply_vector_oi(x, y, ann)
694 rand_res = rand_res .or. maxval( abs( y(:) - y_good(:) ) ) > 0. ! Check results from v2 = v1
695 allocate( x2(20,widths(1)), y2(20,widths(nlay)) ) ! 2D input, output
696 do i = 1, 20
697 x2(i,:) = x(:)
698 enddo
699 call ann_apply_array_sio(20, x2, y2, ann)
700 rand_res = rand_res .or. maxval( abs( maxval(y2(:,:),1) - y_good(:) ) ) > 0. ! Check results from array v2 = v1
701 rand_res = rand_res .or. maxval( abs( minval(y2(:,:),1) - y_good(:) ) ) > 0. ! Check results from array v2 = v1
702 deallocate( x, y, y_good, x2, y2 )
703 call ann_end(ann)
704 enddo
705 call test%test(rand_res, 'Equivalence between inference variants with random results')
706
707 ann_unit_tests = test%summarize('ANN_unit_tests')
708
709end function ann_unit_tests
710
711!> \namespace mom_ann
712!!
713!! The mom_ann module is a pure fortran implementation of fully-connected feed-forward
714!! networks to facilitate easy evaluation of data-driven functions in MOM6. For performant
715!! implementations or for novel architectires, using machine-learning libraries (e.g. via
716!! mom_database_comms) are necessary, or at least likely to be more efficient.
717!!
718!! The artificial neural network (ANN) understood by this MOM6 module has \f$ N \f$ layers,
719!! including the input-layer and output-layer, thus requireing \f$ N \geq 2\f$.
720!!
721!! The output values (neurons or nodes) of any layer other than the input layer (i.e. \f$ l>1 \f$) are
722!! \f[
723!! y_{l,j} = f_l( b_{l,j} + A_{l,j,i} x_{l-1,i} )
724!! \f]
725!! where \f$ f(x) = max(0, x) \f$ is the ReLU activation function, \f$b_{l,j}\f$ is a bias for each neuron,
726!! \f$A_{l,j,i}\f$ are a rectangular matrix of weights for each layer, and \f$x_{l-1,i}\f$ are the outputs
727!! of the previous layer, \f$l-1\f$. The subscript on \f$ f_l() \f$ indicates the activation function is
728!! optional for each layer.
729!!
730!! Currently, the performance of various implementations is dependent on the shape/size of the network and
731!! the size of input data. For this reason we provide several versions that all yield the same result but
732!! for differently shaped inputs.
733!!
734!! \image html https://upload.wikimedia.org/wikipedia/commons/4/46/Colored_neural_network.svg
735!! Fig: A three layer network with 3 inputs, 2 outputs, and 1 hidden layer. There are two rectanglar
736!! matrices of weights (black arrows). The bias for each neuron is implied."
737
738end module mom_ann