Consider a following approach to implementing a simple neural network in Fortran : an abstract polymorphic type layer
type, abstract :: layer
real, allocatable :: A(:,:)
class(layer), pointer :: nextLayer => null()
class(layer), pointer :: prevLayer => null()
contains
procedure(updateLayerINTRFC), deferred :: update
end type
abstract interface
subroutine updateLayerINTRFC(self)
import :: layer
class(layer), intent(inout) :: self
end subroutine
end interface
encapsulates universal properties all layers must have, ie. content (2d real array A), rule for updating the layer when new input is provided and pointers to neighbouring layers, both previous and next. Extending the base layer
type are any number of specific layer types like
type, extends(layer) :: layerA
! Weights determining the network behaviour
real, allocatable :: W(:,:)
contains
procedure :: update => updateA
end type
! to be located in the contains section of the module
subroutine updateA(self)
class(layerA), intent(inout) :: self
! The update rule has been simplified for clarity
self%A = matmul(self%W,self%prevLayer%A)
end subroutine
and a special kind layerFirst
for the first layer of the network
type, extends(layer) :: layerFirst
integer :: nLayers ! number of layers in a network
integer, allocatable :: nCells(:) ! number of cells in each layer
contains
procedure :: forwardFull
procedure :: update => updateFirst
end type
Which serves as the handle for the entire network and describes its dimensions. Network is made up of linked list of layers which begins with instance of layerFirst
. The list is generally heterogeneous which rules out using an array of type layer instead (I'm aware of a workaround by introducing an intermediate wrapper type, but prefer the pointer approach). A forward pass (ie. getting output from input) is performed simply by traversing the list and updating each layer
subroutine forwardFull(self,input,output)
class(layerFirst), intent(inout) :: self
real, intent(in) :: input(self%nCells(1))
real, intent(out) :: output(self%nCells(self%nLayers))
class(layer), pointer :: currentLayer
integer :: i
self%A(:,1) = input
currentLayer => self%nextLayer
do i=2,self%nLayers-1
call currentLayer%update()
currentLayer => currentLayer%nextLayer
end do
call currentLayer%update
output = reshape(currentLayer%A,shape=[self%nCells(self%nLayers)])
end subroutine
Since the question is not about training the networks, the weights can be just randomly generated to give a minimal working example which compiles under gfortran-10
module utils
implicit none
type, abstract :: layer
real, allocatable :: A(:,:)
class(layer), pointer :: nextLayer => null()
class(layer), pointer :: prevLayer => null()
contains
procedure(updateLayerINTRFC), deferred :: update
end type
type, extends(layer) :: layerFirst
integer :: nLayers
integer, allocatable :: nCells(:)
contains
procedure :: forwardFull
procedure :: update => updateFirst
end type
type, extends(layer) :: layerA
real, allocatable :: W(:,:)
contains
procedure :: update => updateA
end type
abstract interface
subroutine updateLayerINTRFC(self)
import :: layer
class(layer), intent(inout) :: self
end subroutine
end interface
contains
! ignore this, first layer does not need to be updated
subroutine updateFirst(self)
class(layerFirst), intent(inout) :: self
! Do nothing
end subroutine
subroutine updateA(self)
class(layerA), intent(inout) :: self
! the update rule has been simplified for clarity
self%A = matmul(self%W,self%prevLayer%A)
end subroutine
subroutine forwardFull(self,input,output)
class(layerFirst), intent(inout) :: self
real, intent(in) :: input(self%nCells(1))
real, intent(out) :: output(self%nCells(self%nLayers))
class(layer), pointer :: currentLayer
integer :: i
self%A(:,1) = input
currentLayer => self%nextLayer
do i=2,self%nLayers-1
call currentLayer%update()
currentLayer => currentLayer%nextLayer
end do
call currentLayer%update
output = reshape(currentLayer%A,shape=[self%nCells(self%nLayers)])
end subroutine
function newLayerA(ncurr,nprev) result(res)
type(layerA) :: res
integer, intent(in) :: ncurr, nprev
allocate(res%A(ncurr,1))
allocate(res%W(ncurr,nprev))
call random_number(res%A)
end function
function newNetwork() result(net)
type(layerFirst), target :: net
class(layer), pointer :: currentLayer
integer :: i
! network dimensions are hardcoded for simplicity
net%nLayers = 4
net%nCells = [784,128,64,10]
allocate(net%A(net%nCells(1),1))
allocate(net%nextLayer, source=newLayerA(net%nCells(2),net%nCells(1)))
currentLayer => net%nextLayer
currentLayer%prevLayer => net ! is net missing a target attribute
do i=2,net%nLayers-1
allocate(currentLayer%nextLayer, source=newLayerA(net%nCells(i+1),net%nCells(i)))
currentLayer%nextLayer%prevLayer => currentLayer
currentLayer => currentLayer%nextLayer
end do
end function
end module
program test
use utils
implicit none
type(layerFirst) :: net
real :: input(784,1), output(10,1)
call random_number(input)
net = newNetwork()
call net%forwardFull(input,output)
print '(" Most likely match : ", I0)', maxloc(output,1)-1
end program
However, this does not seems to correctly associate second layers prevLayer
pointer to the first layer, at least not consistently. All others can be simply checked and only problem is with the one marked "not ok" on the diagram
Ok Ok Ok
┌────────────────┐ ┌────────────────┐ ┌────────────────┐ ┌────────────────┐
│type(layerFirst)├─────►│ type(layerA) ├─────►│ type(layerA) ├─────►│ type(layerA) ├─────► Null()
│ :: net │ │ │ │ │ │ │
│ 784 cells │ │ 128 cells │ │ 64 cells │ │ 10 cells │
Null() ◄─────┤ │◄─────┤ │◄─────┤ │◄─────┤ │
└────────────────┘ └────────────────┘ └────────────────┘ └────────────────┘
Not ok Ok Ok
For example, calling print*, shape(net%nextLayer%prevLayer%A)
from the main program produces incorrect output 3784704 0
instead of 784 1
as print*, shape(net%A)
would. This sometimes (with annoying inconsistency) causes the matrix multiplication in updateA
for the second layer to crash the program.
Are there some esoteric rules pertaining to linking different types of the same general class? Have I made a mistake (probably in the function newNetwork
) or is this a compiler issue?
UPDATE : ifx error message points in the direction of the line allocate(net%nextLayer, source=newLayerA(net%nCells(2),net%nCells(1)))
claiming Uninitialized value was created by a heap allocation
, but I'm not sure what to make of it. Full message goes
==11782==WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0x4a7ae6 in utils_mp_newnetwork_ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:93:13
#1 0x4aad5f in MAIN__ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:107:11
#2 0x40a768 in main (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a768) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#3 0x7f5a69821c86 in __libc_start_main /build/glibc-CVJwZb/glibc-2.27/csu/../csu/libc-start.c:310
#4 0x40a619 in _start (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a619) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
Uninitialized value was created by a heap allocation
#0 0x418046 in malloc (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x418046) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#1 0x50a454 in _mm_malloc (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x50a454) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#2 0x4aeb23 in do_alloc_copy for_alloc_copy.c
#3 0x4addbd in do_for_alloc_copy for_alloc_copy.c
#4 0x4a40fc in utils_mp_newnetwork_ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:88:9
#5 0x4aad5f in MAIN__ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:107:11
#6 0x40a768 in main (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a768) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#7 0x7f5a69821c86 in __libc_start_main /build/glibc-CVJwZb/glibc-2.27/csu/../csu/libc-start.c:310
SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:93:13 in utils_mp_newnetwork_
Exiting
One problem is this line in the main program:
net = newNetwork()
and this one in the newNetwork()
function:
currentLayer%prevLayer => net ! is net missing a target attribute
net
is declared as a local object in newNetwork()
, which is different from net
declared in the main program. This means that you are pointing to something that disapears once the function returns.
There are several solutions to make it working:
newNetwork()
as a type bound procedure of your firstLayer
class and call it from the main program:call net%netNetwork()
type(layerFirst), pointer :: net
net => newNetwork()
type(layerFirst), pointer :: net
allocate(net)
net
pointer of the function will disappear as well, but the net
pointer of main will eventually point to the same memory area.Maybe there are other issues, but this one should be fixed first.