8  Recurrent neural networks

Recurrent neural networks (RNNs) are also deep neural networks, but they use layers specialized for handling time series. In the following, we will again use the ‘keras’ package (Python: ‘keras’ (Chollet et al. 2015); Julia: ‘Flux’ (Innes et al. 2018)), but we will not differentiate between classification and regression, because the only difference would be to change the last layer and the loss function (see section ‘Deep neural networks’).
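
For instance, to turn the regression setup used below into a classification task, only the last layer and the loss would change. A minimal sketch (assuming three classes and one-hot encoded targets; not used further here):

library(keras)
RNN_class = 
  keras_model_sequential() %>% 
  layer_gru(input_shape = list(10L, 1L), units = 50) %>% 
  # classification instead of regression: one unit per class + softmax
  layer_dense(units = 3, activation = "softmax")
RNN_class %>% 
  compile(loss = loss_categorical_crossentropy,
          optimizer = optimizer_adamax(0.01))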

For the data, we simulated one time series from a simple ARIMA process using the ‘arima.sim’ function. Our goal is to train a network that is able to predict the next 10 time points based on the previous 10 time points.

## RNNs
library(keras)
data = as.matrix(arima.sim(n = 1000, list(ar = c(0.3, -0.7)) ))
# We use here a simplified way to create X and Y 
# since the focus is on creating the RNNs
# cut the series into 100 consecutive blocks of 10 time points
data = matrix(data, ncol = 10L, byrow = TRUE)
# odd blocks are the inputs: (samples, time steps, features)
X = array(data[seq(1, 100, by = 2), ], dim = c(50, 10, 1))
# the following even blocks are the 10-step targets
Y = data[seq(2, 100, by = 2), ]
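
The split above uses non-overlapping blocks, alternating between inputs and targets. A more data-efficient alternative (a sketch, not used in the following) would be overlapping sliding windows, where every position of the series yields one training example:

# hypothetical alternative: overlapping sliding windows
raw = as.numeric(arima.sim(n = 1000, list(ar = c(0.3, -0.7))))
starts = 1:(length(raw) - 19) # each example needs 10 inputs + 10 targets
X_sw = t(sapply(starts, function(i) raw[i:(i + 9)]))
Y_sw = t(sapply(starts, function(i) raw[(i + 10):(i + 19)]))
X_sw = array(X_sw, dim = c(nrow(X_sw), 10L, 1L)) # (samples, time steps, features)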

RNN = 
  keras_model_sequential() %>% 
  # first hidden layer
  layer_gru(input_shape = list(10L, 1L),
            units = 50, 
            activation = "relu") %>%
  # we want to predict the next 10 time steps
  layer_dense(units = 10)
# add loss function and optimizer
RNN %>% 
  compile(loss = loss_mean_squared_error,
          optimizer = optimizer_adamax(0.01))

RNN %>% 
  fit(X, Y, epochs = 5, verbose = 0)

Make predictions:

head(predict(RNN, X), n = 3)
             [,1]       [,2]      [,3]         [,4]       [,5]       [,6]
[1,]  0.182871714 0.05779056 0.2804810  0.095762551 -0.2876692 -0.7354215
[2,]  0.242111340 0.21076134 0.3804666  0.008897863 -0.3699271 -0.8426715
[3,] -0.004352083 0.33250374 0.3860463 -0.031897288 -0.3857037 -0.5717312
            [,7]      [,8]        [,9]       [,10]
[1,] -0.08431444 0.4072831 -0.06652965 -0.09972054
[2,] -0.04815055 0.4886538 -0.09961714 -0.06673245
[3,]  0.07023557 0.5174094  0.04489198 -0.01002921
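
The network above has a single recurrent layer. To stack several recurrent layers, all but the last one must return their full output sequence rather than only the final hidden state. A sketch (same data, not trained here):

RNN_deep = 
  keras_model_sequential() %>% 
  layer_gru(input_shape = list(10L, 1L), units = 50,
            return_sequences = TRUE) %>% # pass the full sequence on
  layer_gru(units = 50) %>% 
  layer_dense(units = 10)
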
The same model in Python:

from tensorflow import keras
from tensorflow.keras.layers import GRU, Dense

X = r.X # get the data from R
Y = r.Y

RNN = keras.Sequential()
# first hidden layer
RNN.add(GRU(input_shape = (10, 1), units = 50, activation = "relu"))
# we want to predict the next 10 time steps
RNN.add(Dense(units = 10))

RNN.summary()
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 gru_1 (GRU)                 (None, 50)                7950      
                                                                 
 dense_1 (Dense)             (None, 10)                510       
                                                                 
=================================================================
Total params: 8,460
Trainable params: 8,460
Non-trainable params: 0
_________________________________________________________________
# add loss function and optimizer
RNN.compile(loss = keras.losses.mean_squared_error,
            optimizer = keras.optimizers.Adamax(0.01))

# train model
RNN.fit(X, Y, epochs = 5, verbose = 0)
<keras.callbacks.History object at 0x7f0436fa8ee0>

Make predictions:

RNN.predict(X)[0:10,:]

array([[ 0.08350393, -0.00180464,  0.34325618,  0.21686247, -0.18946874,
        -0.5937582 , -0.16486388,  0.33871526, -0.11158126, -0.15158123],
       [ 0.2227844 ,  0.4065776 ,  0.38111433, -0.00899748, -0.41014364,
        -0.6645749 , -0.13457376,  0.36858225, -0.20998326, -0.1766688 ],
       [-0.17471497,  0.74995714,  0.7496149 , -0.37774378, -0.77188545,
        -0.6525765 ,  0.24792194,  0.66503304, -0.12292746, -0.17186296],
       [-0.28794998,  1.5990531 ,  1.306996  , -0.829319  , -1.5312688 ,
        -1.1471694 ,  0.5062746 ,  1.2400869 , -0.250422  , -0.299824  ],
       [-0.23757203,  0.9901169 ,  1.0352945 , -0.53825665, -1.0542827 ,
        -0.8925454 ,  0.3523099 ,  0.9200909 , -0.17282516, -0.25122654],
       [-0.08620348,  1.4480689 ,  0.8258031 , -0.821919  , -1.1419564 ,
        -0.7132156 ,  0.4358264 ,  0.6400992 , -0.24556354, -0.15073477],
       [-0.17174806,  0.6353523 ,  0.73129326, -0.30230048, -0.7101729 ,
        -0.66088593,  0.20432712,  0.67419755, -0.10780463, -0.18124355],
       [-0.31512833, -0.9728573 ,  0.3660514 ,  0.58512855,  0.24408501,
        -0.4672906 , -0.29277703,  0.7419173 ,  0.24761096, -0.18694049],
       [-0.18512186,  0.9227145 ,  0.83411753, -0.45813644, -0.9110877 ,
        -0.73084646,  0.29302734,  0.76778096, -0.14175564, -0.18799058],
       [-0.3057377 ,  0.2650427 ,  0.8319554 , -0.1744615 , -0.5854104 ,
        -0.6983775 ,  0.16514157,  0.7936327 , -0.02395438, -0.21329436]],
      dtype=float32)
And the same model in Julia:

using Flux, Statistics;
using Flux.Losses: mse;
using ARFIMA;        # simulator for (fractional) ARIMA processes
using StaticArrays;  # provides SVector, used to pass the AR coefficients

Data preparation:

# simulate the series and cut it into 100 consecutive blocks of
# 10 time points; each column of X is one block
X = reshape(convert(Vector{Float32}, arfima(1000, 0.5, 0.3, SVector(-0.7))), 10, 100);
xtrain = X[:, 1:2:100];  # odd blocks are the inputs
ytrain = X[:, 2:2:100];  # the following even blocks are the targets

Create model (similar to Keras):

model = Chain(
  GRU(10 => 50),    # first hidden layer, as in the Keras model
  Dense(50 => 10)   # predict the next 10 time steps
)
Chain(
  Recur(
    GRUCell(10 => 50),                  # 9_200 parameters
  ),
  Dense(50 => 10),                      # 510 parameters
)         # Total: 6 trainable arrays, 9_710 parameters,
          # plus 1 non-trainable, 50 parameters, summarysize 38.297 KiB.

Train/optimize the model:

parameters = Flux.params(model);
optimizer = ADAM(0.01);

for epoch in 1:10
    # reset the hidden state of the GRU at the start of each epoch
    Flux.reset!(model);
    grads = gradient(parameters) do
        # one forward pass per sample; each sample is one 10-point block
        Pred = [model(xtrain[:, i]) for i in 1:50];
        loss = mean([mse(Pred[i], ytrain[:, i]) for i in 1:50]);
        println(loss);
        loss
    end
    Flux.update!(optimizer, parameters, grads);
end
0.3758767
0.31391445
0.27496925
0.25230625
0.23890918
0.22788806
0.21786831
0.20987608
0.20394556
0.1992052

Predictions:

Flux.reset!(model); # reset the hidden state before predicting
Pred = [model(xtrain[:, i]) for i in 1:50];
Pred[1]
10-element Vector{Float32}:
  0.5586194
  0.21556926
 -0.4545607
  0.31776372
 -0.19870074
 -0.2925711
 -0.1116208
  0.16067675
  0.064349174
 -0.20158282
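
All three implementations predict a fixed horizon of 10 time points. To forecast further ahead, the predictions can be fed back in as new inputs. A rough sketch in R, reusing the ‘RNN’ model and the ‘data’ matrix from above (iterated forecasts accumulate error, so this is only a heuristic):

last_window = array(data[100, ], dim = c(1, 10, 1)) # last observed block
forecast = c()
for (i in 1:3) { # 3 x 10 = 30 steps ahead
  pred = predict(RNN, last_window)[1, ]
  forecast = c(forecast, pred)
  last_window = array(pred, dim = c(1, 10, 1))
}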