7.4. TensorFlow, PyTorch and Keras
This section presents implementations of a simple RNN using the TensorFlow, PyTorch, and Keras frameworks.
7.4.1. TensorFlow Version
Complete Python code is available at: RNN_tf.py
The RNN class is composed of tf.keras.layers.SimpleRNN and tf.keras.layers.Dense.
class RNN(tf.keras.Model):
    """Simple recurrent model: a SimpleRNN layer followed by a Dense layer."""

    def __init__(self, hidden_units, output_units):
        """Create the recurrent layer and the output layer.

        Args:
            hidden_units: Number of units in the SimpleRNN layer.
            output_units: Number of units in the Dense output layer.
        """
        super().__init__()
        # tanh recurrent layer with Glorot-normal input weights and
        # orthogonal recurrent weights. return_sequences is not set here,
        # so the layer's default applies.
        self.simple_rnn = tf.keras.layers.SimpleRNN(
            hidden_units,
            activation="tanh",
            kernel_initializer="glorot_normal",
            recurrent_initializer="orthogonal",
        )
        # Linear (identity) activation: the output layer is a plain affine map.
        self.dense = tf.keras.layers.Dense(output_units, activation="linear")

    def call(self, x):
        """Apply the SimpleRNN layer and then the Dense layer to ``x``.

        Args:
            x: Input batch passed straight to the SimpleRNN layer.

        Returns:
            The output of the Dense layer.
        """
        x = self.simple_rnn(x)
        x = self.dense(x)
        return x
[1] Create dataset.
# n_sequence is handed to ds.dataset below; n_data is the length of the wave.
n_sequence, n_data = 25, 100
n_sample = n_data - n_sequence  # number of samples

sin_data = ds.create_wave(n_data, 0.05)
X, Y = ds.dataset(sin_data, n_sequence, False)

# X gains a trailing axis of size 1; Y is reshaped to its first two dimensions.
# presumably X and Y are NumPy arrays returned by ds.dataset -- TODO confirm
X = X.reshape(*X.shape[:2], 1)
Y = Y.reshape(*Y.shape[:2])
[2] Create model.
# Layer sizes. input_units is defined here but not passed to RNN(...) below.
input_units, hidden_units, output_units = 1, 32, 1
model = RNN(hidden_units, output_units)

# Optimizer hyperparameters.
lr, beta1, beta2 = 0.0001, 0.99, 0.9999
# NOTE(review): `Optimizer` is not defined in this excerpt, and its keyword
# names (lr, beta1, beta2) differ from tf.keras.optimizers.Adam
# (learning_rate, beta_1, beta_2). The training loop calls
# optimizer.apply_gradients(...), so confirm this object provides it.
optimizer = Optimizer.Adam(lr=lr, beta1=beta1, beta2=beta2)
[3] Training.
# Full-batch training: every epoch runs one forward/backward pass over all of X.
# `criterion` and `train_loss` are not defined in this excerpt.
n_epochs = 300
for epoch in range(1, n_epochs + 1):
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        preds = model(X)
        loss = criterion(Y, preds)
    # Differentiate the loss w.r.t. the model weights and apply one update.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
[4] Prediction.
$ python RNN_tf.py
Model: "rnn"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
simple_rnn (SimpleRNN) multiple 1088
dense (Dense) multiple 33
=================================================================
Total params: 1,121
Trainable params: 1,121
Non-trainable params: 0
_________________________________________________________________
epoch: 1/300, loss: 0.0864
... snip ...
7.4.2. PyTorch Version
Complete Python code is available at: RNN_pt.py
[1] Create dataset.
# n_sequence is handed to ds.dataset below; n_data is the length of the wave.
n_sequence, n_data = 25, 100
n_sample = n_data - n_sequence  # number of samples

sin_data = ds.create_wave(n_data, 0.05)
X, Y = ds.dataset(sin_data, n_sequence, False)

# X gains a trailing axis of size 1; Y is reshaped to its first two dimensions.
# presumably X and Y are NumPy arrays returned by ds.dataset -- TODO confirm
X = X.reshape(*X.shape[:2], 1)
Y = Y.reshape(*Y.shape[:2])
[2] Create model.
# Pick the compute device in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)
class RNN(nn.Module):
    """Simple recurrent model: an nn.RNN layer followed by an nn.Linear layer."""

    def __init__(self, input_units, hidden_units, output_units):
        """Create the layers and initialise the recurrent layer's weights.

        Args:
            input_units: Number of features per time step.
            hidden_units: Size of the RNN hidden state.
            output_units: Number of output features of the linear layer.
        """
        super().__init__()
        # batch_first=True: inputs and outputs are (batch, sequence, feature).
        self.rnn = nn.RNN(
            input_units,
            hidden_units,
            nonlinearity="tanh",
            batch_first=True,
        )
        self.dense = nn.Linear(hidden_units, output_units)
        # Xavier-normal input-to-hidden weights, orthogonal hidden-to-hidden
        # weights; biases keep PyTorch's default initialisation.
        nn.init.xavier_normal_(self.rnn.weight_ih_l0)
        nn.init.orthogonal_(self.rnn.weight_hh_l0)

    def forward(self, x):
        """Return the linear layer applied to the RNN output at the last step.

        Args:
            x: Input tensor of shape (batch, sequence, input_units).

        Returns:
            Tensor of shape (batch, output_units).
        """
        h, _ = self.rnn(x)
        # h holds the hidden state for every time step; keep only the last one.
        y = self.dense(h[:, -1])
        return y
# Layer sizes for the network.
input_units, hidden_units, output_units = 1, 32, 1

model = RNN(input_units, hidden_units, output_units).to(device)
# The summary is printed for a freshly constructed RNN rather than `model`;
# presumably so the instance already moved to `device` is left untouched --
# TODO confirm.
summary(
    model=RNN(input_units, hidden_units, output_units),
    input_size=X.shape,
)

# Adam hyperparameters.
lr, beta1, beta2 = 0.001, 0.9, 0.999

criterion = nn.MSELoss(reduction="mean")
optimizer = optimizers.Adam(
    model.parameters(),
    lr=lr,
    betas=(beta1, beta2),
    amsgrad=True,
)
[3] Training.
n_epochs = 300
history_loss = []

# The whole dataset is used as one batch, so convert it to tensors and move
# it to the device once instead of repeating the copy on every epoch.
x = torch.Tensor(X).to(device)
y = torch.Tensor(Y).to(device)

for epoch in range(1, n_epochs + 1):
    model.train()
    preds = model(x)
    # nn.MSELoss expects (input, target); the value is the same in either
    # order, but this matches the documented signature.
    loss = criterion(preds, y)

    # Standard update step: clear old gradients, backpropagate, update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # One batch per epoch, so the epoch loss is simply this batch's loss.
    train_loss = loss.item()
    history_loss.append(train_loss)
[4] Prediction.
$ python RNN_pt.py
mps
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
RNN [75, 1] --
├─RNN: 1-1 [75, 25, 32] 1,120
├─Linear: 1-2 [75, 1] 33
==========================================================================================
Total params: 1,153
Trainable params: 1,153
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 2.10
==========================================================================================
Input size (MB): 0.01
Forward/backward pass size (MB): 0.48
Params size (MB): 0.00
Estimated Total Size (MB): 0.49
==========================================================================================
epoch: 10/300, loss: 0.193
... snip ...
7.4.3. Keras Version
Complete Python code is available at: RNN_keras.py
[1] Create model.
lr = 0.001
input_units, hidden_units, output_units = 1, 32, 1

# The two layers are passed to Sequential as a list, in order: a tanh
# SimpleRNN that returns only its final output (return_sequences=False),
# then a linear Dense output layer.
model = Sequential(
    [
        SimpleRNN(
            hidden_units,
            activation="tanh",
            use_bias=True,
            input_shape=(n_sequence, input_units),
            return_sequences=False,
        ),
        Dense(output_units, activation="linear", use_bias=True),
    ]
)
model.compile(loss="mean_squared_error", optimizer=Adam(lr))
[2] Training.
n_epochs, batch_size = 100, 5

# Train in mini-batches of `batch_size`, with 10% of the data passed as
# validation_split; the object returned by fit is kept in history_rst.
history_rst = model.fit(
    x=X,
    y=Y,
    epochs=n_epochs,
    batch_size=batch_size,
    shuffle=True,
    validation_split=0.1,
    verbose=1,
)
[3] Prediction.
$ python RNN_keras.py
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
simple_rnn (SimpleRNN) (None, 32) 1088
dense (Dense) (None, 1) 33
=================================================================
Total params: 1,121
Trainable params: 1,121
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
... snip ...