ATTENTION (16 Apr 2023): As of FSL 6.0.6, eddy_cuda10.2 can run with the latest CUDA, so this post is obsolete. I wrote a new post, so please check it out.
FSL 6.0.5 ships eddy_cuda10.2, which, as the name indicates, requires CUDA 10.2.
I explored a way to use eddy_cuda10.2, PyTorch, and TensorFlow concurrently. Below is a how-to for Ubuntu 18.04/20.04.
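As a quick sanity check, you can confirm that both frameworks see the GPU. This is a minimal sketch of my own, assuming PyTorch and TensorFlow (1.x) are already installed in the same environment:

# Sanity check (my own sketch): confirm that PyTorch and TensorFlow can see CUDA.
import torch
import tensorflow as tf

print('PyTorch CUDA available:', torch.cuda.is_available())
print('PyTorch built against CUDA:', torch.version.cuda)
print('TensorFlow sees a GPU:', tf.test.is_gpu_available())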
loss = criterion(y_pred, y)
if t % 100 == 99:
    print(t, loss.item())
13. Initializing the gradients
Before running backpropagation (backward), initialize the gradients held by the model's parameters to zero.
optimizer.zero_grad()
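(Reference) The zeroing is needed because backward() adds new gradients to .grad instead of overwriting it. Below is a minimal sketch of my own (not part of the original script) that shows the accumulation; the tensor w and the loss 2*w are made up purely for illustration.

# Minimal sketch: backward() accumulates into .grad, so gradients pile up
# unless they are reset between iterations.
import torch

w = torch.ones(1, requires_grad=True)
for i in range(2):
    loss = (2 * w).sum()
    loss.backward()
    print(i, w.grad.item())  # prints 2.0, then 4.0 (accumulated)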
14. Computing the gradients
The backward method computes the gradients of the model parameters (weights).
loss.backward()
15. Updating the parameters (weights)
The step method updates the model's parameters.
optimizer.step()
16. Execution
Save the following code as 9_dynamic_graph.py.
16.1. 9_dynamic_graph.py
import random
import torch


class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = DynamicNet(D_in, H, D_out)

# Construct our loss function and an Optimizer.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
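Because the number of hidden layers is drawn with random.randint on every forward pass, the graph is rebuilt at each iteration while the same middle_linear weights are reused. The following is a small sketch of my own (reusing the DynamicNet class and the x defined above) that makes this reuse visible:

# Sketch: the depth varies from call to call, but the parameter count is fixed
# because the single middle_linear module is reused for every hidden layer.
model = DynamicNet(D_in, H, D_out)
print(model)  # one input_linear, one middle_linear, one output_linear
print(sum(p.numel() for p in model.parameters()))  # 111210 parameters in total
for _ in range(3):
    _ = model(x)  # each call may pass through 0 to 3 middle layers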
loss = criterion(y_pred, y)
if t % 100 == 99:
    print(t, loss.item())
12. Initializing the gradients
Before running backpropagation (backward), initialize the gradients held by the model's parameters to zero.
optimizer.zero_grad()
13. Computing the gradients
The backward method computes the gradients of the model parameters (weights).
loss.backward()
14. Updating the parameters (weights)
The step method updates the model's parameters.
optimizer.step()
15. Execution
Save the following code as 8_class_model.py.
15.1. 8_class_model.py
import torch


class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Construct our loss function and an Optimizer.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for t in range(500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
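Defining the network as a subclass of nn.Module means the instance itself is callable: model(x) dispatches to forward, and the Linear layers registered in __init__ are exactly what model.parameters() hands to the optimizer. A quick inspection sketch of my own, using the TwoLayerNet class defined above:

# Sketch: the submodules registered in __init__ appear in the module tree and
# supply the tensors returned by parameters().
model = TwoLayerNet(D_in, H, D_out)
print(model)  # shows linear1 and linear2
print(len(list(model.parameters())))  # 4 tensors: two weight matrices and two biases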
loss = loss_fn(y_pred, y)
if t % 100 == 99:
    print(t, loss.item())
13. Initializing the gradients
Before running backpropagation (backward), initialize the gradients held by the model's parameters to zero.
optimizer.zero_grad()
(Reference) Without the optim package, this was written as follows:
model.zero_grad()
14. Computing the gradients
The backward method computes the gradients of the model parameters (weights).
loss.backward()
15. Updating the parameters (weights)
The step method updates the model's parameters.
optimizer.step()
(Reference) Without the optim package, this was written as follows:
with torch.no_grad():
    for param in model.parameters():
        param -= learning_rate * param.grad
16. Execution
Save the following code as 7_optim_package.py.
16.1. 7_optim_package.py
import torch

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model).
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()
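One point of the optim package is that the update rule becomes a drop-in choice. As a variation of my own (not in the original script), replacing Adam with plain SGD with momentum only changes the constructor line; the zero_grad/backward/step loop stays the same:

# Variation (not in the original): any optimizer from torch.optim can be used here.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)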
with torch.no_grad():
    for param in model.parameters():
        param -= learning_rate * param.grad
(Reference) Before the nn package was used, this was written as follows:
with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad
14. Execution
Save the following code as 6_nn_package.py.
14.1. 6_nn_package.py
import torch

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Use the nn package to define our model as a sequence of layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the
    # learnable parameters of the model.
    loss.backward()

    # Update the weights using gradient descent.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
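The parameters that the manual update loop walks over are the weights and biases of the two Linear layers inside the Sequential container. A short inspection sketch of my own that lists them:

# Sketch: list the learnable parameters that the update loop above modifies.
for name, param in model.named_parameters():
    print(name, tuple(param.shape))
# 0.weight (100, 1000), 0.bias (100,), 2.weight (10, 100), 2.bias (10,)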
with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # Create numpy arrays holding the actual data for the inputs x and targets
    # y
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)
    for t in range(500):
        # Execute the graph many times.
        loss_value, _, _ = sess.run([loss, new_w1, new_w2],
                                    feed_dict={x: x_value, y: y_value})
        if t % 100 == 99:
            print(t, loss_value)
In TensorFlow, executing the computational graph requires a Session object, created with the statement with tf.Session() as sess:.
with tf.Session() as sess:
To initialize the weights w1 and w2, run the Session once.
# Run the graph once to initialize the Variables w1 and w2.
sess.run(tf.global_variables_initializer())
# Create numpy arrays holding the actual data for the inputs x and targets
# y
x_value = np.random.randn(N, D_in)
y_value = np.random.randn(N, D_out)
Here, the number of training iterations is set to 500.
At each iteration, the loss (squared error) is evaluated together with the updated weights new_w1 and new_w2.
Every 100 iterations, the iteration count t and the squared error loss_value are printed.
for t in range(500):
    loss_value, _, _ = sess.run([loss, new_w1, new_w2],
                                feed_dict={x: x_value, y: y_value})
    if t % 100 == 99:
        print(t, loss_value)
12. Execution
Save the following code as 5_static_graphs.py.
12.1. 5_static_graphs.py
import tensorflow as tf
import numpy as np

# First we set up the computational graph:

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create placeholders for the input and target data; these will be filled
# with real data when we execute the graph.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Create Variables for the weights and initialize them with random data.
# A TensorFlow Variable persists its value across executions of the graph.
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Compute loss using operations on TensorFlow Tensors
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Compute gradient of the loss with respect to w1 and w2.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Update the weights using gradient descent.
learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Now we have built our computational graph, so we enter a TensorFlow session to
# actually execute the graph.
with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # Create numpy arrays holding the actual data for the inputs x and targets
    # y
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)
    for t in range(500):
        loss_value, _, _ = sess.run([loss, new_w1, new_w2],
                                    feed_dict={x: x_value, y: y_value})
        if t % 100 == 99:
            print(t, loss_value)
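Because x and y are placeholders, the same graph can be evaluated on different data just by changing feed_dict; nothing has to be rebuilt. A small sketch of my own, meant to run inside the same with tf.Session() as sess: block as above:

    # Sketch (inside the same session): evaluate the loss on a fresh batch
    # simply by feeding different arrays to the placeholders.
    new_x = np.random.randn(N, D_in)
    new_y = np.random.randn(N, D_out)
    print(sess.run(loss, feed_dict={x: new_x, y: new_y}))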
with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad

    # Manually zero the gradients after updating weights
    w1.grad.zero_()
    w2.grad.zero_()
13. Execution
Save the following code as 4_autograd_func.py.
13.1. 4_autograd_func.py
import torch

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random Tensors for weights.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Tensors
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because the weights have requires_grad=True, but we don't need to track
    # this in autograd.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()
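For reference, the file name suggests a custom autograd Function, while the listing above uses the built-in clamp for ReLU. Below is the standard MyReLU example from the official PyTorch tutorial, added as a sketch of my own (it is not part of the listing above); it would replace the clamp-based ReLU via MyReLU.apply:

# Sketch (my addition, not in the listing above): a custom autograd Function
# implementing ReLU with explicit forward and backward passes.
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

# Usage inside the training loop: y_pred = MyReLU.apply(x.mm(w1)).mm(w2)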
with torch.no_grad():
    w1 -= learning_rate * w1.grad
    w2 -= learning_rate * w2.grad

    # Manually zero the gradients after updating weights
    w1.grad.zero_()
    w2.grad.zero_()
13. Execution
Save the following code as 3_autograd.py.
13.1. 3_autograd.py
import torch

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random Tensors for weights.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Tensors
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because the weights have requires_grad=True, but we don't need to track
    # this in autograd.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()
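The commented-out cuda:0 line switches the whole script to the GPU. A common pattern (my own addition, not in the original script) is to pick the device automatically:

# Sketch: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
x = torch.randn(N, D_in, device=device, dtype=dtype)  # tensors then follow the chosen device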