
Forward Propagation

Basic TensorFlow operations

Creating tensors

import numpy as np
import tensorflow as tf

# from NumPy arrays
tf.convert_to_tensor(np.ones([2, 3]))
tf.convert_to_tensor(np.zeros([2, 3]))

# from Python lists
tf.convert_to_tensor([1, 2])
tf.convert_to_tensor([[1], [2]])

# all-ones tensors of various shapes
tf.ones([2, 2])
tf.ones([1, 4, 2])
tf.ones([2, 4, 2])
tf.ones([3, 4, 2])

# a tensor filled with a constant value
tf.fill([2, 3], 6)

tf.random.normal([2, 2], mean=1, stddev=1)    # normal distribution
tf.random.uniform([2, 2], minval=0, maxval=1) # uniform distribution

# e.g. a batch of 2 images, 5x5 pixels, 3 channels
tf.ones([2, 5, 5, 3])
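
A quick way to verify what these constructors produce is to check the shape and dtype of each result. Continuing directly from the snippet above, the sketch below notes the dtype rules worth remembering:

t = tf.convert_to_tensor(np.ones([2, 3]))
print(t.shape, t.dtype)  # (2, 3) float64 -- NumPy's default dtype is preserved

t = tf.ones([2, 2])
print(t.shape, t.dtype)  # (2, 2) float32 -- TensorFlow's default float dtype

t = tf.fill([2, 3], 6)
print(t.dtype)           # int32, inferred from the Python int fill value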

Forward propagation in practice

Step 1: Import modules

import tensorflow as tf
from tensorflow.keras import datasets

Step 2: Load the dataset

(x_train, y_train), _ = datasets.mnist.load_data()
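
load_data returns plain NumPy arrays; the training split holds 60,000 grayscale images of 28x28 pixels with integer labels in [0, 9]. A quick inspection confirms the shapes and value range:

print(x_train.shape, x_train.dtype)  # (60000, 28, 28) uint8
print(y_train.shape, y_train.dtype)  # (60000,) uint8
print(x_train.min(), x_train.max())  # 0 255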

Step 3: Preprocess the dataset

x_train = tf.convert_to_tensor(x_train, dtype=tf.float32) / 255  # scale pixel values to [0, 1]
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
# print(tf.reduce_max(x_train))

train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)  # batches of 128
train_iter = iter(train_db)
sample = next(train_iter)  # peek at the first batch
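
Each element of train_db is an (images, labels) pair, so the sample drawn above confirms the batch layout. The len() call on a dataset is assumed to be available (TensorFlow 2.3+):

print('batch:', sample[0].shape, sample[1].shape)  # (128, 28, 28) (128,)
print('batches per epoch:', len(train_db))         # ceil(60000 / 128) = 469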

Step 4: Initialize parameters

# network: [b, 784] -> [b, 256] -> [b, 128] -> [b, 10]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros(256))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros(128))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros(10))
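
As a sanity check, the total number of trainable scalars follows directly from the shapes above and can be added up in one line:

params = [w1, b1, w2, b2, w3, b3]
total = sum(int(tf.size(p)) for p in params)
print(total)  # 784*256 + 256 + 256*128 + 128 + 128*10 + 10 = 235146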

Step 5: Train the model

lr = 0.001  # learning rate (defined here so this snippet runs standalone)

for epoch in range(10):  # iterate over the dataset several times
    for step, (x_train, y_train) in enumerate(train_db):  # per batch: x.shape: [128, 28, 28], y.shape: [128]
        x_train = tf.reshape(x_train, [-1, 28*28])  # x.shape: [b, 784]
        with tf.GradientTape() as tape:
            h1 = x_train @ w1 + b1  # h1.shape: [b, 256]
            h1 = tf.nn.relu(h1)
            h2 = h1 @ w2 + b2  # h2.shape: [b, 128]
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3  # out.shape: [b, 10]

            # compute loss (MSE between one-hot labels and raw outputs)
            y_hot = tf.one_hot(y_train, depth=10)
            loss = tf.square(y_hot - out)
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # update parameters in place; assign_sub keeps each tf.Variable intact
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        # equivalent but wasteful: rewrapping builds a new Variable every step
        # w1 = tf.Variable(w1 - lr * grads[0])
        # b1 = tf.Variable(b1 - lr * grads[1])
        # w2 = tf.Variable(w2 - lr * grads[2])
        # b2 = tf.Variable(b2 - lr * grads[3])
        # w3 = tf.Variable(w3 - lr * grads[4])
        # b3 = tf.Variable(b3 - lr * grads[5])

        if step % 100 == 0:
            print('epoch:', epoch, 'step:', step, 'current loss:', float(loss))
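
The six assign_sub calls are plain stochastic gradient descent written by hand. As a point of comparison (not what this post uses), the same update can be delegated to a Keras optimizer, replacing the whole block with a single call:

optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
# inside the loop, instead of the assign_sub block:
optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))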

Complete code

import tensorflow as tf
from tensorflow.keras import datasets

lr = 0.001

(x_train, y_train), _ = datasets.mnist.load_data()
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32) / 255  # scale pixels to [0, 1]
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
# print(tf.reduce_max(x_train))

train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
# print('batch:', sample[0].shape, sample[1].shape)

# network: [b, 784] -> [b, 256] -> [b, 128] -> [b, 10]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros(256))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros(128))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros(10))

for epoch in range(10):  # iterate over the dataset several times
    for step, (x_train, y_train) in enumerate(train_db):  # per batch: x.shape: [128, 28, 28], y.shape: [128]
        x_train = tf.reshape(x_train, [-1, 28*28])  # x.shape: [b, 784]
        with tf.GradientTape() as tape:
            h1 = x_train @ w1 + b1  # h1.shape: [b, 256]
            h1 = tf.nn.relu(h1)
            h2 = h1 @ w2 + b2  # h2.shape: [b, 128]
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3  # out.shape: [b, 10]

            # compute loss
            y_hot = tf.one_hot(y_train, depth=10)
            loss = tf.square(y_hot - out)
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # update parameters in place
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        # equivalent but wasteful: rewrapping builds a new Variable every step
        # w1 = tf.Variable(w1 - lr * grads[0])
        # b1 = tf.Variable(b1 - lr * grads[1])
        # w2 = tf.Variable(w2 - lr * grads[2])
        # b2 = tf.Variable(b2 - lr * grads[3])
        # w3 = tf.Variable(w3 - lr * grads[4])
        # b3 = tf.Variable(b3 - lr * grads[5])

        if step % 100 == 0:
            print('epoch:', epoch, 'step:', step, 'current loss:', float(loss))
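
The script stops at training and discards the test split (the `_` returned by load_data). A minimal evaluation sketch, assuming the variables above have already been trained, could look like this:

(_, _), (x_test, y_test) = datasets.mnist.load_data()
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
x_test = tf.reshape(x_test, [-1, 28*28])

# forward pass with the trained weights
h1 = tf.nn.relu(x_test @ w1 + b1)
h2 = tf.nn.relu(h1 @ w2 + b2)
out = h2 @ w3 + b3

pred = tf.argmax(out, axis=1, output_type=tf.int32)
acc = tf.reduce_mean(tf.cast(tf.equal(pred, y_test), tf.float32))
print('test accuracy:', float(acc))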