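The script below trains a three-layer fully connected network (784 -> 256 -> 128 -> 10) on MNIST in TensorFlow 2, writing out the forward pass, the loss, and the gradient-descent updates by hand instead of relying on Keras layers and optimizers.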
import tensorflow as tf
from tensorflow.keras import datasets

lr = 0.001  # learning rate for the manual gradient-descent updates

# Load MNIST and scale pixel values from [0, 255] to [0, 1].
(x_train, y_train), _ = datasets.mnist.load_data()
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32) / 255.0
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)

# Wrap the tensors in a dataset of 128-sample batches.
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)  # peek at one batch to sanity-check shapes

# Weights and biases for the 784 -> 256 -> 128 -> 10 network. tf.Variable
# makes them trainable, so GradientTape records them automatically.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros(256))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros(128))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros(10))

for epoch in range(10):
    for step, (x, y) in enumerate(train_db):
        x = tf.reshape(x, [-1, 28 * 28])  # flatten each 28x28 image to 784
        with tf.GradientTape() as tape:
            # Forward pass: two ReLU hidden layers, then a linear output layer.
            h1 = tf.nn.relu(x @ w1 + b1)
            h2 = tf.nn.relu(h1 @ w2 + b2)
            out = h2 @ w3 + b3

            # Mean squared error against the one-hot labels.
            y_hot = tf.one_hot(y, depth=10)
            loss = tf.reduce_mean(tf.square(y_hot - out))

        # Compute gradients and apply a plain gradient-descent step in place.
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print('epoch:', epoch, 'step:', step, 'current loss:', float(loss))
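The listing trains with mean squared error on the raw logits, which is enough for this demo; softmax cross-entropy is the more usual choice for classification. As a quick check of what the loop learned, the sketch below (my addition, not part of the original script) runs the same forward pass over the MNIST test split, which the original discards, and reports accuracy. It assumes the trained variables w1..b3 and the same preprocessing as above.

# Sketch: evaluate accuracy on the held-out test split. Reuses tf, datasets,
# and the trained w1..b3 from the script above (an assumption, not part of
# the original listing).
(_, _), (x_test, y_test) = datasets.mnist.load_data()
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.0
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)

test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)

correct, total = 0, 0
for x, y in test_db:
    x = tf.reshape(x, [-1, 28 * 28])      # same flattening as in training
    h1 = tf.nn.relu(x @ w1 + b1)          # same forward pass as the loop above
    h2 = tf.nn.relu(h1 @ w2 + b2)
    out = h2 @ w3 + b3
    pred = tf.argmax(out, axis=1, output_type=tf.int32)
    correct += int(tf.reduce_sum(tf.cast(tf.equal(pred, y), tf.int32)))
    total += x.shape[0]
print('test accuracy:', correct / total)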