File size: 3,360 Bytes
a61ce3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import tensorflow as tf
import numpy as np
import os

# ---- Data pipeline -------------------------------------------------------
# Fetch MNIST: 60k training and 10k test grayscale digit images (28x28).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-dim vector and scale pixel
# intensities from [0, 255] into [0, 1] as float32.
x_train = x_train.reshape(-1, 784).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 784).astype(np.float32) / 255.0

# One-hot encode the integer class labels (digits 0-9).
y_train = tf.one_hot(y_train, depth=10)
y_test = tf.one_hot(y_test, depth=10)

# tf.data pipelines: shuffle over the full training set, batches of 100.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(60000).batch(100)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(100)

# ---- Model parameters ----------------------------------------------------
# Layer widths for a 784 -> 256 -> 256 -> 10 fully connected network.
input_layer_size = 784
hidden_layer_one = 256
hidden_layer_two = 256
number_classes = 10


def _normal_variable(shape):
    """Create a trainable float32 variable initialized from N(0, 1)."""
    return tf.Variable(tf.random.normal(shape, dtype=tf.float32))


# Trainable weight matrices, keyed by layer.
weights = {
    'w1': _normal_variable([input_layer_size, hidden_layer_one]),
    'w2': _normal_variable([hidden_layer_one, hidden_layer_two]),
    'w_out': _normal_variable([hidden_layer_two, number_classes]),
}

# Trainable bias vectors, keyed by layer.
biases = {
    'b1': _normal_variable([hidden_layer_one]),
    'b2': _normal_variable([hidden_layer_two]),
    'b_out': _normal_variable([number_classes]),
}

# Network architecture
def feedforward_network(x):
    """Two-hidden-layer MLP: 784 -> 256 (ReLU) -> 256 (ReLU) -> 10 logits."""
    hidden_1 = tf.nn.relu(tf.matmul(x, weights['w1']) + biases['b1'])
    hidden_2 = tf.nn.relu(tf.matmul(hidden_1, weights['w2']) + biases['b2'])
    # Final layer emits raw logits; softmax is folded into the loss function.
    return tf.matmul(hidden_2, weights['w_out']) + biases['b_out']

# Training hyperparameters
epochs = 45
learning_rate = 0.001
job_dir = 'mnist_model'

# BUG FIX: the Adam optimizer was previously constructed inside the inner
# step loop, which threw away its first/second-moment estimates after every
# single update — effectively degrading Adam to a noisy, rescaled SGD.
# Construct it once so its state persists across steps and epochs.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

# All trainable parameters, hoisted out of the loop; the order here matches
# the order of the gradients returned by the tape below.
trainable_vars = list(weights.values()) + list(biases.values())

# Training loop
for epoch in range(epochs):
    for step, (batch_x, batch_y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = feedforward_network(batch_x)
            # Softmax cross-entropy on the raw logits, averaged over the batch.
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=batch_y))

        gradients = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(gradients, trainable_vars))

    # Report the loss of the last batch seen in this epoch.
    print(f"Epoch {epoch+1}, Loss: {loss.numpy()}")

# Evaluation
def evaluate(dataset):
    """Return classification accuracy of the network over *dataset*.

    *dataset* must yield (features, one_hot_labels) batches.
    """
    num_correct = 0
    num_seen = 0
    for features, labels in dataset:
        predicted = tf.argmax(feedforward_network(features), 1)
        expected = tf.argmax(labels, 1)
        matches = tf.cast(tf.equal(predicted, expected), tf.int32)
        num_correct += tf.reduce_sum(matches).numpy()
        num_seen += features.shape[0]
    return num_correct / num_seen

accuracy = evaluate(test_dataset)
print(f"Test accuracy: {accuracy}")

# Serving entry point: traced with a fixed input signature so it can be
# exported as a SavedModel concrete function — a batch of flattened 28x28
# images in, raw class logits out (wrapped in a dict for named outputs).
@tf.function(input_signature=[tf.TensorSpec(shape=[None, 784], dtype=tf.float32)])
def serve_model(x):
    return {'output': feedforward_network(x)}

# Save the model
class MyModel(tf.Module):
    """Container module tracking the network's variables and serving function.

    Attaching the variable dicts makes ``tf.saved_model.save`` checkpoint
    them; attaching the ``tf.function`` exports its traced concrete function.
    """

    def __init__(self, weights, biases):
        super().__init__()
        self.weights = weights
        self.biases = biases
        self.serve_model = serve_model


model = MyModel(weights, biases)
save_path = os.path.join(job_dir, 'model')
# FIX: register serve_model as the default serving signature. Without an
# explicit `signatures` argument the exported SavedModel has no
# 'serving_default' entry, so TF Serving / `saved_model_cli` cannot locate
# an inference endpoint even though serve_model was defined for that purpose.
tf.saved_model.save(model, save_path, signatures={'serving_default': serve_model})