Loading...
Loading...
Build and train neural networks with TensorFlow
npx skill4agent add thebushidocollective/han tensorflow-neural-networksimport tensorflow as tf
from tensorflow import keras
import numpy as np
# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Preprocess data
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)
# Build Sequential model
model = keras.Sequential([
keras.layers.Dense(128, activation='relu', input_shape=(784,)),
keras.layers.Dropout(0.2),
keras.layers.Dense(64, activation='relu'),
keras.layers.Dropout(0.2),
keras.layers.Dense(10, activation='softmax')
])
# Compile model
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Display model architecture
model.summary()
# Train model
history = model.fit(
x_train, y_train,
batch_size=32,
epochs=5,
validation_split=0.2,
verbose=1
)
# Evaluate model
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy:.4f}")
# Make predictions
predictions = model.predict(x_test[:5])
predicted_classes = np.argmax(predictions, axis=1)
print(f"Predicted classes: {predicted_classes}")
print(f"True classes: {y_test[:5]}")
# Save model
model.save('mnist_model.h5')
# Load model
loaded_model = keras.models.load_model('mnist_model.h5')def create_cnn_model(input_shape=(224, 224, 3), num_classes=1000):
"""Create CNN model for image classification."""
model = tf.keras.Sequential([
# Block 1
tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same',
input_shape=input_shape),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.BatchNormalization(),
# Block 2
tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.BatchNormalization(),
# Block 3
tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.BatchNormalization(),
# Classification head
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(512, activation='relu'),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(num_classes, activation='softmax')
])
return modeldef generate_model():
return tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]),
tf.keras.layers.Activation('relu'),
tf.keras.layers.Conv2D(32, (3, 3)),
tf.keras.layers.Activation('relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.Conv2D(64, (3, 3), padding='same'),
tf.keras.layers.Activation('relu'),
tf.keras.layers.Conv2D(64, (3, 3)),
tf.keras.layers.Activation('relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(512),
tf.keras.layers.Activation('relu'),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(10),
tf.keras.layers.Activation('softmax')
])
model = generate_model()tf.keras.layers.Layerimport tensorflow as tf
class CustomDense(tf.keras.layers.Layer):
def __init__(self, units=32, activation=None):
super(CustomDense, self).__init__()
self.units = units
self.activation = tf.keras.activations.get(activation)
def build(self, input_shape):
"""Create layer weights."""
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer='glorot_uniform',
trainable=True,
name='kernel'
)
self.b = self.add_weight(
shape=(self.units,),
initializer='zeros',
trainable=True,
name='bias'
)
def call(self, inputs):
"""Forward pass."""
output = tf.matmul(inputs, self.w) + self.b
if self.activation is not None:
output = self.activation(output)
return output
def get_config(self):
"""Enable serialization."""
config = super().get_config()
config.update({
'units': self.units,
'activation': tf.keras.activations.serialize(self.activation)
})
return config
# Use custom components
custom_model = tf.keras.Sequential([
CustomDense(64, activation='relu', input_shape=(10,)),
CustomDense(32, activation='relu'),
CustomDense(1, activation='sigmoid')
])
custom_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])import tensorflow as tf
class ResidualBlock(tf.keras.layers.Layer):
def __init__(self, filters, kernel_size=3):
super(ResidualBlock, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
self.bn2 = tf.keras.layers.BatchNormalization()
self.activation = tf.keras.layers.Activation('relu')
self.add = tf.keras.layers.Add()
def call(self, inputs, training=False):
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = self.activation(x)
x = self.conv2(x)
x = self.bn2(x, training=training)
x = self.add([x, inputs]) # Residual connection
x = self.activation(x)
return xclass ProjectionLayer(tf.keras.layers.Layer):
"""Linear projection layer using TF NumPy."""
def __init__(self, units):
super(ProjectionLayer, self).__init__()
self._units = units
def build(self, input_shape):
import tensorflow.experimental.numpy as tnp
stddev = tnp.sqrt(self._units).astype(tnp.float32)
initial_value = tnp.random.randn(input_shape[1], self._units).astype(
tnp.float32) / stddev
# Note that TF NumPy can interoperate with tf.Variable.
self.w = tf.Variable(initial_value, trainable=True)
def call(self, inputs):
import tensorflow.experimental.numpy as tnp
return tnp.matmul(inputs, self.w)
# Call with ndarray inputs
layer = ProjectionLayer(2)
tnp_inputs = tnp.random.randn(2, 4).astype(tnp.float32)
print("output:", layer(tnp_inputs))
# Call with tf.Tensor inputs
tf_inputs = tf.random.uniform([2, 4])
print("\noutput: ", layer(tf_inputs))tf.keras.Modelimport tensorflow as tf
class MultiTaskModel(tf.keras.Model):
def __init__(self, num_classes_task1=10, num_classes_task2=5):
super(MultiTaskModel, self).__init__()
# Shared layers
self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
self.pool = tf.keras.layers.MaxPooling2D()
self.flatten = tf.keras.layers.Flatten()
self.shared_dense = tf.keras.layers.Dense(128, activation='relu')
# Task-specific layers
self.task1_dense = tf.keras.layers.Dense(64, activation='relu')
self.task1_output = tf.keras.layers.Dense(num_classes_task1,
activation='softmax', name='task1')
self.task2_dense = tf.keras.layers.Dense(64, activation='relu')
self.task2_output = tf.keras.layers.Dense(num_classes_task2,
activation='softmax', name='task2')
def call(self, inputs, training=False):
# Shared feature extraction
x = self.conv1(inputs)
x = self.pool(x)
x = self.flatten(x)
x = self.shared_dense(x)
# Task 1 branch
task1 = self.task1_dense(x)
task1_output = self.task1_output(task1)
# Task 2 branch
task2 = self.task2_dense(x)
task2_output = self.task2_output(task2)
return task1_output, task2_outputclass Model(tf.Module):
"""A three layer neural network."""
def __init__(self):
self.layer1 = Dense(128)
self.layer2 = Dense(32)
self.layer3 = Dense(NUM_CLASSES, use_relu=False)
def __call__(self, inputs):
x = self.layer1(inputs)
x = self.layer2(x)
return self.layer3(x)
@property
def params(self):
return self.layer1.params + self.layer2.params + self.layer3.paramsimport tensorflow.experimental.numpy as tnp
class GRUCell:
"""Builds a traditional GRU cell with dense internal transformations.
Gated Recurrent Unit paper: https://arxiv.org/abs/1412.3555
"""
def __init__(self, n_units, forget_bias=0.0):
self._n_units = n_units
self._forget_bias = forget_bias
self._built = False
def __call__(self, inputs):
if not self._built:
self.build(inputs)
x, gru_state = inputs
# Dense layer on the concatenation of x and h.
y = tnp.dot(tnp.concatenate([x, gru_state], axis=-1), self.w1) + self.b1
# Update and reset gates.
u, r = tnp.split(tf.sigmoid(y), 2, axis=-1)
# Candidate.
c = tnp.dot(tnp.concatenate([x, r * gru_state], axis=-1), self.w2) + self.b2
new_gru_state = u * gru_state + (1 - u) * tnp.tanh(c)
return new_gru_state
def build(self, inputs):
# State last dimension must be n_units.
assert inputs[1].shape[-1] == self._n_units
# The dense layer input is the input and half of the GRU state.
dense_shape = inputs[0].shape[-1] + self._n_units
self.w1 = tf.Variable(tnp.random.uniform(
-0.01, 0.01, (dense_shape, 2 * self._n_units)).astype(tnp.float32))
self.b1 = tf.Variable((tnp.random.randn(2 * self._n_units) * 1e-6 + self._forget_bias
).astype(tnp.float32))
self.w2 = tf.Variable(tnp.random.uniform(
-0.01, 0.01, (dense_shape, self._n_units)).astype(tnp.float32))
self.b2 = tf.Variable((tnp.random.randn(self._n_units) * 1e-6).astype(tnp.float32))
self._built = True
@property
def weights(self):
return (self.w1, self.b1, self.w2, self.b2)import tensorflow.experimental.numpy as tnp
class Dense:
def __init__(self, n_units, activation=None):
self._n_units = n_units
self._activation = activation
self._built = False
def __call__(self, inputs):
if not self._built:
self.build(inputs)
y = tnp.dot(inputs, self.w) + self.b
if self._activation != None:
y = self._activation(y)
return y
def build(self, inputs):
shape_w = (inputs.shape[-1], self._n_units)
lim = tnp.sqrt(6.0 / (shape_w[0] + shape_w[1]))
self.w = tf.Variable(tnp.random.uniform(-lim, lim, shape_w).astype(tnp.float32))
self.b = tf.Variable((tnp.random.randn(self._n_units) * 1e-6).astype(tnp.float32))
self._built = True
@property
def weights(self):
return (self.w, self.b)class Model:
def __init__(self, vocab_size, embedding_dim, rnn_units, forget_bias=0.0, stateful=False, activation=None):
self._embedding = Embedding(vocab_size, embedding_dim)
self._gru = GRU(rnn_units, forget_bias=forget_bias, stateful=stateful)
self._dense = Dense(vocab_size, activation=activation)
self._layers = [self._embedding, self._gru, self._dense]
self._built = False
def __call__(self, inputs):
if not self._built:
self.build(inputs)
xs = inputs
for layer in self._layers:
xs = layer(xs)
return xs
def build(self, inputs):
self._embedding.build(inputs)
self._gru.build(tf.TensorSpec(inputs.shape + (self._embedding._embedding_dim,), tf.float32))
self._dense.build(tf.TensorSpec(inputs.shape + (self._gru._cell._n_units,), tf.float32))
self._built = True
@property
def weights(self):
return [layer.weights for layer in self._layers]
@property
def state(self):
return self._gru.state
def create_state(self, *args):
self._gru.create_state(*args)
def reset_state(self, *args):
self._gru.reset_state(*args)# Length of the vocabulary in chars
vocab_size = len(vocab)
# The embedding dimension
embedding_dim = 256
# Number of RNN units
rnn_units = 1024
# Batch size
BATCH_SIZE = 64
# Buffer size to shuffle the dataset
BUFFER_SIZE = 10000# Size of each input image, 28 x 28 pixels
IMAGE_SIZE = 28 * 28
# Number of distinct number labels, [0..9]
NUM_CLASSES = 10
# Number of examples in each training batch (step)
TRAIN_BATCH_SIZE = 100
# Number of training steps to run
TRAIN_STEPS = 1000
# Loads MNIST dataset.
train, test = tf.keras.datasets.mnist.load_data()
train_ds = tf.data.Dataset.from_tensor_slices(train).batch(TRAIN_BATCH_SIZE).repeat()
# Casting from raw data to the required datatypes.
def cast(images, labels):
images = tf.cast(
tf.reshape(images, [-1, IMAGE_SIZE]), tf.float32)
labels = tf.cast(labels, tf.int64)
return (images, labels)# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Normalize the input image so that each pixel value is between 0 to 1.
train_images = train_images / 255.0
test_images = test_images / 255.0
# Define the model architecture
model = keras.Sequential([
keras.layers.InputLayer(input_shape=(28, 28)),
keras.layers.Reshape(target_shape=(28, 28, 1)),
keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Flatten(),
keras.layers.Dense(10)
])
# Train the digit classification model
model.compile(optimizer='adam',
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.fit(
train_images,
train_labels,
epochs=1,
validation_data=(test_images, test_labels)
)