TensorFlow Model Building
Overview of Model Building Methods
TensorFlow provides multiple ways to build models, from the simple Sequential API to complex custom models. Choosing the right approach depends on the model complexity and specific requirements.
python
import tensorflow as tf
import numpy as np
# Check TensorFlow version
print(f"TensorFlow version: {tf.__version__}")
# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)Sequential API: Sequential Models
Basic Usage
python
# Simplest model building approach
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
# View model structure
model.summary()
# Compile model
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)Adding Layers Sequentially
python
# Create empty Sequential model
model = tf.keras.Sequential()
# Add layers one by one
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
print("Sequentially added model:")
model.summary()Practical Example: MNIST Classifier
python
# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Data preprocessing
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0
# Build Sequential model
mnist_model = tf.keras.Sequential([
tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
# Compile model
mnist_model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Train model (few epochs for demonstration)
history = mnist_model.fit(
x_train[:1000], y_train[:1000],
epochs=3,
batch_size=32,
validation_split=0.2,
verbose=1
)
# Evaluate model
test_loss, test_acc = mnist_model.evaluate(x_test[:200], y_test[:200], verbose=0)
print(f"Test accuracy: {test_acc:.4f}")Functional API: Functional Models
Basic Concepts
python
# Functional API allows building more complex model structures
inputs = tf.keras.Input(shape=(784,))
# Build network
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
# Create model
functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
functional_model.summary()Multi-Input Multi-Output Models
python
# Multi-input model example
# Input 1: Image features
image_input = tf.keras.Input(shape=(64, 64, 3), name='image_input')
x1 = tf.keras.layers.Conv2D(32, 3, activation='relu')(image_input)
x1 = tf.keras.layers.GlobalAveragePooling2D()(x1)
# Input 2: Numeric features
numeric_input = tf.keras.Input(shape=(10,), name='numeric_input')
x2 = tf.keras.layers.Dense(32, activation='relu')(numeric_input)
# Combine inputs
combined = tf.keras.layers.concatenate([x1, x2])
z = tf.keras.layers.Dense(64, activation='relu')(combined)
# Multiple outputs
output1 = tf.keras.layers.Dense(1, activation='sigmoid', name='binary_output')(z)
output2 = tf.keras.layers.Dense(3, activation='softmax', name='categorical_output')(z)
# Create multi-input multi-output model
multi_model = tf.keras.Model(
inputs=[image_input, numeric_input],
outputs=[output1, output2]
)
multi_model.summary()
# Compile multi-output model
multi_model.compile(
optimizer='adam',
loss={
'binary_output': 'binary_crossentropy',
'categorical_output': 'sparse_categorical_crossentropy'
},
metrics={
'binary_output': ['accuracy'],
'categorical_output': ['accuracy']
}
)Residual Connection Example
python
def residual_block(x, filters):
"""Residual block"""
# Main path
shortcut = x
# Residual path
x = tf.keras.layers.Dense(filters, activation='relu')(x)
x = tf.keras.layers.Dense(filters)(x)
# Adjust shortcut if dimensions don't match
if shortcut.shape[-1] != filters:
shortcut = tf.keras.layers.Dense(filters)(shortcut)
# Residual connection
x = tf.keras.layers.Add()([x, shortcut])
x = tf.keras.layers.Activation('relu')(x)
return x
# Build model with residual connections
inputs = tf.keras.Input(shape=(100,))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
# Add multiple residual blocks
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
residual_model = tf.keras.Model(inputs=inputs, outputs=outputs)
residual_model.summary()Custom Layers
Simple Custom Layer
python
class CustomDense(tf.keras.layers.Layer):
def __init__(self, units, activation=None):
super(CustomDense, self).__init__()
self.units = units
self.activation = tf.keras.activations.get(activation)
def build(self, input_shape):
# Create weights
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer='random_normal',
trainable=True,
name='weights'
)
self.b = self.add_weight(
shape=(self.units,),
initializer='zeros',
trainable=True,
name='bias'
)
def call(self, inputs):
# Forward propagation
output = tf.matmul(inputs, self.w) + self.b
if self.activation is not None:
output = self.activation(output)
return output
def get_config(self):
# For serialization
config = super().get_config()
config.update({
'units': self.units,
'activation': tf.keras.activations.serialize(self.activation)
})
return config
# Use custom layer
custom_model = tf.keras.Sequential([
CustomDense(64, activation='relu', input_shape=(784,)),
CustomDense(32, activation='relu'),
CustomDense(10, activation='softmax')
])
custom_model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
custom_model.summary()Complex Custom Layer: Attention Mechanism
python
class AttentionLayer(tf.keras.layers.Layer):
def __init__(self, attention_dim):
super(AttentionLayer, self).__init__()
self.attention_dim = attention_dim
def build(self, input_shape):
# Attention weights
self.W = self.add_weight(
shape=(input_shape[-1], self.attention_dim),
initializer='glorot_uniform',
trainable=True,
name='attention_weights'
)
self.b = self.add_weight(
shape=(self.attention_dim,),
initializer='zeros',
trainable=True,
name='attention_bias'
)
self.u = self.add_weight(
shape=(self.attention_dim,),
initializer='glorot_uniform',
trainable=True,
name='attention_context'
)
def call(self, inputs):
# Calculate attention scores
uit = tf.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
ait = tf.tensordot(uit, self.u, axes=1)
# Attention weights
attention_weights = tf.nn.softmax(ait, axis=1)
attention_weights = tf.expand_dims(attention_weights, -1)
# Weighted sum
weighted_input = inputs * attention_weights
output = tf.reduce_sum(weighted_input, axis=1)
return output
def get_config(self):
config = super().get_config()
config.update({'attention_dim': self.attention_dim})
return config
# Model with attention layer
sequence_input = tf.keras.Input(shape=(20, 64)) # Sequence length 20, feature dimension 64
attention_output = AttentionLayer(32)(sequence_input)
dense_output = tf.keras.layers.Dense(10, activation='softmax')(attention_output)
attention_model = tf.keras.Model(inputs=sequence_input, outputs=dense_output)
attention_model.summary()Custom Model Classes
Basic Custom Model
python
class CustomModel(tf.keras.Model):
def __init__(self, num_classes=10):
super(CustomModel, self).__init__()
self.num_classes = num_classes
# Define layers
self.dense1 = tf.keras.layers.Dense(64, activation='relu')
self.dropout1 = tf.keras.layers.Dropout(0.2)
self.dense2 = tf.keras.layers.Dense(32, activation='relu')
self.dropout2 = tf.keras.layers.Dropout(0.2)
self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')
def call(self, inputs, training=None):
x = self.dense1(inputs)
x = self.dropout1(x, training=training)
x = self.dense2(x)
x = self.dropout2(x, training=training)
return self.classifier(x)
def get_config(self):
return {'num_classes': self.num_classes}
# Create and use custom model
custom_model = CustomModel(num_classes=10)
# Build model (by calling once)
sample_input = tf.random.normal([1, 784])
_ = custom_model(sample_input)
custom_model.summary()Complex Custom Model: ResNet Block
python
class ResNetBlock(tf.keras.layers.Layer):
def __init__(self, filters, kernel_size=3, stride=1):
super(ResNetBlock, self).__init__()
self.filters = filters
self.kernel_size = kernel_size
self.stride = stride
# Main path
self.conv1 = tf.keras.layers.Conv2D(
filters, kernel_size, strides=stride, padding='same'
)
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(
filters, kernel_size, padding='same'
)
self.bn2 = tf.keras.layers.BatchNormalization()
# Skip connection
self.shortcut_conv = None
if stride != 1:
self.shortcut_conv = tf.keras.layers.Conv2D(
filters, 1, strides=stride, padding='same'
)
self.shortcut_bn = tf.keras.layers.BatchNormalization()
def call(self, inputs, training=None):
# Main path
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.conv2(x)
x = self.bn2(x, training=training)
# Skip connection
shortcut = inputs
if self.shortcut_conv is not None:
shortcut = self.shortcut_conv(inputs)
shortcut = self.shortcut_bn(shortcut, training=training)
# Residual connection
x = tf.keras.layers.Add()([x, shortcut])
x = tf.nn.relu(x)
return x
class MiniResNet(tf.keras.Model):
def __init__(self, num_classes=10):
super(MiniResNet, self).__init__()
self.num_classes = num_classes
# Initial convolution
self.initial_conv = tf.keras.layers.Conv2D(32, 7, strides=2, padding='same')
self.initial_bn = tf.keras.layers.BatchNormalization()
self.initial_pool = tf.keras.layers.MaxPooling2D(3, strides=2, padding='same')
# ResNet blocks
self.block1 = ResNetBlock(32)
self.block2 = ResNetBlock(64, stride=2)
self.block3 = ResNetBlock(128, stride=2)
# Classification head
self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')
def call(self, inputs, training=None):
# Initial processing
x = self.initial_conv(inputs)
x = self.initial_bn(x, training=training)
x = tf.nn.relu(x)
x = self.initial_pool(x)
# ResNet blocks
x = self.block1(x, training=training)
x = self.block2(x, training=training)
x = self.block3(x, training=training)
# Classification
x = self.global_pool(x)
return self.classifier(x)
# Create MiniResNet model
resnet_model = MiniResNet(num_classes=10)
# Build model
sample_image = tf.random.normal([1, 224, 224, 3])
_ = resnet_model(sample_image)
resnet_model.summary()Advanced Model Subclassing Techniques
Dynamic Model Structure
python
class DynamicModel(tf.keras.Model):
def __init__(self, layer_sizes, num_classes=10):
super(DynamicModel, self).__init__()
self.layer_sizes = layer_sizes
self.num_classes = num_classes
# Dynamically create layers
self.hidden_layers = []
for i, size in enumerate(layer_sizes):
self.hidden_layers.append(
tf.keras.layers.Dense(size, activation='relu', name=f'hidden_{i}')
)
self.hidden_layers.append(
tf.keras.layers.Dropout(0.2, name=f'dropout_{i}')
)
self.output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
def call(self, inputs, training=None):
x = inputs
for layer in self.hidden_layers:
x = layer(x, training=training)
return self.output_layer(x)
def get_config(self):
return {
'layer_sizes': self.layer_sizes,
'num_classes': self.num_classes
}
# Create dynamic model
dynamic_model = DynamicModel([128, 64, 32], num_classes=10)
# Test model
test_input = tf.random.normal([10, 784])
output = dynamic_model(test_input)
print(f"Dynamic model output shape: {output.shape}")Conditional Execution Model
python
class ConditionalModel(tf.keras.Model):
def __init__(self, num_classes=10):
super(ConditionalModel, self).__init__()
self.num_classes = num_classes
# Different processing branches
self.branch_a = tf.keras.Sequential([
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.3)
])
self.branch_b = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='tanh'),
tf.keras.layers.Dropout(0.2)
])
self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')
def call(self, inputs, training=None, use_branch_a=True):
if use_branch_a:
x = self.branch_a(inputs, training=training)
else:
x = self.branch_b(inputs, training=training)
return self.classifier(x)
# Use conditional model
conditional_model = ConditionalModel()
test_input = tf.random.normal([5, 100])
# Use different branches
output_a = conditional_model(test_input, use_branch_a=True)
output_b = conditional_model(test_input, use_branch_a=False)
print(f"Branch A output: {output_a.shape}")
print(f"Branch B output: {output_b.shape}")Model Composition and Ensembling
Model Stacking
python
# Create multiple base models
model1 = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
model2 = tf.keras.Sequential([
tf.keras.layers.Dense(128, activation='tanh', input_shape=(784,)),
tf.keras.layers.Dense(64, activation='tanh'),
tf.keras.layers.Dense(10, activation='softmax')
])
# Ensemble model
class EnsembleModel(tf.keras.Model):
def __init__(self, models):
super(EnsembleModel, self).__init__()
self.models = models
def call(self, inputs):
# Get predictions from all models
predictions = [model(inputs) for model in self.models]
# Average ensemble
ensemble_pred = tf.reduce_mean(tf.stack(predictions), axis=0)
return ensemble_pred
# Create ensemble model
ensemble = EnsembleModel([model1, model2])
# Test ensemble model
test_input = tf.random.normal([5, 784])
ensemble_output = ensemble(test_input)
print(f"Ensemble model output: {ensemble_output.shape}")Weighted Ensemble
python
class WeightedEnsemble(tf.keras.Model):
def __init__(self, models, weights=None):
super(WeightedEnsemble, self).__init__()
self.models = models
if weights is None:
weights = [1.0 / len(models)] * len(models)
# Create trainable weights
self.ensemble_weights = tf.Variable(
weights, trainable=True, name='ensemble_weights'
)
def call(self, inputs):
# Get predictions from all models
predictions = tf.stack([model(inputs) for model in self.models])
# Apply softmax to ensure weights sum to 1
normalized_weights = tf.nn.softmax(self.ensemble_weights)
# Weighted average
weighted_pred = tf.reduce_sum(
predictions * tf.reshape(normalized_weights, [-1, 1, 1]),
axis=0
)
return weighted_pred
# Create weighted ensemble
weighted_ensemble = WeightedEnsemble([model1, model2], weights=[0.6, 0.4])
weighted_output = weighted_ensemble(test_input)
print(f"Weighted ensemble output: {weighted_output.shape}")Model Debugging and Visualization
Model Structure Visualization
python
# Create complex model for visualization
def create_complex_model():
inputs = tf.keras.Input(shape=(784,), name='input')
# First branch
branch1 = tf.keras.layers.Dense(128, activation='relu', name='branch1_dense1')(inputs)
branch1 = tf.keras.layers.Dropout(0.2, name='branch1_dropout')(branch1)
branch1 = tf.keras.layers.Dense(64, activation='relu', name='branch1_dense2')(branch1)
# Second branch
branch2 = tf.keras.layers.Dense(64, activation='tanh', name='branch2_dense1')(inputs)
branch2 = tf.keras.layers.Dropout(0.3, name='branch2_dropout')(branch2)
# Merge branches
merged = tf.keras.layers.concatenate([branch1, branch2], name='merge')
# Output layer
outputs = tf.keras.layers.Dense(10, activation='softmax', name='output')(merged)
return tf.keras.Model(inputs=inputs, outputs=outputs, name='complex_model')
complex_model = create_complex_model()
# Visualize model structure
tf.keras.utils.plot_model(
complex_model,
to_file='model_structure.png',
show_shapes=True,
show_layer_names=True,
rankdir='TB'
)
print("Model structure diagram saved as model_structure.png")
# Detailed model information
complex_model.summary()
# Get layer information
print("\nLayer details:")
for i, layer in enumerate(complex_model.layers):
print(f"Layer {i}: {layer.name} ({layer.__class__.__name__})")
if hasattr(layer, 'units'):
print(f" Units: {layer.units}")
if hasattr(layer, 'activation'):
print(f" Activation: {layer.activation.__name__}")Intermediate Layer Output Check
python
# Create intermediate layer output model
def create_intermediate_model(base_model, layer_names):
"""Create model that outputs intermediate layer results"""
outputs = [base_model.get_layer(name).output for name in layer_names]
return tf.keras.Model(inputs=base_model.input, outputs=outputs)
# Check intermediate layer outputs
layer_names = ['branch1_dense1', 'branch2_dense1', 'merge']
intermediate_model = create_intermediate_model(complex_model, layer_names)
# Get intermediate layer outputs
test_input = tf.random.normal([1, 784])
intermediate_outputs = intermediate_model(test_input)
print("Intermediate layer outputs:")
for name, output in zip(layer_names, intermediate_outputs):
print(f"{name}: {output.shape}")Model Saving and Loading
Complete Model Saving
python
# Train a simple model
simple_model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dense(10, activation='softmax')
])
simple_model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Create fake data for training
fake_x = tf.random.normal([100, 784])
fake_y = tf.random.uniform([100], maxval=10, dtype=tf.int32)
simple_model.fit(fake_x, fake_y, epochs=1, verbose=0)
# Save complete model
simple_model.save('complete_model.h5')
print("Complete model saved")
# Load complete model
loaded_model = tf.keras.models.load_model('complete_model.h5')
print("Model loaded successfully")
# Verify loaded model
test_pred = loaded_model.predict(fake_x[:5], verbose=0)
print(f"Prediction result shape: {test_pred.shape}")Save Only Weights
python
# Save model weights
simple_model.save_weights('model_weights.h5')
print("Weights saved")
# Create new model with same structure
new_model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dense(10, activation='softmax')
])
# Load weights
new_model.load_weights('model_weights.h5')
print("Weights loaded successfully")
# Verify weights are same
original_pred = simple_model.predict(fake_x[:1], verbose=0)
new_pred = new_model.predict(fake_x[:1], verbose=0)
print(f"Predictions are identical: {np.allclose(original_pred, new_pred)}")SavedModel Format
python
# Save as SavedModel format (recommended for production)
tf.saved_model.save(simple_model, 'saved_model_dir')
print("SavedModel format saved")
# Load SavedModel
loaded_saved_model = tf.saved_model.load('saved_model_dir')
# Use loaded model for inference
inference_func = loaded_saved_model.signatures['serving_default']
test_input_dict = {'dense_input': tf.constant(fake_x[:1])}
saved_model_pred = inference_func(**test_input_dict)
print(f"SavedModel prediction result: {list(saved_model_pred.values())[0].shape}")Best Practices
1. Model Design Principles
python
# Good practice: Modular design
class ModelBlock(tf.keras.layers.Layer):
def __init__(self, units, dropout_rate=0.2):
super(ModelBlock, self).__init__()
self.dense = tf.keras.layers.Dense(units, activation='relu')
self.dropout = tf.keras.layers.Dropout(dropout_rate)
self.batch_norm = tf.keras.layers.BatchNormalization()
def call(self, inputs, training=None):
x = self.dense(inputs)
x = self.batch_norm(x, training=training)
x = self.dropout(x, training=training)
return x
# Build model using modular blocks
modular_model = tf.keras.Sequential([
ModelBlock(128),
ModelBlock(64),
ModelBlock(32),
tf.keras.layers.Dense(10, activation='softmax')
])2. Performance Optimization
python
# Use mixed precision training
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)
# Optimized model structure
@tf.function
def optimized_model_call(model, inputs):
return model(inputs, training=False)
# Correct use of batch normalization
class OptimizedModel(tf.keras.Model):
def __init__(self):
super(OptimizedModel, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(32, 3, padding='same')
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(64, 3, padding='same')
self.bn2 = tf.keras.layers.BatchNormalization()
self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
self.classifier = tf.keras.layers.Dense(10)
@tf.function
def call(self, inputs, training=None):
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.conv2(x)
x = self.bn2(x, training=training)
x = tf.nn.relu(x)
x = self.global_pool(x)
return self.classifier(x)3. Debugging Techniques
python
class DebuggableModel(tf.keras.Model):
    """MLP instrumented with runtime shape and NaN/Inf checks."""

    def __init__(self):
        super(DebuggableModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(64, activation='relu')
        self.dense2 = tf.keras.layers.Dense(32, activation='relu')
        self.output_layer = tf.keras.layers.Dense(10)

    def call(self, inputs, training=None):
        # Fail fast on wrong input rank.
        tf.debugging.assert_rank(inputs, 2, "Input must be a 2D tensor")
        x = self.dense1(inputs)
        tf.debugging.assert_all_finite(x, "dense1 output contains invalid values")
        x = self.dense2(x)
        tf.debugging.assert_all_finite(x, "dense2 output contains invalid values")
        outputs = self.output_layer(x)
        # Extra diagnostics while training.
        if training:
            tf.print("Training mode - output range:", tf.reduce_min(outputs), tf.reduce_max(outputs))
        return outputs


# Gradient checking
def check_gradients(model, inputs, targets):
    """Print per-variable gradient norms and warn on very large ones."""
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        # Reduce the per-sample losses to a scalar so gradient
        # magnitudes are independent of the batch size.
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(targets, predictions)
        )
    gradients = tape.gradient(loss, model.trainable_variables)
    # Inspect each gradient.
    for i, grad in enumerate(gradients):
        if grad is not None:
            grad_norm = tf.norm(grad)
            # Pass tensors as separate tf.print arguments; an f-string
            # stringifies the tensor eagerly and prints only a symbolic
            # placeholder inside tf.function graphs.
            tf.print("Layer", i, "gradient norm:", grad_norm)
            if grad_norm > 10.0:
                tf.print("Warning: Layer", i, "gradient too large!")

Summary
TensorFlow provides multiple flexible ways to build models:
- Sequential API: Suitable for simple linear models
- Functional API: Suitable for complex network structures, supports multi-input and multi-output
- Custom Layers: Implement special computation logic
- Model Subclassing: Maximum flexibility, suitable for research and complex applications
- Model Composition: Ensemble multiple models to improve performance
Choosing the right building approach and following best practices will help you build efficient, maintainable deep learning models!