Skip to content

TensorFlow Model Building

Overview of Model Building Methods

TensorFlow provides multiple ways to build models, from the simple Sequential API to complex custom models. Choosing the right approach depends on the model complexity and specific requirements.

python
# Environment setup shared by every snippet in this guide.
import tensorflow as tf
import numpy as np

# Check TensorFlow version
print(f"TensorFlow version: {tf.__version__}")

# Set random seed for reproducibility.
# NOTE: this makes weight init / shuffling repeatable per process, but does
# not by itself guarantee op-level determinism on GPU.
tf.random.set_seed(42)
np.random.seed(42)

Sequential API: Sequential Models

Basic Usage

python
# Simplest model building approach: a linear stack of layers.
# `input_shape` on the first layer lets Keras build weights immediately.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),  # drops 20% of activations during training
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')  # 10-class probabilities
])

# View model structure
model.summary()

# Compile model.
# sparse_categorical_crossentropy expects integer class labels (not one-hot).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

Adding Layers Sequentially

python
# Create empty Sequential model and grow it incrementally.
model = tf.keras.Sequential()

# Add layers one by one; only the first layer needs input_shape.
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.BatchNormalization())  # stabilizes/speeds training
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

print("Sequentially added model:")
model.summary()

Practical Example: MNIST Classifier

python
# Load MNIST dataset (downloads on first use, then cached locally)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Data preprocessing: flatten 28x28 images to 784-dim vectors and scale
# pixel values from [0, 255] into [0, 1].
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# Build Sequential model
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile model; integer labels -> sparse_categorical_crossentropy
mnist_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train model (only 1000 samples / 3 epochs to keep the demo fast)
history = mnist_model.fit(
    x_train[:1000], y_train[:1000],
    epochs=3,
    batch_size=32,
    validation_split=0.2,  # last 20% of the 1000 samples held out
    verbose=1
)

# Evaluate model on a small slice of the test set
test_loss, test_acc = mnist_model.evaluate(x_test[:200], y_test[:200], verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

Functional API: Functional Models

Basic Concepts

python
# Functional API: layers are called on tensors, which allows arbitrary
# DAG topologies (branches, shared layers, multi-input/output).
inputs = tf.keras.Input(shape=(784,))

# Build network by chaining layer calls on symbolic tensors
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

# Create model from the input/output tensors
functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)

functional_model.summary()

Multi-Input Multi-Output Models

python
# Multi-input model example
# Input 1: Image features (64x64 RGB)
image_input = tf.keras.Input(shape=(64, 64, 3), name='image_input')
x1 = tf.keras.layers.Conv2D(32, 3, activation='relu')(image_input)
x1 = tf.keras.layers.GlobalAveragePooling2D()(x1)  # -> (batch, 32) vector

# Input 2: Numeric features (10-dim vector)
numeric_input = tf.keras.Input(shape=(10,), name='numeric_input')
x2 = tf.keras.layers.Dense(32, activation='relu')(numeric_input)

# Combine the two branches along the feature axis
combined = tf.keras.layers.concatenate([x1, x2])
z = tf.keras.layers.Dense(64, activation='relu')(combined)

# Multiple outputs: one binary head and one 3-class head, named so that
# per-output losses/metrics can be assigned by name at compile time
output1 = tf.keras.layers.Dense(1, activation='sigmoid', name='binary_output')(z)
output2 = tf.keras.layers.Dense(3, activation='softmax', name='categorical_output')(z)

# Create multi-input multi-output model
multi_model = tf.keras.Model(
    inputs=[image_input, numeric_input],
    outputs=[output1, output2]
)

multi_model.summary()

# Compile multi-output model: dict keys must match the output layer names
multi_model.compile(
    optimizer='adam',
    loss={
        'binary_output': 'binary_crossentropy',
        'categorical_output': 'sparse_categorical_crossentropy'
    },
    metrics={
        'binary_output': ['accuracy'],
        'categorical_output': ['accuracy']
    }
)

Residual Connection Example

python
def residual_block(x, filters):
    """Apply a two-layer dense residual block to `x`.

    The input tensor is added back onto the transformed output (after a
    Dense projection when the feature widths differ), followed by a final
    ReLU activation.
    """
    identity = x

    # Transformation path: Dense + ReLU, then a linear Dense.
    transformed = tf.keras.layers.Dense(filters, activation='relu')(x)
    transformed = tf.keras.layers.Dense(filters)(transformed)

    # Project the identity branch when its width differs from `filters`.
    if identity.shape[-1] != filters:
        identity = tf.keras.layers.Dense(filters)(identity)

    # Merge the two paths and apply the closing activation.
    merged = tf.keras.layers.Add()([transformed, identity])
    return tf.keras.layers.Activation('relu')(merged)

# Build model with residual connections over a 100-dim input
inputs = tf.keras.Input(shape=(100,))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)

# Add multiple residual blocks; the last one widens 64 -> 128, so its
# shortcut is projected inside residual_block
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128)

outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

residual_model = tf.keras.Model(inputs=inputs, outputs=outputs)
residual_model.summary()

Custom Layers

Simple Custom Layer

python
class CustomDense(tf.keras.layers.Layer):
    """Fully-connected layer implemented from scratch.

    Functionally equivalent to `tf.keras.layers.Dense`: computes
    `activation(inputs @ w + b)`.
    """

    def __init__(self, units, activation=None, **kwargs):
        # BUG FIX: accept and forward **kwargs (name, dtype, input_shape, ...)
        # to the base Layer. Without this, `CustomDense(..., input_shape=(784,))`
        # raises TypeError, and `from_config` cannot round-trip the base-class
        # keys that get_config() emits.
        super(CustomDense, self).__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Create weights lazily, once the input feature dimension is known.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
            name='weights'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )
        super().build(input_shape)

    def call(self, inputs):
        # Forward pass: affine transform + optional activation.
        output = tf.matmul(inputs, self.w) + self.b
        if self.activation is not None:
            output = self.activation(output)
        return output

    def get_config(self):
        # Serialize constructor arguments so the layer can be re-created
        # via from_config / tf.keras.models.load_model.
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation)
        })
        return config

# Use the custom layer inside a Sequential model, like a built-in Dense
custom_model = tf.keras.Sequential([
    CustomDense(64, activation='relu', input_shape=(784,)),
    CustomDense(32, activation='relu'),
    CustomDense(10, activation='softmax')
])

custom_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

custom_model.summary()

Complex Custom Layer: Attention Mechanism

python
class AttentionLayer(tf.keras.layers.Layer):
    """Additive (Bahdanau-style) attention pooling over the time axis.

    Given inputs of shape (batch, timesteps, features), each timestep is
    scored against a learned context vector and the layer returns the
    attention-weighted sum, of shape (batch, features).
    """

    def __init__(self, attention_dim, **kwargs):
        # BUG FIX: accept and forward **kwargs. get_config() includes the
        # base-layer config (name, dtype, ...), so from_config(cls(**config))
        # fails unless __init__ forwards those keys to the base Layer.
        super(AttentionLayer, self).__init__(**kwargs)
        self.attention_dim = attention_dim

    def build(self, input_shape):
        # Score projection: features -> attention_dim
        self.W = self.add_weight(
            shape=(input_shape[-1], self.attention_dim),
            initializer='glorot_uniform',
            trainable=True,
            name='attention_weights'
        )
        self.b = self.add_weight(
            shape=(self.attention_dim,),
            initializer='zeros',
            trainable=True,
            name='attention_bias'
        )
        # Learned context vector collapsing scores to one per timestep
        self.u = self.add_weight(
            shape=(self.attention_dim,),
            initializer='glorot_uniform',
            trainable=True,
            name='attention_context'
        )
        super().build(input_shape)

    def call(self, inputs):
        # uit: (batch, timesteps, attention_dim)
        uit = tf.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        # ait: (batch, timesteps) — one raw score per timestep
        ait = tf.tensordot(uit, self.u, axes=1)

        # Normalize scores across timesteps, then broadcast over features
        attention_weights = tf.nn.softmax(ait, axis=1)
        attention_weights = tf.expand_dims(attention_weights, -1)

        # Weighted sum over the time axis -> (batch, features)
        weighted_input = inputs * attention_weights
        output = tf.reduce_sum(weighted_input, axis=1)

        return output

    def get_config(self):
        config = super().get_config()
        config.update({'attention_dim': self.attention_dim})
        return config

# Model with attention layer pooling a sequence into one vector
sequence_input = tf.keras.Input(shape=(20, 64))  # Sequence length 20, feature dimension 64
attention_output = AttentionLayer(32)(sequence_input)  # -> (batch, 64)
dense_output = tf.keras.layers.Dense(10, activation='softmax')(attention_output)

attention_model = tf.keras.Model(inputs=sequence_input, outputs=dense_output)
attention_model.summary()

Custom Model Classes

Basic Custom Model

python
class CustomModel(tf.keras.Model):
    """Subclassed MLP classifier: Dense(64) -> Dropout -> Dense(32) -> Dropout -> softmax.

    Parameters
    ----------
    num_classes : int
        Size of the softmax output layer.
    """

    def __init__(self, num_classes=10, **kwargs):
        # FIX: accept/forward **kwargs (e.g. name) to tf.keras.Model so the
        # model supports base-class options and from_config round-trips.
        super(CustomModel, self).__init__(**kwargs)
        self.num_classes = num_classes

        # Define layers once in __init__; call() wires them together.
        self.dense1 = tf.keras.layers.Dense(64, activation='relu')
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dense2 = tf.keras.layers.Dense(32, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        # Dropout is only active when training=True.
        x = self.dense1(inputs)
        x = self.dropout1(x, training=training)
        x = self.dense2(x)
        x = self.dropout2(x, training=training)
        return self.classifier(x)

    def get_config(self):
        # Minimal config needed to re-create the model via from_config.
        return {'num_classes': self.num_classes}

# Create and use custom model
custom_model = CustomModel(num_classes=10)

# Subclassed models have no static graph, so build the variables by
# calling the model once on a sample batch
sample_input = tf.random.normal([1, 784])
_ = custom_model(sample_input)

custom_model.summary()

Complex Custom Model: ResNet Block

python
class ResNetBlock(tf.keras.layers.Layer):
    """Basic two-conv residual block: conv-BN-ReLU, conv-BN, add, ReLU.

    The shortcut branch is projected with a 1x1 convolution whenever the
    block changes the spatial size (stride != 1) or the channel count, so
    the element-wise Add is always shape-compatible.
    """

    def __init__(self, filters, kernel_size=3, stride=1, **kwargs):
        super(ResNetBlock, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride

        # Main path
        self.conv1 = tf.keras.layers.Conv2D(
            filters, kernel_size, strides=stride, padding='same'
        )
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(
            filters, kernel_size, padding='same'
        )
        self.bn2 = tf.keras.layers.BatchNormalization()

        # Shortcut projection is created in build(), where the input
        # channel count is known.
        self.shortcut_conv = None
        self.shortcut_bn = None

    def build(self, input_shape):
        # BUG FIX: the original created the projection only when stride != 1,
        # so a stride-1 block that changes the channel count (e.g.
        # ResNetBlock(64) applied to a 32-channel input) crashed in the Add.
        if self.stride != 1 or input_shape[-1] != self.filters:
            self.shortcut_conv = tf.keras.layers.Conv2D(
                self.filters, 1, strides=self.stride, padding='same'
            )
            self.shortcut_bn = tf.keras.layers.BatchNormalization()
        super().build(input_shape)

    def call(self, inputs, training=None):
        # Main path: conv -> BN -> ReLU -> conv -> BN
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # Shortcut path (1x1-projected only when shapes would mismatch)
        shortcut = inputs
        if self.shortcut_conv is not None:
            shortcut = self.shortcut_conv(inputs)
            shortcut = self.shortcut_bn(shortcut, training=training)

        # Residual addition followed by the closing activation
        x = tf.keras.layers.Add()([x, shortcut])
        x = tf.nn.relu(x)

        return x

class MiniResNet(tf.keras.Model):
    """Small ResNet-style classifier: conv stem, three residual blocks,
    global-average-pooling head with softmax output."""

    def __init__(self, num_classes=10):
        super(MiniResNet, self).__init__()
        self.num_classes = num_classes

        # Initial convolution ("stem"): conv stride 2 then pool stride 2,
        # reducing spatial dims by 4x before the residual blocks
        self.initial_conv = tf.keras.layers.Conv2D(32, 7, strides=2, padding='same')
        self.initial_bn = tf.keras.layers.BatchNormalization()
        self.initial_pool = tf.keras.layers.MaxPooling2D(3, strides=2, padding='same')

        # ResNet blocks; stride=2 blocks halve spatial dims while widening
        self.block1 = ResNetBlock(32)
        self.block2 = ResNetBlock(64, stride=2)
        self.block3 = ResNetBlock(128, stride=2)

        # Classification head
        self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """Forward pass; `training` is forwarded to every BN-containing stage."""
        # Initial processing
        x = self.initial_conv(inputs)
        x = self.initial_bn(x, training=training)
        x = tf.nn.relu(x)
        x = self.initial_pool(x)

        # ResNet blocks
        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)

        # Classification
        x = self.global_pool(x)
        return self.classifier(x)

# Create MiniResNet model
resnet_model = MiniResNet(num_classes=10)

# Build model variables by running one forward pass on a dummy image
sample_image = tf.random.normal([1, 224, 224, 3])
_ = resnet_model(sample_image)

resnet_model.summary()

Advanced Model Subclassing Techniques

Dynamic Model Structure

python
class DynamicModel(tf.keras.Model):
    """MLP whose hidden topology is configured at construction time.

    Parameters
    ----------
    layer_sizes : sequence of int
        Width of each hidden Dense layer; each one is followed by
        Dropout(0.2).
    num_classes : int
        Size of the softmax output layer.
    """

    def __init__(self, layer_sizes, num_classes=10, **kwargs):
        # FIX: forward **kwargs to the base Model so name/dtype options and
        # from_config round-trips work.
        super(DynamicModel, self).__init__(**kwargs)
        self.layer_sizes = layer_sizes
        self.num_classes = num_classes

        # Dynamically create Dense+Dropout pairs
        self.hidden_layers = []
        for i, size in enumerate(layer_sizes):
            self.hidden_layers.append(
                tf.keras.layers.Dense(size, activation='relu', name=f'hidden_{i}')
            )
            self.hidden_layers.append(
                tf.keras.layers.Dropout(0.2, name=f'dropout_{i}')
            )

        self.output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        # Layer.__call__ routes `training` only to layers whose call()
        # accepts it (the Dropout layers here).
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x, training=training)
        return self.output_layer(x)

    def get_config(self):
        return {
            'layer_sizes': self.layer_sizes,
            'num_classes': self.num_classes
        }

# Create dynamic model with three hidden layers of decreasing width
dynamic_model = DynamicModel([128, 64, 32], num_classes=10)

# Test model on a random batch of 10 flat 784-dim examples
test_input = tf.random.normal([10, 784])
output = dynamic_model(test_input)
print(f"Dynamic model output shape: {output.shape}")

Conditional Execution Model

python
class ConditionalModel(tf.keras.Model):
    """Model whose forward pass routes through one of two branches,
    selected by a Python flag at call time."""

    def __init__(self, num_classes=10):
        super(ConditionalModel, self).__init__()
        self.num_classes = num_classes

        # Different processing branches (different widths and activations)
        self.branch_a = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.3)
        ])

        self.branch_b = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='tanh'),
            tf.keras.layers.Dropout(0.2)
        ])

        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None, use_branch_a=True):
        # NOTE(review): use_branch_a is a plain Python bool; inside a
        # tf.function each distinct value triggers a separate trace —
        # confirm that is acceptable for the intended usage.
        if use_branch_a:
            x = self.branch_a(inputs, training=training)
        else:
            x = self.branch_b(inputs, training=training)

        return self.classifier(x)

# Use conditional model on a batch of 5 random 100-dim examples
conditional_model = ConditionalModel()
test_input = tf.random.normal([5, 100])

# Use different branches; both feed the same classifier head
output_a = conditional_model(test_input, use_branch_a=True)
output_b = conditional_model(test_input, use_branch_a=False)

print(f"Branch A output: {output_a.shape}")
print(f"Branch B output: {output_b.shape}")

Model Composition and Ensembling

Model Stacking

python
# Create multiple base models with different widths/activations so the
# ensemble members make (somewhat) decorrelated errors
model1 = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model2 = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='tanh', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='tanh'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Ensemble model: simple average over member predictions
class EnsembleModel(tf.keras.Model):
    """Ensemble that returns the mean of its base models' predictions."""

    def __init__(self, models):
        super(EnsembleModel, self).__init__()
        self.models = models

    def call(self, inputs):
        # Run each member on the same batch and stack along a new leading
        # axis: (num_models, batch, num_classes).
        member_outputs = [member(inputs) for member in self.models]
        stacked = tf.stack(member_outputs)

        # Average across the model axis.
        return tf.reduce_mean(stacked, axis=0)

# Create ensemble model from the two base models defined above
ensemble = EnsembleModel([model1, model2])

# Test ensemble model; `test_input` is reused by the next snippet
test_input = tf.random.normal([5, 784])
ensemble_output = ensemble(test_input)
print(f"Ensemble model output: {ensemble_output.shape}")

Weighted Ensemble

python
class WeightedEnsemble(tf.keras.Model):
    """Ensemble whose per-model mixing weights are themselves trainable."""

    def __init__(self, models, weights=None):
        super(WeightedEnsemble, self).__init__()
        self.models = models

        # Default to a uniform weighting when none is provided.
        if weights is None:
            weights = [1.0 / len(models)] * len(models)

        # Unnormalized mixing weights, stored as a trainable variable.
        self.ensemble_weights = tf.Variable(
            weights, trainable=True, name='ensemble_weights'
        )

    def call(self, inputs):
        # Stack member predictions: (num_models, batch, num_classes).
        member_preds = tf.stack([member(inputs) for member in self.models])

        # Softmax keeps the effective weights positive and summing to 1.
        mixing = tf.nn.softmax(self.ensemble_weights)

        # Broadcast the weights over the batch/class axes, then sum over
        # the model axis.
        mixing = tf.reshape(mixing, [-1, 1, 1])
        return tf.reduce_sum(member_preds * mixing, axis=0)

# Create weighted ensemble with initial (unnormalized) weights 0.6 / 0.4;
# reuses `test_input` from the previous snippet
weighted_ensemble = WeightedEnsemble([model1, model2], weights=[0.6, 0.4])
weighted_output = weighted_ensemble(test_input)
print(f"Weighted ensemble output: {weighted_output.shape}")

Model Debugging and Visualization

Model Structure Visualization

python
# Create complex model for visualization
def create_complex_model():
    """Build a two-branch functional model over flat 784-dim inputs."""
    inputs = tf.keras.Input(shape=(784,), name='input')

    # Branch 1: wider ReLU path with dropout and a second Dense.
    relu_path = tf.keras.layers.Dense(128, activation='relu', name='branch1_dense1')(inputs)
    relu_path = tf.keras.layers.Dropout(0.2, name='branch1_dropout')(relu_path)
    relu_path = tf.keras.layers.Dense(64, activation='relu', name='branch1_dense2')(relu_path)

    # Branch 2: narrower tanh path with dropout.
    tanh_path = tf.keras.layers.Dense(64, activation='tanh', name='branch2_dense1')(inputs)
    tanh_path = tf.keras.layers.Dropout(0.3, name='branch2_dropout')(tanh_path)

    # Concatenate the two branches, then classify.
    fused = tf.keras.layers.concatenate([relu_path, tanh_path], name='merge')
    outputs = tf.keras.layers.Dense(10, activation='softmax', name='output')(fused)

    return tf.keras.Model(inputs=inputs, outputs=outputs, name='complex_model')

complex_model = create_complex_model()

# Visualize model structure (plot_model requires pydot + graphviz installed)
tf.keras.utils.plot_model(
    complex_model,
    to_file='model_structure.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB'  # top-to-bottom layout
)

print("Model structure diagram saved as model_structure.png")

# Detailed model information
complex_model.summary()

# Get layer information; hasattr guards skip layers (e.g. Dropout)
# without the queried attribute
print("\nLayer details:")
for i, layer in enumerate(complex_model.layers):
    print(f"Layer {i}: {layer.name} ({layer.__class__.__name__})")
    if hasattr(layer, 'units'):
        print(f"  Units: {layer.units}")
    if hasattr(layer, 'activation'):
        print(f"  Activation: {layer.activation.__name__}")

Intermediate Layer Output Check

python
# Create intermediate layer output model
def create_intermediate_model(base_model, layer_names):
    """Return a model exposing the outputs of the named layers of `base_model`."""
    taps = []
    for layer_name in layer_names:
        taps.append(base_model.get_layer(layer_name).output)
    return tf.keras.Model(inputs=base_model.input, outputs=taps)

# Check intermediate layer outputs of the complex model defined above
layer_names = ['branch1_dense1', 'branch2_dense1', 'merge']
intermediate_model = create_intermediate_model(complex_model, layer_names)

# Get intermediate layer outputs for a single random example
test_input = tf.random.normal([1, 784])
intermediate_outputs = intermediate_model(test_input)

print("Intermediate layer outputs:")
for name, output in zip(layer_names, intermediate_outputs):
    print(f"{name}: {output.shape}")

Model Saving and Loading

Complete Model Saving

python
# Train a simple model
simple_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

simple_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Create fake data for training; `fake_x`/`fake_y` are reused below
fake_x = tf.random.normal([100, 784])
fake_y = tf.random.uniform([100], maxval=10, dtype=tf.int32)

simple_model.fit(fake_x, fake_y, epochs=1, verbose=0)

# Save complete model (architecture + weights + optimizer state).
# NOTE: .h5 is the legacy Keras HDF5 format; recent Keras versions prefer
# the native .keras format.
simple_model.save('complete_model.h5')
print("Complete model saved")

# Load complete model
loaded_model = tf.keras.models.load_model('complete_model.h5')
print("Model loaded successfully")

# Verify loaded model produces output of the expected shape
test_pred = loaded_model.predict(fake_x[:5], verbose=0)
print(f"Prediction result shape: {test_pred.shape}")

Save Only Weights

python
# Save model weights only (no architecture / optimizer state)
simple_model.save_weights('model_weights.h5')
print("Weights saved")

# Create new model with same structure — weight loading requires a
# matching architecture
new_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Load weights
new_model.load_weights('model_weights.h5')
print("Weights loaded successfully")

# Verify weights are same by comparing predictions on the same input
original_pred = simple_model.predict(fake_x[:1], verbose=0)
new_pred = new_model.predict(fake_x[:1], verbose=0)

print(f"Predictions are identical: {np.allclose(original_pred, new_pred)}")

SavedModel Format

python
# Save as SavedModel format (recommended for production)
tf.saved_model.save(simple_model, 'saved_model_dir')
print("SavedModel format saved")

# Load SavedModel (returns a low-level object, not a Keras model)
loaded_saved_model = tf.saved_model.load('saved_model_dir')

# Use loaded model for inference via the default serving signature
inference_func = loaded_saved_model.signatures['serving_default']
# NOTE(review): the signature input key ('dense_input') is derived from the
# first layer's name and can vary between runs/versions — verify with
# inference_func.structured_input_signature before relying on it.
test_input_dict = {'dense_input': tf.constant(fake_x[:1])}
saved_model_pred = inference_func(**test_input_dict)

print(f"SavedModel prediction result: {list(saved_model_pred.values())[0].shape}")

Best Practices

1. Model Design Principles

python
# Good practice: Modular design — encapsulate a repeated layer pattern
class ModelBlock(tf.keras.layers.Layer):
    """Reusable Dense(ReLU) -> BatchNorm -> Dropout building block."""

    def __init__(self, units, dropout_rate=0.2, **kwargs):
        # FIX: forward **kwargs (name, dtype, ...) to the base Layer so the
        # block behaves like a built-in layer.
        super(ModelBlock, self).__init__(**kwargs)
        # Store constructor args so get_config() can serialize the block.
        self.units = units
        self.dropout_rate = dropout_rate
        self.dense = tf.keras.layers.Dense(units, activation='relu')
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.batch_norm = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=None):
        # BN and Dropout behave differently in training vs inference, so
        # the training flag is forwarded explicitly.
        x = self.dense(inputs)
        x = self.batch_norm(x, training=training)
        x = self.dropout(x, training=training)
        return x

    def get_config(self):
        # Added so models containing ModelBlock can be saved/re-created.
        config = super().get_config()
        config.update({'units': self.units, 'dropout_rate': self.dropout_rate})
        return config

# Build model using modular blocks; no input_shape is given, so the model
# is built lazily on its first call
modular_model = tf.keras.Sequential([
    ModelBlock(128),
    ModelBlock(64),
    ModelBlock(32),
    tf.keras.layers.Dense(10, activation='softmax')
])

2. Performance Optimization

python
# Use mixed precision training: compute in float16, keep variables float32.
# NOTE: this sets a process-wide policy — every layer constructed after this
# point is affected, not only the models in this section.
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# Wrap inference in a tf.function for graph execution
@tf.function
def optimized_model_call(model, inputs):
    return model(inputs, training=False)

# Correct use of batch normalization
class OptimizedModel(tf.keras.Model):
    """Small conv net: two conv-BN-ReLU stages, GAP, and a linear head."""

    def __init__(self):
        super(OptimizedModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, padding='same')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(64, 3, padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
        # Keep the output layer in float32: under the mixed_float16 global
        # policy set earlier in the file, logits should be computed in full
        # precision for numeric stability (per the TF mixed-precision
        # guide). With a default float32 policy this is a no-op.
        self.classifier = tf.keras.layers.Dense(10, dtype='float32')

    # FIX: removed @tf.function from call() — Keras compiles the forward
    # pass itself (via Model.compile/fit), and decorating call() directly
    # can interfere with saving and training-argument handling.
    def call(self, inputs, training=None):
        # BN needs the training flag: batch statistics while training,
        # moving averages at inference.
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = tf.nn.relu(x)

        x = self.global_pool(x)
        return self.classifier(x)

3. Debugging Techniques

python
# Model with debug information
class DebuggableModel(tf.keras.Model):
    """MLP classifier with runtime sanity checks in the forward pass."""

    def __init__(self):
        super(DebuggableModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(64, activation='relu')
        self.dense2 = tf.keras.layers.Dense(32, activation='relu')
        self.output_layer = tf.keras.layers.Dense(10)  # raw logits (no softmax)

    def call(self, inputs, training=None):
        # Fail fast on wrong input rank
        tf.debugging.assert_rank(inputs, 2, "Input must be a 2D tensor")

        x = self.dense1(inputs)
        # Catch NaN/Inf as early as possible
        tf.debugging.assert_all_finite(x, "dense1 output contains invalid values")

        x = self.dense2(x)
        tf.debugging.assert_all_finite(x, "dense2 output contains invalid values")

        outputs = self.output_layer(x)

        # Print info in debug mode.
        # NOTE(review): `if training:` evaluates a Python truth value; if
        # `training` were a symbolic tensor inside tf.function this would
        # raise — confirm the model is only called with Python booleans.
        if training:
            tf.print("Training mode - output range:", tf.reduce_min(outputs), tf.reduce_max(outputs))

        return outputs

# Gradient checking
def check_gradients(model, inputs, targets):
    """Print the gradient norm of every trainable variable for one batch.

    Useful for spotting vanishing/exploding gradients. `targets` must be
    integer class labels (sparse categorical crossentropy).
    """
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        # Reduce the per-example losses to a scalar explicitly, matching
        # what Keras training does; relying on tape.gradient's implicit
        # sum over a non-scalar loss hides the aggregation choice.
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(targets, predictions)
        )

    gradients = tape.gradient(loss, model.trainable_variables)

    # Report the L2 norm of each gradient; flag suspiciously large ones.
    for i, grad in enumerate(gradients):
        if grad is not None:
            grad_norm = tf.norm(grad)
            # FIX: pass tensors to tf.print as separate arguments — an
            # f-string would bake the symbolic tensor's repr into the
            # message if this function were traced by tf.function.
            tf.print("Layer", i, "gradient norm:", grad_norm)
            if grad_norm > 10.0:
                tf.print("Warning: Layer", i, "gradient too large!")

Summary

TensorFlow provides multiple flexible ways to build models:

  1. Sequential API: Suitable for simple linear models
  2. Functional API: Suitable for complex network structures, supports multi-input and multi-output
  3. Custom Layers: Implement special computation logic
  4. Model Subclassing: Maximum flexibility, suitable for research and complex applications
  5. Model Composition: Ensemble multiple models to improve performance

Choosing the right building approach and following best practices will help you build efficient, maintainable deep learning models!

Content is for learning and research only.