
TensorFlow Keras High-Level API

Keras Introduction

Keras is TensorFlow's high-level API that provides a simple, intuitive interface for building and training deep learning models. Starting from TensorFlow 2.0, Keras has been fully integrated into TensorFlow, becoming the primary way to build neural networks.

python
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Print library versions to confirm the environment.
# Since TF 2.0, Keras ships inside TensorFlow, so both versions come from one install.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

Model Building Methods

1. Sequential Model

The simplest model building method, suitable for linear stacking of layers.

python
# Method 1: Define in constructor
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

# Method 2: Add layer by layer (each new assignment replaces the previous `model`)
model = keras.Sequential()
model.add(keras.layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

# Method 3: Use names (layer and model names show up in summaries and saved files)
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(784,), name='hidden1'),
    keras.layers.Dropout(0.2, name='dropout1'),
    keras.layers.Dense(32, activation='relu', name='hidden2'),
    keras.layers.Dense(10, activation='softmax', name='output')
], name='mnist_model')

print(f"Model name: {model.name}")

2. Functional API

A more flexible model building method, supporting complex network structures.

python
# Define inputs (a symbolic tensor describing one sample's shape, batch dim omitted)
inputs = keras.Input(shape=(784,), name='input_layer')

# Build network by calling layers on tensors
x = keras.layers.Dense(64, activation='relu', name='hidden1')(inputs)
x = keras.layers.Dropout(0.2, name='dropout1')(x)
x = keras.layers.Dense(32, activation='relu', name='hidden2')(x)
outputs = keras.layers.Dense(10, activation='softmax', name='output')(x)

# Create model from the input/output tensors
model = keras.Model(inputs=inputs, outputs=outputs, name='functional_model')

# summary() prints itself and returns None — don't wrap it in print().
model.summary()

# Multi-input multi-output example
input1 = keras.Input(shape=(64,), name='input1')
input2 = keras.Input(shape=(32,), name='input2')

# Process first input
x1 = keras.layers.Dense(32, activation='relu')(input1)
x1 = keras.layers.Dropout(0.2)(x1)

# Process second input
x2 = keras.layers.Dense(16, activation='relu')(input2)

# Merge two branches along the feature axis
merged = keras.layers.concatenate([x1, x2])
output1 = keras.layers.Dense(10, activation='softmax', name='classification')(merged)
output2 = keras.layers.Dense(1, activation='sigmoid', name='regression')(merged)

# Create multi-output model (output names are used as keys in compile()/fit())
multi_model = keras.Model(
    inputs=[input1, input2],
    outputs=[output1, output2],
    name='multi_io_model'
)

multi_model.summary()

3. Subclassed Model

The most flexible method, defining models by inheriting from keras.Model.

python
class CustomModel(keras.Model):
    """Subclassed Keras model: a small MLP classifier (Dense-Dropout-Dense-Softmax)."""

    def __init__(self, num_classes=10):
        super().__init__(name='custom_model')
        self.num_classes = num_classes

        # Layers are created once here and wired together in call().
        self.dense1 = keras.layers.Dense(64, activation='relu')
        self.dropout1 = keras.layers.Dropout(0.2)
        self.dense2 = keras.layers.Dense(32, activation='relu')
        self.classifier = keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        # `training` toggles dropout: active during fit(), disabled for inference.
        hidden = self.dense1(inputs)
        hidden = self.dropout1(hidden, training=training)
        hidden = self.dense2(hidden)
        return self.classifier(hidden)

    def get_config(self):
        # Record constructor arguments so the model can be re-created on load.
        base_config = super().get_config()
        base_config.update({'num_classes': self.num_classes})
        return base_config

# Create custom model
custom_model = CustomModel(num_classes=10)

# Need to call the model once to build it: a subclassed model has no known
# input shape until the first forward pass, so weights (and summary()) only
# exist after this call.
dummy_input = tf.random.normal([1, 784])
_ = custom_model(dummy_input)

# summary() prints itself and returns None — don't wrap it in print().
custom_model.summary()

Common Layers Explained

1. Core Layers

python
# The following construct standalone layer instances purely to show their
# arguments; they are not attached to any model.

# Dense layer (fully connected layer)
dense = keras.layers.Dense(
    units=64,                    # Number of neurons
    activation='relu',           # Activation function
    use_bias=True,              # Whether to use bias
    kernel_initializer='glorot_uniform',  # Weight initialization
    bias_initializer='zeros',    # Bias initialization
    kernel_regularizer=keras.regularizers.l2(0.01),  # Weight regularization
    name='dense_layer'
)

# Dropout layer (only active when called with training=True)
dropout = keras.layers.Dropout(
    rate=0.2,                   # Dropout rate
    noise_shape=None,           # Noise shape
    seed=None                   # Random seed
)

# Activation layer
activation = keras.layers.Activation('relu')
# Or use specific activation function
relu = keras.layers.ReLU()
leaky_relu = keras.layers.LeakyReLU(alpha=0.1)

2. Convolutional Layers

python
# Standalone convolution layer instances demonstrating the common arguments.

# 2D convolutional layer (expects NHWC input: batch, height, width, channels)
conv2d = keras.layers.Conv2D(
    filters=32,                 # Number of convolution kernels
    kernel_size=(3, 3),         # Kernel size
    strides=(1, 1),            # Strides
    padding='valid',            # Padding: 'valid' (no pad) or 'same' (keep size)
    activation='relu',          # Activation function
    use_bias=True,             # Whether to use bias
    kernel_initializer='glorot_uniform'
)

# 1D convolutional layer (for sequence data)
conv1d = keras.layers.Conv1D(
    filters=64,
    kernel_size=3,
    activation='relu'
)

# Transposed convolution (deconvolution) — here strides=(2, 2) upsamples by 2x
conv2d_transpose = keras.layers.Conv2DTranspose(
    filters=32,
    kernel_size=(3, 3),
    strides=(2, 2),
    padding='same'
)

# Depthwise separable convolution (depthwise + pointwise; fewer parameters)
separable_conv = keras.layers.SeparableConv2D(
    filters=32,
    kernel_size=(3, 3),
    activation='relu'
)

3. Pooling Layers

python
# Standalone pooling layer instances demonstrating the common arguments.

# Max pooling
max_pool = keras.layers.MaxPooling2D(
    pool_size=(2, 2),          # Pooling window size
    strides=None,              # Strides (default equals pool_size)
    padding='valid'            # Padding method
)

# Average pooling
avg_pool = keras.layers.AveragePooling2D(
    pool_size=(2, 2),
    strides=None,
    padding='valid'
)

# Global pooling (reduces the whole spatial map to one value per channel)
global_max_pool = keras.layers.GlobalMaxPooling2D()
global_avg_pool = keras.layers.GlobalAveragePooling2D()

# 1D pooling
max_pool_1d = keras.layers.MaxPooling1D(pool_size=2)

4. Recurrent Layers

python
# Standalone recurrent layer instances demonstrating the common arguments.

# LSTM layer
lstm = keras.layers.LSTM(
    units=128,                  # Number of hidden units
    activation='tanh',          # Activation function
    recurrent_activation='sigmoid',  # Recurrent activation function
    use_bias=True,
    return_sequences=False,     # False: only the last timestep's output
    return_state=False,         # Whether to also return the final h/c states
    dropout=0.0,               # Input dropout
    recurrent_dropout=0.0      # Recurrent dropout
)

# GRU layer (return_sequences=True: one output per timestep, for stacking)
gru = keras.layers.GRU(
    units=128,
    activation='tanh',
    return_sequences=True
)

# Simple RNN layer
simple_rnn = keras.layers.SimpleRNN(
    units=64,
    activation='tanh'
)

# Bidirectional RNN (wraps an RNN layer; processes the sequence both ways)
bidirectional_lstm = keras.layers.Bidirectional(
    keras.layers.LSTM(64, return_sequences=True)
)

5. Normalization Layers

python
# Standalone normalization/regularization layer instances.

# Batch normalization (normalizes over the batch; behaves differently in
# training vs. inference via moving averages)
batch_norm = keras.layers.BatchNormalization(
    axis=-1,                   # Axis to normalize
    momentum=0.99,             # Momentum for moving average
    epsilon=0.001,             # Numerical stability parameter
    center=True,               # Whether to use beta parameter
    scale=True                 # Whether to use gamma parameter
)

# Layer normalization (normalizes per sample; no batch statistics)
layer_norm = keras.layers.LayerNormalization(
    axis=-1,
    epsilon=0.001
)

# Dropout
dropout = keras.layers.Dropout(0.2)

# Spatial dropout (for convolutional layers; drops entire feature maps)
spatial_dropout = keras.layers.SpatialDropout2D(0.2)

6. Other Common Layers

python
# Standalone utility layer instances.

# Flatten layer (collapses all non-batch dims into one)
flatten = keras.layers.Flatten()

# Reshape layer
reshape = keras.layers.Reshape((28, 28, 1))

# Permute layer (dimension reordering; indices are 1-based, batch dim excluded)
permute = keras.layers.Permute((2, 1))

# RepeatVector layer (repeats a 2D input n times along a new time axis)
repeat = keras.layers.RepeatVector(3)

# Lambda layer (custom operation; hard to serialize — prefer a Layer subclass
# for anything non-trivial)
lambda_layer = keras.layers.Lambda(lambda x: tf.square(x))

# Embedding layer (integer token ids -> dense vectors)
embedding = keras.layers.Embedding(
    input_dim=10000,           # Vocabulary size
    output_dim=128,            # Embedding dimension
    input_length=100           # Input sequence length
)

Model Compilation

python
# Basic compilation
model.compile(
    optimizer='adam',           # Optimizer
    loss='sparse_categorical_crossentropy',  # Loss function
    metrics=['accuracy']        # Evaluation metrics
)

# Detailed optimizer configuration
model.compile(
    optimizer=keras.optimizers.Adam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7
    ),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        # Must be the Sparse variant: the loss above takes integer class-id
        # labels, and TopKCategoricalAccuracy would expect one-hot labels.
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_accuracy')
    ]
)

# Multi-output model compilation (keys match the output layer names)
multi_model.compile(
    optimizer='adam',
    loss={
        'classification': 'sparse_categorical_crossentropy',
        'regression': 'mse'
    },
    loss_weights={
        'classification': 1.0,
        'regression': 0.5
    },
    metrics={
        'classification': ['accuracy'],
        'regression': ['mae']
    }
)

Model Training

1. Basic Training

python
# Prepare data: flatten 28x28 images to 784-dim vectors, scale pixels to [0, 1].
# Labels stay as integer class ids, matching the sparse_categorical loss.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# Train model
# NOTE(review): the test set is reused as validation data here for brevity;
# real projects should hold out a separate validation split.
history = model.fit(
    x_train, y_train,
    batch_size=32,             # Batch size
    epochs=10,                 # Number of epochs
    validation_data=(x_test, y_test),  # Validation data
    verbose=1,                 # Verbosity: 0=silent, 1=progress bar, 2=one line per epoch
    shuffle=True               # Whether to shuffle data
)

# View training history (history.history maps metric name -> per-epoch values)
print("Training history keys:", history.history.keys())
print("Final training accuracy:", history.history['accuracy'][-1])
print("Final validation accuracy:", history.history['val_accuracy'][-1])

2. Using Callbacks

python
# Define callbacks (hooks invoked by fit() during training)
callbacks = [
    # Early stopping: halt when val_loss hasn't improved for 5 epochs and
    # roll the model back to its best weights.
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    ),

    # Learning rate scheduling: halve the LR when val_loss plateaus for 3 epochs.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7
    ),

    # Model checkpoint: keep only the epoch with the best val_accuracy.
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=False
    ),

    # TensorBoard logging (view with: tensorboard --logdir ./logs)
    keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=1,
        write_graph=True
    ),

    # Custom callback built from a plain function
    keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: print(f"Epoch {epoch}: loss={logs['loss']:.4f}")
    )
]

# Train with callbacks (epochs=100 is an upper bound; EarlyStopping ends sooner)
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=callbacks
)

3. Custom Training Loop

python
# Custom training step, compiled to a TF graph for speed.
@tf.function
def train_step(x, y, model, optimizer, loss_fn, train_accuracy):
    """One optimization step: forward pass, loss, backprop, metric update."""
    # Record the forward pass so gradients can be computed from it.
    with tf.GradientTape() as tape:
        preds = model(x, training=True)
        batch_loss = loss_fn(y, preds)

    grads = tape.gradient(batch_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    train_accuracy.update_state(y, preds)
    return batch_loss

# Custom validation step, compiled to a TF graph for speed.
@tf.function
def val_step(x, y, model, loss_fn, val_accuracy):
    """One evaluation step: inference-mode forward pass, loss, metric update."""
    preds = model(x, training=False)
    batch_loss = loss_fn(y, preds)
    val_accuracy.update_state(y, preds)
    return batch_loss

# Training loop
def custom_training_loop(model, train_dataset, val_dataset, epochs):
    """Train `model` with explicit train/val steps, printing per-epoch stats.

    Losses are averaged over batches so the printed numbers are comparable
    across datasets of different sizes (summing them, as before, made "Loss"
    grow with the number of batches).
    """
    optimizer = keras.optimizers.Adam()
    loss_fn = keras.losses.SparseCategoricalCrossentropy()

    train_accuracy = keras.metrics.SparseCategoricalAccuracy()
    val_accuracy = keras.metrics.SparseCategoricalAccuracy()

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")

        # Training: metrics are stateful, so reset them each epoch.
        train_loss = 0.0
        train_batches = 0
        train_accuracy.reset_states()
        for x_batch, y_batch in train_dataset:
            train_loss += train_step(x_batch, y_batch, model, optimizer, loss_fn, train_accuracy)
            train_batches += 1

        # Validation
        val_loss = 0.0
        val_batches = 0
        val_accuracy.reset_states()
        for x_batch, y_batch in val_dataset:
            val_loss += val_step(x_batch, y_batch, model, loss_fn, val_accuracy)
            val_batches += 1

        # max(..., 1) guards against an empty dataset (no division by zero).
        print(f"Loss: {train_loss / max(train_batches, 1):.4f}, "
              f"Accuracy: {train_accuracy.result():.4f}, "
              f"Val Loss: {val_loss / max(val_batches, 1):.4f}, "
              f"Val Accuracy: {val_accuracy.result():.4f}")

# Prepare dataset: wrap the numpy arrays as tf.data pipelines of 32-sample batches.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Run custom training (left commented out so importing this example stays cheap)
# custom_training_loop(model, train_dataset, val_dataset, epochs=5)

Model Evaluation and Prediction

python
# Model evaluation — evaluate() returns [loss, *metrics] in compile() order.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")

# Detailed evaluation — return_dict=True gives a {name: value} mapping instead.
evaluation = model.evaluate(
    x_test, y_test,
    batch_size=32,
    verbose=1,
    return_dict=True
)
print("Detailed evaluation results:", evaluation)

# Prediction — returns one softmax probability vector per sample (shape (10, 10) here).
predictions = model.predict(x_test[:10])
print(f"Prediction shape: {predictions.shape}")
print(f"First 5 predictions: {np.argmax(predictions[:5], axis=1)}")
print(f"True labels: {y_test[:5]}")

# Batch prediction
batch_predictions = model.predict(
    x_test,
    batch_size=32,
    verbose=1
)

# Single sample prediction — slice with 0:1 (not [0]) to keep the batch dimension.
single_prediction = model.predict(x_test[0:1])
predicted_class = np.argmax(single_prediction, axis=1)[0]
print(f"Single sample predicted class: {predicted_class}")

Model Saving and Loading

python
# Save entire model (architecture + weights + optimizer state)
model.save('my_model.h5')  # HDF5 format
model.save('my_model')     # SavedModel format (recommended)

# Save weights only (requires the same architecture to load back into)
model.save_weights('model_weights.h5')

# Save model architecture as a JSON string (no weights)
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(model_json)

# Load model
loaded_model = keras.models.load_model('my_model.h5')

# Load weights
model.load_weights('model_weights.h5')

# Rebuild model from architecture, then restore its weights separately
with open('model_architecture.json', 'r') as f:
    model_json = f.read()
model_from_json = keras.models.model_from_json(model_json)
model_from_json.load_weights('model_weights.h5')

# Verify loaded model — np.allclose tolerates tiny float differences.
loaded_predictions = loaded_model.predict(x_test[:5])
original_predictions = model.predict(x_test[:5])
print("Model loading verification:", np.allclose(loaded_predictions, original_predictions))

Model Visualization

python
# Plot model structure
# NOTE: plot_model requires pydot and graphviz to be installed.
keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',              # 'TB'=top to bottom, 'LR'=left to right
    expand_nested=False,
    dpi=96
)

# View model summary — summary() prints the table itself and returns None,
# so wrapping it in print() would emit a stray "None" line.
model.summary()

# Get layer information
for i, layer in enumerate(model.layers):
    print(f"Layer {i}: {layer.name} - {layer.__class__.__name__}")
    # Not every layer type has units/activation, hence the hasattr guards.
    if hasattr(layer, 'units'):
        print(f"  Units: {layer.units}")
    if hasattr(layer, 'activation'):
        print(f"  Activation: {layer.activation}")
    print(f"  Output shape: {layer.output_shape}")
    print(f"  Parameter count: {layer.count_params()}")
    print()

# Visualize training history
import matplotlib.pyplot as plt

def plot_training_history(history):
    """Plot training-vs-validation loss and accuracy curves side by side."""
    # (train key, val key, title, y-label, train legend, val legend) per panel
    panels = (
        ('loss', 'val_loss', 'Model loss', 'Loss',
         'Training loss', 'Validation loss'),
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy',
         'Training accuracy', 'Validation accuracy'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    for ax, (train_key, val_key, title, ylabel, train_lbl, val_lbl) in zip(axes, panels):
        ax.plot(history.history[train_key], label=train_lbl)
        ax.plot(history.history[val_key], label=val_lbl)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()

    plt.tight_layout()
    plt.show()

# plot_training_history(history)

Advanced Features

1. Custom Layers

python
class CustomDense(keras.layers.Layer):
    """A from-scratch fully connected layer: output = activation(inputs @ w + b)."""

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # Resolve activation names like 'relu' to callables; None stays None.
        self.activation = keras.activations.get(activation)

    def build(self, input_shape):
        # Weights are created lazily here, once the input feature size is known.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
            name='kernel'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )
        super().build(input_shape)

    def call(self, inputs):
        z = tf.matmul(inputs, self.w) + self.b
        return z if self.activation is None else self.activation(z)

    def get_config(self):
        # Record constructor args so the layer survives model serialization.
        cfg = super().get_config()
        cfg.update({
            'units': self.units,
            'activation': keras.activations.serialize(self.activation)
        })
        return cfg

# Use custom layer — CustomDense plugs into Sequential like any built-in layer.
model_with_custom = keras.Sequential([
    CustomDense(64, activation='relu', input_shape=(784,)),
    CustomDense(10, activation='softmax')
])

2. Custom Loss Functions

python
# Custom loss function: mean squared error between targets and predictions.
# NOTE(review): assumes y_true has the same shape as y_pred (e.g. one-hot
# labels for this softmax classifier) — confirm against the label format.
def custom_loss(y_true, y_pred):
    squared_errors = tf.math.squared_difference(y_true, y_pred)
    return tf.reduce_mean(squared_errors)

# Use custom loss
model.compile(
    optimizer='adam',
    loss=custom_loss,
    metrics=['accuracy']
)

3. Custom Metrics

python
class CustomAccuracy(keras.metrics.Metric):
    """Streaming accuracy for integer labels against per-class score predictions.

    Accumulates the number of correct predictions (`total`) and the number of
    samples seen (`count`) across batches; result() is their ratio.
    """

    def __init__(self, name='custom_accuracy', **kwargs):
        super(CustomAccuracy, self).__init__(name=name, **kwargs)
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        # sample_weight is accepted for API compatibility but ignored here.
        # Reduce per-class scores to one predicted class index per sample.
        y_pred = tf.argmax(y_pred, axis=1)
        y_true = tf.cast(y_true, y_pred.dtype)

        matches = tf.cast(tf.equal(y_true, y_pred), tf.float32)
        self.total.assign_add(tf.reduce_sum(matches))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        # divide_no_nan returns 0 instead of NaN when count == 0 (i.e. before
        # any update), matching the behavior of built-in Keras metrics.
        return tf.math.divide_no_nan(self.total, self.count)

    def reset_states(self):
        self.total.assign(0)
        self.count.assign(0)

# Use custom metric — a Metric instance is passed alongside (or instead of)
# built-in metric names.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=[CustomAccuracy()]
)

Summary

Keras provides high-level interfaces for building deep learning models with main features including:

  1. Multiple Building Methods: Sequential, Functional API, Subclassed Models
  2. Rich Layer Types: Dense, Conv2D, LSTM, and various predefined layers
  3. Flexible Training: fit method, custom training loops, callbacks
  4. Complete Toolchain: Model saving and loading, visualization, evaluation and prediction
  5. Highly Extensible: Support for custom layers, loss functions, and metrics

Mastering Keras API is a key skill for deep learning with TensorFlow!

Content is for learning and research only.