TensorFlow Keras High-Level API
Keras Introduction
Keras is TensorFlow's high-level API that provides a simple, intuitive interface for building and training deep learning models. Starting from TensorFlow 2.0, Keras has been fully integrated into TensorFlow, becoming the primary way to build neural networks.
python
import tensorflow as tf
from tensorflow import keras
import numpy as np
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

Model Building Methods
1. Sequential Model
The simplest model building method, suitable for linear stacking of layers.
python
# Method 1: Define in constructor
model = keras.Sequential([
keras.layers.Dense(64, activation='relu', input_shape=(784,)),
keras.layers.Dropout(0.2),
keras.layers.Dense(32, activation='relu'),
keras.layers.Dense(10, activation='softmax')
])
print(model.summary())
# Method 2: Add layer by layer
model = keras.Sequential()
model.add(keras.layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
# Method 3: Use names
model = keras.Sequential([
keras.layers.Dense(64, activation='relu', input_shape=(784,), name='hidden1'),
keras.layers.Dropout(0.2, name='dropout1'),
keras.layers.Dense(32, activation='relu', name='hidden2'),
keras.layers.Dense(10, activation='softmax', name='output')
], name='mnist_model')
print(f"Model name: {model.name}")

2. Functional API
A more flexible model building method, supporting complex network structures.
python
# Define inputs
inputs = keras.Input(shape=(784,), name='input_layer')
# Build network
x = keras.layers.Dense(64, activation='relu', name='hidden1')(inputs)
x = keras.layers.Dropout(0.2, name='dropout1')(x)
x = keras.layers.Dense(32, activation='relu', name='hidden2')(x)
outputs = keras.layers.Dense(10, activation='softmax', name='output')(x)
# Create model
model = keras.Model(inputs=inputs, outputs=outputs, name='functional_model')
print(model.summary())
# Multi-input multi-output example
input1 = keras.Input(shape=(64,), name='input1')
input2 = keras.Input(shape=(32,), name='input2')
# Process first input
x1 = keras.layers.Dense(32, activation='relu')(input1)
x1 = keras.layers.Dropout(0.2)(x1)
# Process second input
x2 = keras.layers.Dense(16, activation='relu')(input2)
# Merge two branches
merged = keras.layers.concatenate([x1, x2])
output1 = keras.layers.Dense(10, activation='softmax', name='classification')(merged)
output2 = keras.layers.Dense(1, activation='sigmoid', name='regression')(merged)
# Create multi-output model
multi_model = keras.Model(
inputs=[input1, input2],
outputs=[output1, output2],
name='multi_io_model'
)
print(multi_model.summary())

3. Subclassed Model
The most flexible method, defining models by inheriting from keras.Model.
python
class CustomModel(keras.Model):
    """Subclassed Keras model: two ReLU hidden layers with dropout in
    between, topped by a softmax classification head.

    Args:
        num_classes: Width of the output softmax layer.
    """

    def __init__(self, num_classes=10):
        super().__init__(name='custom_model')
        self.num_classes = num_classes
        # Layers are created once here and reused on every forward pass.
        self.dense1 = keras.layers.Dense(64, activation='relu')
        self.dropout1 = keras.layers.Dropout(0.2)
        self.dense2 = keras.layers.Dense(32, activation='relu')
        self.classifier = keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """Forward pass; `training` toggles dropout on/off."""
        hidden = self.dense1(inputs)
        hidden = self.dropout1(hidden, training=training)
        hidden = self.dense2(hidden)
        return self.classifier(hidden)

    def get_config(self):
        """Return a serializable config so the model can be re-created."""
        cfg = super().get_config()
        cfg.update({'num_classes': self.num_classes})
        return cfg
# Create custom model
custom_model = CustomModel(num_classes=10)
# Need to call model first to build
dummy_input = tf.random.normal([1, 784])
_ = custom_model(dummy_input)
print(custom_model.summary())

Common Layers Explained
1. Core Layers
python
# Dense layer (fully connected layer)
dense = keras.layers.Dense(
units=64, # Number of neurons
activation='relu', # Activation function
use_bias=True, # Whether to use bias
kernel_initializer='glorot_uniform', # Weight initialization
bias_initializer='zeros', # Bias initialization
kernel_regularizer=keras.regularizers.l2(0.01), # Weight regularization
name='dense_layer'
)
# Dropout layer
dropout = keras.layers.Dropout(
rate=0.2, # Dropout rate
noise_shape=None, # Noise shape
seed=None # Random seed
)
# Activation layer
activation = keras.layers.Activation('relu')
# Or use specific activation function
relu = keras.layers.ReLU()
leaky_relu = keras.layers.LeakyReLU(alpha=0.1)

2. Convolutional Layers
python
# 2D convolutional layer
conv2d = keras.layers.Conv2D(
filters=32, # Number of convolution kernels
kernel_size=(3, 3), # Kernel size
strides=(1, 1), # Strides
padding='valid', # Padding: 'valid' or 'same'
activation='relu', # Activation function
use_bias=True, # Whether to use bias
kernel_initializer='glorot_uniform'
)
# 1D convolutional layer (for sequence data)
conv1d = keras.layers.Conv1D(
filters=64,
kernel_size=3,
activation='relu'
)
# Transposed convolution (deconvolution)
conv2d_transpose = keras.layers.Conv2DTranspose(
filters=32,
kernel_size=(3, 3),
strides=(2, 2),
padding='same'
)
# Depthwise separable convolution
separable_conv = keras.layers.SeparableConv2D(
filters=32,
kernel_size=(3, 3),
activation='relu'
)

3. Pooling Layers
python
# Max pooling
max_pool = keras.layers.MaxPooling2D(
pool_size=(2, 2), # Pooling window size
strides=None, # Strides (default equals pool_size)
padding='valid' # Padding method
)
# Average pooling
avg_pool = keras.layers.AveragePooling2D(
pool_size=(2, 2),
strides=None,
padding='valid'
)
# Global pooling
global_max_pool = keras.layers.GlobalMaxPooling2D()
global_avg_pool = keras.layers.GlobalAveragePooling2D()
# 1D pooling
max_pool_1d = keras.layers.MaxPooling1D(pool_size=2)

4. Recurrent Layers
python
# LSTM layer
lstm = keras.layers.LSTM(
units=128, # Number of hidden units
activation='tanh', # Activation function
recurrent_activation='sigmoid', # Recurrent activation function
use_bias=True,
return_sequences=False, # Whether to return complete sequence
return_state=False, # Whether to return final state
dropout=0.0, # Input dropout
recurrent_dropout=0.0 # Recurrent dropout
)
# GRU layer
gru = keras.layers.GRU(
units=128,
activation='tanh',
return_sequences=True
)
# Simple RNN layer
simple_rnn = keras.layers.SimpleRNN(
units=64,
activation='tanh'
)
# Bidirectional RNN
bidirectional_lstm = keras.layers.Bidirectional(
keras.layers.LSTM(64, return_sequences=True)
)

5. Normalization Layers
python
# Batch normalization
batch_norm = keras.layers.BatchNormalization(
axis=-1, # Axis to normalize
momentum=0.99, # Momentum for moving average
epsilon=0.001, # Numerical stability parameter
center=True, # Whether to use beta parameter
scale=True # Whether to use gamma parameter
)
# Layer normalization
layer_norm = keras.layers.LayerNormalization(
axis=-1,
epsilon=0.001
)
# Dropout
dropout = keras.layers.Dropout(0.2)
# Spatial dropout (for convolutional layers)
spatial_dropout = keras.layers.SpatialDropout2D(0.2)

6. Other Common Layers
python
# Flatten layer
flatten = keras.layers.Flatten()
# Reshape layer
reshape = keras.layers.Reshape((28, 28, 1))
# Permute layer (dimension reordering)
permute = keras.layers.Permute((2, 1))
# RepeatVector layer
repeat = keras.layers.RepeatVector(3)
# Lambda layer (custom operation)
lambda_layer = keras.layers.Lambda(lambda x: tf.square(x))
# Embedding layer
embedding = keras.layers.Embedding(
input_dim=10000, # Vocabulary size
output_dim=128, # Embedding dimension
input_length=100 # Input sequence length
)

Model Compilation
python
# Basic compilation
model.compile(
    optimizer='adam',                         # Optimizer
    loss='sparse_categorical_crossentropy',   # Loss function
    metrics=['accuracy']                      # Evaluation metrics
)
# Detailed optimizer configuration
model.compile(
    optimizer=keras.optimizers.Adam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7
    ),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        # The loss above takes integer (sparse) class labels, so the sparse
        # top-k variant is required here; TopKCategoricalAccuracy expects
        # one-hot labels and would compute the wrong value.
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_accuracy')
    ]
)
# Multi-output model compilation
multi_model.compile(
optimizer='adam',
loss={
'classification': 'sparse_categorical_crossentropy',
'regression': 'mse'
},
loss_weights={
'classification': 1.0,
'regression': 0.5
},
metrics={
'classification': ['accuracy'],
'regression': ['mae']
}
)

Model Training
1. Basic Training
python
# Prepare data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0
# Train model
history = model.fit(
x_train, y_train,
batch_size=32, # Batch size
epochs=10, # Number of epochs
validation_data=(x_test, y_test), # Validation data
verbose=1, # Verbosity: 0=silent, 1=progress bar, 2=one line per epoch
shuffle=True # Whether to shuffle data
)
# View training history
print("Training history keys:", history.history.keys())
print("Final training accuracy:", history.history['accuracy'][-1])
print("Final validation accuracy:", history.history['val_accuracy'][-1])

2. Using Callbacks
python
# Define callbacks
callbacks = [
# Early stopping
keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=5,
restore_best_weights=True
),
# Learning rate scheduling
keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=3,
min_lr=1e-7
),
# Model checkpoint
keras.callbacks.ModelCheckpoint(
filepath='best_model.h5',
monitor='val_accuracy',
save_best_only=True,
save_weights_only=False
),
# TensorBoard logging
keras.callbacks.TensorBoard(
log_dir='./logs',
histogram_freq=1,
write_graph=True
),
# Custom callback
keras.callbacks.LambdaCallback(
on_epoch_end=lambda epoch, logs: print(f"Epoch {epoch}: loss={logs['loss']:.4f}")
)
]
# Train with callbacks
history = model.fit(
x_train, y_train,
batch_size=32,
epochs=100,
validation_data=(x_test, y_test),
callbacks=callbacks
)

3. Custom Training Loop
python
# Custom training step
@tf.function
def train_step(x, y, model, optimizer, loss_fn, train_accuracy):
    """Run one gradient-descent step on a single batch.

    Computes the loss under a GradientTape, applies the resulting
    gradients with `optimizer`, updates `train_accuracy`, and returns
    the scalar batch loss.
    """
    with tf.GradientTape() as tape:
        preds = model(x, training=True)
        batch_loss = loss_fn(y, preds)
    # Backpropagate and apply the parameter update.
    grads = tape.gradient(batch_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_accuracy.update_state(y, preds)
    return batch_loss
# Custom validation step
@tf.function
def val_step(x, y, model, loss_fn, val_accuracy):
    """Evaluate one batch without updating weights.

    Runs the model in inference mode, updates `val_accuracy`, and
    returns the scalar batch loss.
    """
    preds = model(x, training=False)
    batch_loss = loss_fn(y, preds)
    val_accuracy.update_state(y, preds)
    return batch_loss
# Training loop
def custom_training_loop(model, train_dataset, val_dataset, epochs):
    """Train `model` with an explicit loop over `train_dataset`.

    Relies on the module-level `train_step` / `val_step` functions.
    Prints per-epoch mean loss and accuracy for both splits.

    Args:
        model: A compiled-or-not Keras model (weights are updated in place).
        train_dataset: Iterable of (x_batch, y_batch) training batches.
        val_dataset: Iterable of (x_batch, y_batch) validation batches.
        epochs: Number of passes over the training data.
    """
    optimizer = keras.optimizers.Adam()
    loss_fn = keras.losses.SparseCategoricalCrossentropy()
    train_accuracy = keras.metrics.SparseCategoricalAccuracy()
    val_accuracy = keras.metrics.SparseCategoricalAccuracy()
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        # Training: accumulate loss and batch count so we can report the
        # mean batch loss (a raw sum would scale with dataset size).
        train_loss = 0.0
        train_batches = 0
        train_accuracy.reset_states()
        for x_batch, y_batch in train_dataset:
            loss = train_step(x_batch, y_batch, model, optimizer, loss_fn, train_accuracy)
            train_loss += float(loss)  # float() also makes :.4f formatting safe
            train_batches += 1
        # Validation
        val_loss = 0.0
        val_batches = 0
        val_accuracy.reset_states()
        for x_batch, y_batch in val_dataset:
            loss = val_step(x_batch, y_batch, model, loss_fn, val_accuracy)
            val_loss += float(loss)
            val_batches += 1
        # max(..., 1) guards against an empty dataset.
        train_loss /= max(train_batches, 1)
        val_loss /= max(val_batches, 1)
        print(f"Loss: {train_loss:.4f}, Accuracy: {train_accuracy.result():.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy.result():.4f}")
# Prepare dataset
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# Run custom training
# custom_training_loop(model, train_dataset, val_dataset, epochs=5)

Model Evaluation and Prediction
python
# Model evaluation
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")
# Detailed evaluation
evaluation = model.evaluate(
x_test, y_test,
batch_size=32,
verbose=1,
return_dict=True
)
print("Detailed evaluation results:", evaluation)
# Prediction
predictions = model.predict(x_test[:10])
print(f"Prediction shape: {predictions.shape}")
print(f"First 5 predictions: {np.argmax(predictions[:5], axis=1)}")
print(f"True labels: {y_test[:5]}")
# Batch prediction
batch_predictions = model.predict(
x_test,
batch_size=32,
verbose=1
)
# Single sample prediction
single_prediction = model.predict(x_test[0:1])
predicted_class = np.argmax(single_prediction, axis=1)[0]
print(f"Single sample predicted class: {predicted_class}")

Model Saving and Loading
python
# Save entire model
model.save('my_model.h5') # HDF5 format
model.save('my_model') # SavedModel format (recommended)
# Save weights only
model.save_weights('model_weights.h5')
# Save model architecture
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
f.write(model_json)
# Load model
loaded_model = keras.models.load_model('my_model.h5')
# Load weights
model.load_weights('model_weights.h5')
# Rebuild model from architecture
with open('model_architecture.json', 'r') as f:
model_json = f.read()
model_from_json = keras.models.model_from_json(model_json)
model_from_json.load_weights('model_weights.h5')
# Verify loaded model
loaded_predictions = loaded_model.predict(x_test[:5])
original_predictions = model.predict(x_test[:5])
print("Model loading verification:", np.allclose(loaded_predictions, original_predictions))

Model Visualization
python
# Plot model structure
keras.utils.plot_model(
model,
to_file='model.png',
show_shapes=True,
show_layer_names=True,
rankdir='TB', # 'TB'=top to bottom, 'LR'=left to right
expand_nested=False,
dpi=96
)
# View model summary
print(model.summary())
# Get layer information
for i, layer in enumerate(model.layers):
print(f"Layer {i}: {layer.name} - {layer.__class__.__name__}")
if hasattr(layer, 'units'):
print(f" Units: {layer.units}")
if hasattr(layer, 'activation'):
print(f" Activation: {layer.activation}")
print(f" Output shape: {layer.output_shape}")
print(f" Parameter count: {layer.count_params()}")
print()
# Visualize training history
import matplotlib.pyplot as plt
def plot_training_history(history):
    """Plot side-by-side loss and accuracy curves from a Keras History.

    Expects `history.history` to contain the keys 'loss', 'val_loss',
    'accuracy' and 'val_accuracy'.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))
    # Loss curve
    loss_ax.plot(history.history['loss'], label='Training loss')
    loss_ax.plot(history.history['val_loss'], label='Validation loss')
    loss_ax.set_title('Model loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    # Accuracy curve
    acc_ax.plot(history.history['accuracy'], label='Training accuracy')
    acc_ax.plot(history.history['val_accuracy'], label='Validation accuracy')
    acc_ax.set_title('Model accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()
    plt.tight_layout()
    plt.show()
# plot_training_history(history)

Advanced Features
1. Custom Layers
python
class CustomDense(keras.layers.Layer):
    """Hand-rolled fully connected layer: activation(inputs @ w + b).

    Weights are created lazily in build(), once the input feature
    dimension is known.

    Args:
        units: Number of output units.
        activation: Activation name or callable (None for linear).
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # Resolves string names like 'relu' to callables; None stays None.
        self.activation = keras.activations.get(activation)

    def build(self, input_shape):
        """Create kernel and bias sized from the last input dimension."""
        in_dim = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_dim, self.units),
            initializer='random_normal',
            trainable=True,
            name='kernel'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )
        super().build(input_shape)

    def call(self, inputs):
        """Apply the affine transform, then the optional activation."""
        out = tf.matmul(inputs, self.w) + self.b
        return out if self.activation is None else self.activation(out)

    def get_config(self):
        """Serialize constructor arguments for saving/cloning."""
        cfg = super().get_config()
        cfg.update({
            'units': self.units,
            'activation': keras.activations.serialize(self.activation)
        })
        return cfg
# Use custom layer
model_with_custom = keras.Sequential([
CustomDense(64, activation='relu', input_shape=(784,)),
CustomDense(10, activation='softmax')
])

2. Custom Loss Functions
python
def custom_loss(y_true, y_pred):
    """Mean squared error, written by hand as an example custom loss."""
    squared_errors = tf.math.squared_difference(y_true, y_pred)
    return tf.reduce_mean(squared_errors)
# Use custom loss
model.compile(
optimizer='adam',
loss=custom_loss,
metrics=['accuracy']
)

3. Custom Metrics
python
class CustomAccuracy(keras.metrics.Metric):
    """Streaming accuracy for integer labels vs. class-score predictions.

    Keeps two scalar accumulators: the number of correct predictions
    (`total`) and the number of examples seen (`count`).
    """

    def __init__(self, name='custom_accuracy', **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate match counts for one batch (`sample_weight` ignored)."""
        predicted = tf.argmax(y_pred, axis=1)
        labels = tf.cast(y_true, predicted.dtype)
        hits = tf.cast(tf.equal(labels, predicted), tf.float32)
        self.total.assign_add(tf.reduce_sum(hits))
        self.count.assign_add(tf.cast(tf.size(labels), tf.float32))

    def result(self):
        """Fraction of correct predictions accumulated so far."""
        return self.total / self.count

    def reset_states(self):
        """Zero both accumulators (e.g. between epochs)."""
        self.total.assign(0)
        self.count.assign(0)
# Use custom metric
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=[CustomAccuracy()]
)

Summary
Keras provides high-level interfaces for building deep learning models with main features including:
- Multiple Building Methods: Sequential, Functional API, Subclassed Models
- Rich Layer Types: Dense, Conv2D, LSTM, and various predefined layers
- Flexible Training: fit method, custom training loops, callbacks
- Complete Toolchain: Model saving and loading, visualization, evaluation and prediction
- Highly Extensible: Support for custom layers, loss functions, and metrics
Mastering Keras API is a key skill for deep learning with TensorFlow!