TensorFlow Tensor Basics
What is a Tensor?
A tensor is the basic data structure in TensorFlow, which can be understood as a generalization of multidimensional arrays. In TensorFlow, all data exists and flows in the form of tensors.
python
import tensorflow as tf
import numpy as np

# One example tensor per rank: 0-D (scalar), 1-D (vector),
# 2-D (matrix) and a 3-D block of zeros.
scalar = tf.constant(3.14)              # rank 0
vector = tf.constant([1, 2, 3])         # rank 1
matrix = tf.constant([[1, 2], [3, 4]])  # rank 2
tensor_3d = tf.zeros([2, 3, 4])         # rank 3

for label, value in (("Scalar", scalar), ("Vector", vector), ("Matrix", matrix)):
    print(f"{label}: {value}")
print(f"3D tensor shape: {tensor_3d.shape}")

Creating Tensors
1. Create from Constants
python
import tensorflow as tf

# tf.constant infers both shape and dtype from the Python literal.
values = tf.constant([1, 2, 3, 4])
print(f"Constant tensor: {values}")

# The dtype can also be pinned explicitly.
as_float = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)
as_int = tf.constant([1, 2, 3], dtype=tf.int32)
print(f"Float tensor: {as_float}")
print(f"Integer tensor: {as_int}")

# Nested lists produce higher-rank tensors.
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])
print(f"Matrix: {matrix}")
nested_tensor = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(f"Nested tensor shape: {nested_tensor.shape}")

2. Create Using Built-in Functions
python
# Constant-filled tensors: zeros, ones, the identity, and a custom fill.
for label, t in (
    ("Zero tensor", tf.zeros([3, 4])),
    ("One tensor", tf.ones([2, 3])),
    ("Identity matrix", tf.eye(3)),
    ("Fill tensor", tf.fill([2, 3], 7)),
):
    print(f"{label}:\n{t}")

# Random tensors drawn from two distributions.
gauss = tf.random.normal([2, 3])    # standard normal
flat = tf.random.uniform([2, 3])    # uniform on [0, 1)
print(f"Normal distribution random tensor:\n{gauss}")
print(f"Uniform distribution random tensor:\n{flat}")

# Evenly spaced sequences.
range_tensor = tf.range(10)                  # integers 0..9
linspace_tensor = tf.linspace(0.0, 1.0, 5)   # 5 points from 0.0 to 1.0
print(f"Range tensor: {range_tensor}")
print(f"Linspace tensor: {linspace_tensor}")

3. Create from NumPy Arrays
python
import numpy as np

# NumPy array -> tensor: tf.constant accepts ndarrays directly.
source = np.array([[1, 2, 3], [4, 5, 6]])
print(f"Created from NumPy: {tf.constant(source)}")

# The NumPy dtype (float64 for a default float ndarray) is carried over.
floats = np.array([1.0, 2.0, 3.0])
print(f"NumPy to TensorFlow: {tf.constant(floats).dtype}")

# Tensor -> NumPy array via .numpy().
np_from_tf = tf.constant([1, 2, 3]).numpy()
print(f"TensorFlow to NumPy: {np_from_tf}")
print(f"Type: {type(np_from_tf)}")

4. Variable Tensors
python
# tf.Variable holds mutable state (e.g. trainable model parameters).
v = tf.Variable([1.0, 2.0, 3.0])
print(f"Variable: {v}")

# Overwrite the whole value in place.
v.assign([4.0, 5.0, 6.0])
print(f"Modified variable: {v}")

# In-place arithmetic updates.
v.assign_add([1.0, 1.0, 1.0])   # element-wise add
print(f"After addition: {v}")
v.assign_sub([0.5, 0.5, 0.5])   # element-wise subtract
print(f"After subtraction: {v}")

# trainable=True marks the variable for gradient tracking.
trainable_var = tf.Variable(tf.random.normal([3, 3]), trainable=True)
print(f"Trainable variable: {trainable_var.trainable}")

Tensor Properties
python
# Example tensor to inspect.
tensor = tf.random.normal([2, 3, 4])

# Metadata available directly on the tensor object.
for label, value in (
    ("Shape", tensor.shape),       # static shape
    ("Rank", tensor.ndim),         # number of axes
    ("Size", tf.size(tensor)),     # total element count
    ("Data type", tensor.dtype),
    ("Device", tensor.device),     # where the buffer lives
):
    print(f"{label}: {value}")

# Shape as a plain Python list, and as a runtime tensor.
print(f"Shape list: {tensor.shape.as_list()}")
print(f"Dynamic shape: {tf.shape(tensor)}")
print(f"Is variable: {isinstance(tensor, tf.Variable)}")

Data Types
1. Basic Data Types
python
# Floating-point dtypes at three precisions.
half = tf.constant([1.0, 2.0], dtype=tf.float16)    # half precision
single = tf.constant([1.0, 2.0], dtype=tf.float32)  # single precision
double = tf.constant([1.0, 2.0], dtype=tf.float64)  # double precision

# Signed integer dtypes from 8 to 64 bits.
i8 = tf.constant([1, 2], dtype=tf.int8)
i16 = tf.constant([1, 2], dtype=tf.int16)
i32 = tf.constant([1, 2], dtype=tf.int32)
i64 = tf.constant([1, 2], dtype=tf.int64)

# Boolean and string dtypes.
bools = tf.constant([True, False], dtype=tf.bool)
string_tensor = tf.constant(["hello", "world"], dtype=tf.string)

print(f"Float32: {single.dtype}")
print(f"Int32: {i32.dtype}")
print(f"Bool: {bools.dtype}")
print(f"String: {string_tensor.dtype}")

2. Type Conversion
python
# Create integer tensor
int_tensor = tf.constant([1, 2, 3], dtype=tf.int32)

# Explicit type conversion with tf.cast.
float_tensor = tf.cast(int_tensor, tf.float32)
bool_tensor = tf.cast(int_tensor, tf.bool)  # non-zero -> True
print(f"Original type: {int_tensor.dtype}")
print(f"Converted to float: {float_tensor.dtype}")
print(f"Converted to bool: {bool_tensor.dtype}")

# NOTE: unlike NumPy, TensorFlow does NOT promote dtypes implicitly --
# tf.add(tf.constant(1), tf.constant(2.0)) raises InvalidArgumentError
# because int32 and float32 may not be mixed. Cast explicitly instead.
mixed_result = tf.add(tf.cast(tf.constant(1), tf.float32), tf.constant(2.0))
print(f"Mixed operation result type: {mixed_result.dtype}")

Tensor Operations
1. Indexing and Slicing
python
# 3x4 example matrix.
tensor = tf.constant([[1, 2, 3, 4],
                      [5, 6, 7, 8],
                      [9, 10, 11, 12]])

# Plain indexing.
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Specific element: {tensor[1, 2]}")

# Slices use the familiar start:stop:step syntax.
print(f"First two rows: {tensor[:2]}")
print(f"Last two columns: {tensor[:, -2:]}")
print(f"Sub-matrix: {tensor[1:3, 1:3]}")
print(f"Every other row: {tensor[::2]}")
print(f"Reversed: {tensor[::-1]}")

# tf.gather picks rows by index.
row_ids = tf.constant([0, 2])
print(f"Select specific rows: {tf.gather(tensor, row_ids)}")

# Boolean masking flattens the selected elements into a 1-D tensor.
masked_values = tf.boolean_mask(tensor, tensor > 6)
print(f"Elements greater than 6: {masked_values}")

2. Shape Transformation
python
# A rank-3 tensor to reshape.
tensor = tf.random.normal([2, 3, 4])
print(f"Original shape: {tensor.shape}")

# Reshape to 6x4 (same 24 elements, new layout).
print(f"After reshape: {tf.reshape(tensor, [6, 4]).shape}")

# A -1 dimension lets TensorFlow infer the flattened length.
print(f"After flatten: {tf.reshape(tensor, [-1]).shape}")

# Insert singleton axes at the front and at the back.
expanded = tf.expand_dims(tensor, axis=0)
print(f"Add dimension: {expanded.shape}")
print(f"Add dimension at end: {tf.expand_dims(tensor, axis=-1).shape}")

# tf.squeeze drops all size-1 axes.
print(f"Squeeze dimension: {tf.squeeze(expanded).shape}")

# Permute the axes: (2, 3, 4) -> (4, 2, 3).
print(f"After transpose: {tf.transpose(tensor, perm=[2, 0, 1]).shape}")

# For a 2-D tensor, transpose swaps rows and columns.
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])
matrix_t = tf.transpose(matrix)
print(f"Matrix transpose: {matrix_t}")

3. Concatenation and Splitting
python
# Two 2x2 building blocks.
a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 6], [7, 8]])

# tf.concat joins along an EXISTING axis.
print(f"Concatenate along rows:\n{tf.concat([a, b], axis=0)}")
print(f"Concatenate along columns:\n{tf.concat([a, b], axis=1)}")

# tf.stack joins along a NEW axis, raising the rank by one.
print(f"Stacked result shape: {tf.stack([a, b], axis=0).shape}")

# Split into three equal pieces along axis 1.
tensor = tf.constant([[1, 2, 3, 4, 5, 6]])
split_result = tf.split(tensor, num_or_size_splits=3, axis=1)
print(f"Split result: {[t.numpy() for t in split_result]}")

# Explicit sizes allow pieces of arbitrary (possibly uneven) widths.
uneven_split = tf.split(tensor, num_or_size_splits=[2, 2, 2], axis=1)
print(f"Uneven split: {[t.numpy() for t in uneven_split]}")

Mathematical Operations
1. Basic Operations
python
# Two 2x2 float matrices.
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])

# Element-wise arithmetic; the operators +, -, *, / are equivalent
# to tf.add / tf.subtract / tf.multiply / tf.divide.
add_result = a + b
sub_result = a - b
mul_result = a * b
div_result = a / b
print(f"Addition:\n{add_result}")
print(f"Multiplication:\n{mul_result}")

# A Python scalar broadcasts over every element.
print(f"Scalar multiplication:\n{a * 2.0}")
print(f"Scalar addition:\n{a + 1.0}")

# Element-wise math functions.
sqrt_result = tf.sqrt(a)
exp_result = tf.exp(a)
log_result = tf.math.log(a)
sin_result = tf.sin(a)
print(f"Square root:\n{sqrt_result}")
print(f"Exponential:\n{exp_result}")

2. Matrix Operations
python
# Matrix multiplication
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])
matmul_result = tf.matmul(a, b)  # or a @ b
print(f"Matrix multiplication:\n{matmul_result}")

# Batch matrix multiplication: the leading dimension is the batch.
batch_a = tf.random.normal([3, 2, 4])
batch_b = tf.random.normal([3, 4, 5])
batch_result = tf.matmul(batch_a, batch_b)
print(f"Batch matrix multiplication shape: {batch_result.shape}")

# Matrix inversion -- a singular input raises InvalidArgumentError,
# so catch that specific error instead of using a bare `except`.
matrix = tf.constant([[1.0, 2.0], [3.0, 4.0]])
try:
    inverse = tf.linalg.inv(matrix)
    print(f"Matrix inversion:\n{inverse}")
except tf.errors.InvalidArgumentError:
    print("Matrix is not invertible")

# Matrix determinant
det = tf.linalg.det(matrix)
print(f"Determinant: {det}")

# Eigendecomposition: tf.linalg.eigh assumes a self-adjoint (symmetric)
# matrix and reads only one triangle of the input, so it must be fed a
# symmetric matrix (the original passed a non-symmetric one, producing
# meaningless results).
sym_matrix = tf.constant([[2.0, 1.0], [1.0, 2.0]])
eigenvalues, eigenvectors = tf.linalg.eigh(sym_matrix)
print(f"Eigenvalues: {eigenvalues}")

3. Statistical Operations
python
# Random 3x4 sample.
tensor = tf.random.normal([3, 4])

# Means: over everything, per column (axis 0), per row (axis 1).
print(f"Global mean: {tf.reduce_mean(tensor)}")
print(f"Mean along rows: {tf.reduce_mean(tensor, axis=0)}")
print(f"Mean along columns: {tf.reduce_mean(tensor, axis=1)}")

# Spread, extremes and total.
sum_val = tf.reduce_sum(tensor)
print(f"Standard deviation: {tf.math.reduce_std(tensor)}")
print(f"Variance: {tf.math.reduce_variance(tensor)}")
print(f"Maximum value: {tf.reduce_max(tensor)}")
print(f"Minimum value: {tf.reduce_min(tensor)}")

# Positions of the extremes along each row.
print(f"Index of maximum: {tf.argmax(tensor, axis=1)}")
print(f"Index of minimum: {tf.argmin(tensor, axis=1)}")

# Largest two entries per row, together with their indices.
top_k_values, top_k_indices = tf.nn.top_k(tensor, k=2)
print(f"Top-2 values: {top_k_values}")
print(f"Top-2 indices: {top_k_indices}")

Broadcasting Mechanism
python
# TensorFlow broadcasts smaller shapes against larger ones, NumPy-style.
a = tf.constant([[1, 2, 3]])      # shape (1, 3)
b = tf.constant([[1], [2], [3]])  # shape (3, 1)

# (1, 3) + (3, 1) broadcasts to (3, 3).
result = a + b
print(f"Broadcast result:\n{result}")

# Broadcasting across different ranks.
scalar = tf.constant(10)
vector = tf.constant([1, 2, 3])
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])

# Scalar and vector
scalar_vector = scalar + vector
print(f"Scalar + vector: {scalar_vector}")

# Vector and matrix
vector_matrix = vector + matrix
print(f"Vector + matrix:\n{vector_matrix}")

def check_broadcast_compatibility(shape1, shape2):
    """Report whether two shapes broadcast together and the result shape.

    (The original body was not indented under the def, which is a syntax
    error in Python.)
    """
    try:
        a = tf.ones(shape1)
        b = tf.ones(shape2)
        result = a + b
        print(f"Shapes {shape1} and {shape2} compatible, result shape: {result.shape}")
    except Exception as e:
        # Incompatible shapes raise from inside TF; report, don't crash.
        print(f"Shapes {shape1} and {shape2} incompatible: {e}")

check_broadcast_compatibility([3, 1], [1, 4])
check_broadcast_compatibility([3, 4], [2, 4])

Device Management
python
# List the physical devices TensorFlow can see.
print("Available devices:")
for device in tf.config.list_physical_devices():
    print(f" {device}")

# Pin tensor creation to a specific device.
# (The original `with`/`if` bodies were not indented -- a syntax error.)
with tf.device('/CPU:0'):
    cpu_tensor = tf.constant([1, 2, 3])
    print(f"CPU tensor: {cpu_tensor.device}")

# GPU device (only if one is available)
if tf.config.list_physical_devices('GPU'):
    with tf.device('/GPU:0'):
        gpu_tensor = tf.constant([1, 2, 3])
        print(f"GPU tensor: {gpu_tensor.device}")

# Data transfer between devices
cpu_data = tf.constant([1, 2, 3])
if tf.config.list_physical_devices('GPU'):
    # tf.identity lets the runtime place the copy automatically.
    gpu_data = tf.identity(cpu_data)
    print(f"Data device: {gpu_data.device}")

def manual_placement():
    """Create tensors on explicit devices and add them across devices."""
    with tf.device('/CPU:0'):
        a = tf.constant([1.0, 2.0, 3.0])
    with tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'):
        b = tf.constant([4.0, 5.0, 6.0])
    # The add op handles any inter-device data transfer transparently.
    result = a + b
    return result

result = manual_placement()
print(f"Cross-device operation result: {result}")

Memory Management
python
def check_memory_usage():
    """Allocate a large tensor and report its host-memory footprint."""
    large_tensor = tf.random.normal([1000, 1000])
    print(f"Large tensor shape: {large_tensor.shape}")
    print(f"Memory usage: {large_tensor.numpy().nbytes / 1024 / 1024:.2f} MB")
    # Drop the reference so Python can reclaim the buffer. (The original
    # ran `del large_tensor` at module level, which raised NameError
    # because the variable is local to this function.)
    del large_tensor

check_memory_usage()

# tf.function runs the computation in graph mode, which is more efficient.
@tf.function
def memory_efficient_operation(x):
    """Sum of squares of x, compiled to a TensorFlow graph."""
    return tf.reduce_sum(x ** 2)

def create_memory_mapped_dataset():
    """Stream a large dataset lazily with tf.data instead of materializing it."""
    dataset = tf.data.Dataset.range(1000000)
    dataset = dataset.map(lambda x: tf.cast(x, tf.float32))
    return dataset

def gradient_tape_memory():
    """Demonstrate that a non-persistent tape frees its buffers after one use."""
    x = tf.Variable(2.0)
    # persistent=False (the default) releases tape resources after .gradient().
    with tf.GradientTape() as tape:
        y = x ** 2
    grad = tape.gradient(y, x)
    print(f"Gradient: {grad}")
    # Tape is automatically released after use.
gradient_tape_memory()

Practical Tips
1. Tensor Debugging
python
def print_tensor_info(tensor, name="Tensor"):
    """Dump a tensor's shape, dtype, device and values for debugging.

    (The original body was not indented under the def -- a syntax error.)
    """
    print(f"{name}:")
    print(f" Shape: {tensor.shape}")
    print(f" Data type: {tensor.dtype}")
    print(f" Device: {tensor.device}")
    print(f" Values:\n{tensor}")
    print()

tensor = tf.random.normal([2, 3])
print_tensor_info(tensor, "Random tensor")

# tf.print executes inside compiled graphs, where Python's print() would
# only run once at tracing time.
@tf.function
def debug_function(x):
    tf.print("Input tensor:", x)
    result = x * 2
    tf.print("Result:", result)
    return result
debug_function(tf.constant([1, 2, 3]))

2. Conditional Operations
python
# Element-wise conditional selection with tf.where.
condition = tf.constant([True, False, True])
x = tf.constant([1, 2, 3])
y = tf.constant([4, 5, 6])
result = tf.where(condition, x, y)
print(f"Conditional selection: {result}")

# Numerical conditions: keep positives, replace the rest with 0.
values = tf.constant([-1, 0, 1, 2])
positive_mask = values > 0
positive_values = tf.where(positive_mask, values, 0)
print(f"Positive mask: {positive_values}")

def complex_condition(x):
    """Return sqrt(x) where x > 0 and 0 elsewhere, element-wise.

    (The original body was not indented under the def -- a syntax error.)
    NOTE: tf.sqrt still evaluates on the negative elements (producing NaN)
    before tf.where discards them; the selected output itself is NaN-free.
    """
    return tf.where(
        x > 0,
        tf.sqrt(x),        # square root where x > 0
        tf.zeros_like(x)   # zero where x <= 0
    )

test_values = tf.constant([-1.0, 0.0, 1.0, 4.0])
result = complex_condition(test_values)
print(f"Complex condition result: {result}")

3. Performance Optimization
python
# tf.function compiles the Python function into a TensorFlow graph,
# removing per-op Python overhead.
@tf.function
def optimized_computation(x, y):
    """Graph-compiled matmul plus the mean of x."""
    return tf.matmul(x, y) + tf.reduce_mean(x)

def preallocate_tensors():
    """Accumulate ten random matrices into one result tensor."""
    result = tf.zeros([1000, 1000])
    for _ in range(10):
        # Note: tf.Tensor is immutable, so `result + ...` creates a new
        # tensor each iteration; truly in-place updates require tf.Variable.
        result = result + tf.random.normal([1000, 1000])
    return result

# Use appropriate data types:
# float32 is usually faster than float64 and uses less memory.
fast_tensor = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)

Summary
Tensors are the core concept of TensorFlow, and mastering tensor operations is the foundation of deep learning:
- Creating Tensors: Understand various creation methods and data types
- Tensor Properties: Understand concepts like shape, rank, data type
- Basic Operations: Indexing, slicing, shape transformation, concatenation and splitting
- Mathematical Operations: Element-wise operations, matrix operations, statistical operations
- Broadcasting Mechanism: Understand operation rules for tensors of different shapes
- Device Management: Reasonably use CPU and GPU resources
- Memory Management: Optimize memory usage to improve performance
These basic operations will be frequently used in subsequent neural network construction. Practice more!