Array Shape Manipulation
In this chapter, we'll learn how to manipulate the shape of NumPy arrays: reshaping, flattening, transposing, concatenating, splitting, and adding or removing dimensions. These operations are essential skills in data processing.
Array Shape Basics
Understanding Array Shape
python
import numpy as np
def _print_shape_and_ndim(arr):
    """Print the shape tuple and the number of axes of *arr*."""
    print(f"Shape: {arr.shape}")
    print(f"Dimensions: {arr.ndim}")


# Sample arrays with one, two and three axes
array_1d = np.array([1, 2, 3, 4, 5, 6])
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

# 1D: printed on a single line
print("1D array:")
print(f"Array: {array_1d}")
_print_shape_and_ndim(array_1d)
print(f"Size: {array_1d.size}")
print()

# 2D: the array starts on its own line so the rows line up
print("2D array:")
print(f"Array:\n{array_2d}")
_print_shape_and_ndim(array_2d)
print(f"Size: {array_2d.size}")
print()

# 3D: same report as for the 2D array
print("3D array:")
print(f"Array:\n{array_3d}")
_print_shape_and_ndim(array_3d)
print(f"Size: {array_3d.size}")
Array Reshaping
Basic Reshape Operations
python
import numpy as np
def _show(title, arr):
    """Print *title*, then the array, then its shape."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)


# Start from a flat array holding 0..11
array_1d = np.arange(12)
print("Original 1D array:", array_1d)
print("Shape:", array_1d.shape)
print()

# The same 12 elements arranged as 2D grids
array_2d_3x4 = array_1d.reshape(3, 4)
_show("Reshaped to 3x4:", array_2d_3x4)
print()
array_2d_4x3 = array_1d.reshape(4, 3)
_show("Reshaped to 4x3:", array_2d_4x3)
print()

# ... and as a 3D array
array_3d = array_1d.reshape(2, 2, 3)
_show("Reshaped to 2x2x3:", array_3d)
print()

# Pass -1 for one dimension and NumPy infers it from the element count
auto_reshape = array_1d.reshape(3, -1)  # column count inferred
_show("Auto-calculate dimension (3, -1):", auto_reshape)
auto_reshape2 = array_1d.reshape(-1, 4)  # row count inferred
print("Auto-calculate dimension (-1, 4):")
print(auto_reshape2)
print("Shape:", auto_reshape2.shape)
Reshape Considerations
python
import numpy as np
array = np.arange(12)
print("Original array:", array)
# A reshape must preserve the total element count.
try:
    # Raises ValueError because 3 * 5 = 15 != 12 elements.
    array.reshape(3, 5)
except ValueError as e:
    print(f"Error: {e}")
# Correct reshape: 3 * 4 = 12
correct_reshape = array.reshape(3, 4)
print("Correct reshape:")
print(correct_reshape)
print()
# reshape returns a view whenever the memory layout allows it, as it does
# for this array; when it does not, NumPy returns a copy instead.
# np.shares_memory tells the two cases apart.
original = np.arange(6)
reshaped = original.reshape(2, 3)
print("Original array:", original)
print("Reshaped array:")
print(reshaped)
print("Shares memory:", np.shares_memory(original, reshaped))
# Because `reshaped` is a view here, writing to it also changes `original`.
reshaped[0, 0] = 999
print("After modifying reshaped:")
print("Original:", original)
print("Reshaped:")
print(reshaped)
Array Flattening
flatten() vs ravel()
python
import numpy as np
# Nested-list literal shared by both halves of the demo; np.array copies it,
# so each array built from it is independent.
_ROWS = [[1, 2, 3], [4, 5, 6]]

array_2d = np.array(_ROWS)
print("Original 2D array:")
print(array_2d)
print()

# flatten() hands back an independent copy
flattened = array_2d.flatten()
print("flatten() result:", flattened)
print("Shares memory:", np.shares_memory(array_2d, flattened))
flattened[0] = 999  # only the copy changes
print("After modifying flatten result:")
print("Original:", array_2d)
print("flatten result:", flattened)
print()

# ravel() hands back a view when it can; start again from a fresh array
array_2d = np.array(_ROWS)
raveled = np.ravel(array_2d)
print("ravel() result:", raveled)
print("Shares memory:", np.shares_memory(array_2d, raveled))
raveled[0] = 888  # written through the view into array_2d
print("After modifying ravel result:")
print("Original:", array_2d)
print("ravel result:", raveled)
Different Flattening Orders
python
import numpy as np
# Arrays to flatten: a 2x3 matrix and a 2x2x2 cube
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

print("Original array:")
print(array_2d)
print()

# order='C' reads row by row; this is the default
c_order = array_2d.flatten(order='C')
print("C-style flatten (row-major):", c_order)
# order='F' reads column by column
f_order = array_2d.flatten(order='F')
print("Fortran-style flatten (column-major):", f_order)

# The same comparison on the 3D array
print("\n3D array:")
print(array_3d)
print("Shape:", array_3d.shape)
flat_c_3d = array_3d.flatten(order='C')
print("C-style flatten:", flat_c_3d)
print("F-style flatten:", array_3d.flatten('F'))
Array Transposition
Basic Transpose Operations
python
import numpy as np
def _show(title, arr):
    """Print *title*, the array, its shape and a trailing blank line."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)
    print()


# A 2x3 matrix to transpose
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
_show("Original array:", array_2d)

# Spelling 1: the .T attribute
transposed_T = array_2d.T
_show("Using .T transpose:", transposed_T)

# Spelling 2: the transpose() method
transposed_method = array_2d.transpose()
_show("Using transpose() method:", transposed_method)

# Spelling 3: the np.transpose() function
transposed_func = np.transpose(array_2d)
print("Using np.transpose() function:")
print(transposed_func)
print("Shape:", transposed_func.shape)
Multidimensional Array Transpose
python
import numpy as np
def _show(title, arr):
    """Print *title*, the array, its shape and a trailing blank line."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)
    print()


# A 2x3x4 array holding 0..23
array_3d = np.arange(24).reshape(2, 3, 4)
_show("Original 3D array:", array_3d)

# With no axes given, .T reverses the axis order: (2, 3, 4) -> (4, 3, 2)
default_transpose = array_3d.T
_show("Default transpose:", default_transpose)

# An explicit permutation: new axis k is old axis axes[k],
# so (2, 0, 1) turns shape (2, 3, 4) into (4, 2, 3)
custom_transpose = np.transpose(array_3d, axes=(2, 0, 1))
print("Custom transpose (2, 0, 1):")
print(custom_transpose)
print("Shape:", custom_transpose.shape)
Array Concatenation
Basic Concatenation Operations
python
import numpy as np
# Three 1D arrays of equal length
array1 = np.array([1, 2, 3])
array2 = np.array([4, 5, 6])
array3 = np.array([7, 8, 9])
_pieces = (array1, array2, array3)

print("Array 1:", array1)
print("Array 2:", array2)
print("Array 3:", array3)
print()

# hstack joins 1D arrays end to end
hstack_result = np.hstack(_pieces)
print("Horizontal concatenation:", hstack_result)

# vstack turns each 1D array into a row of a 2D array
vstack_result = np.vstack(_pieces)
print("Vertical concatenation:")
print(vstack_result)
print()

# concatenate along the default axis 0 joins the 1D arrays end to end
concat_result = np.concatenate(_pieces)
print("concatenate result:", concat_result)
2D Array Concatenation
python
import numpy as np
def _show(title, arr):
    """Print *title*, the array and a trailing blank line."""
    print(title)
    print(arr)
    print()


# Two 2x2 matrices to join
array2d_1 = np.array([[1, 2], [3, 4]])
array2d_2 = np.array([[5, 6], [7, 8]])
_pair = (array2d_1, array2d_2)

print("Array 1:")
print(array2d_1)
_show("Array 2:", array2d_2)

# Side by side: the column count grows
hstack_2d = np.hstack(_pair)
_show("Horizontal concatenation:", hstack_2d)

# One on top of the other: the row count grows
vstack_2d = np.vstack(_pair)
_show("Vertical concatenation:", vstack_2d)

# concatenate does both; the axis argument picks the direction
concat_axis0 = np.concatenate(_pair, axis=0)
_show("Concatenate along axis 0 (rows):", concat_axis0)

concat_axis1 = np.concatenate(_pair, axis=1)
print("Concatenate along axis 1 (columns):")
print(concat_axis1)
Depth Concatenation and Stack
python
import numpy as np
def _show(title, arr):
    """Print *title*, the array, its shape and a trailing blank line."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)
    print()


# Two 2x2 matrices to combine
array1 = np.array([[1, 2], [3, 4]])
array2 = np.array([[5, 6], [7, 8]])
_pair = (array1, array2)

print("Array 1:")
print(array1)
print("Array 2:")
print(array2)
print()

# dstack pairs up matching elements along a third axis
dstack_result = np.dstack(_pair)
_show("Depth concatenation:", dstack_result)

# stack inserts a brand-new axis at the requested position
stack_axis0 = np.stack(_pair, axis=0)
_show("Stack along axis 0:", stack_axis0)

stack_axis1 = np.stack(_pair, axis=1)
print("Stack along axis 1:")
print(stack_axis1)
print("Shape:", stack_axis1.shape)
Array Splitting
Basic Split Operations
python
import numpy as np
# 1D array splitting: start from 0..11
array_1d = np.arange(12)
print("Original array:", array_1d)
print()
# Equal split: an integer asks for that many equal-sized parts
# (np.split raises ValueError if the length is not evenly divisible)
split_equal = np.split(array_1d, 3)
print("Split into 3 equal parts:")
for i, part in enumerate(split_equal):
    print(f" Part {i+1}: {part}")
print()
# Split at specified points: a list gives the indices where each new part
# starts, so [3, 7] yields [0:3], [3:7] and [7:]
split_points = np.split(array_1d, [3, 7])
print("Split at indices 3 and 7:")
for i, part in enumerate(split_points):
    print(f" Part {i+1}: {part}")
print()
# array_split tolerates a part count that does not divide the length evenly;
# 12 elements in 5 parts gives sizes 3, 3, 2, 2, 2
array_split_result = np.array_split(array_1d, 5)
print("Unequal split into 5 parts:")
for i, part in enumerate(array_split_result):
    print(f" Part {i+1}: {part}")
2D Array Splitting
python
import numpy as np
# Create a 4x6 array holding 0..23
array_2d = np.arange(24).reshape(4, 6)
print("Original 2D array:")
print(array_2d)
print("Shape:", array_2d.shape)
print()
# Horizontal split: cut along the column axis into 3 equal blocks,
# each 4 rows by 2 columns
hsplit_result = np.hsplit(array_2d, 3)
print("Horizontal split (3 columns):")
for i, part in enumerate(hsplit_result):
    print(f" Part {i+1}:")
    print(part)
print()
# Vertical split: cut along the row axis into 2 equal blocks,
# each 2 rows by 6 columns
vsplit_result = np.vsplit(array_2d, 2)
print("Vertical split (2 rows):")
for i, part in enumerate(vsplit_result):
    print(f" Part {i+1}:")
    print(part)
Array Dimension Operations
Adding and Removing Dimensions
python
import numpy as np
def _show(title, arr):
    """Print *title*, then the array, then its shape."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)


# Start from a 1D array of 5 elements
array_1d = np.array([1, 2, 3, 4, 5])
print("Original 1D array:", array_1d)
print("Shape:", array_1d.shape)
print()

# Adding a dimension by indexing with np.newaxis
array_row = array_1d[np.newaxis, :]  # new leading axis: a single row
array_col = array_1d[:, np.newaxis]  # new trailing axis: a single column
_show("Add row dimension:", array_row)
print()
_show("Add column dimension:", array_col)
print()

# The same two results spelled with np.expand_dims
expand_axis0 = np.expand_dims(array_1d, axis=0)
expand_axis1 = np.expand_dims(array_1d, axis=1)
_show("expand_dims axis=0:", expand_axis0)
print()
_show("expand_dims axis=1:", expand_axis1)
print()

# Removing dimensions: squeeze drops every axis of length 1
array_with_extra_dim = np.array([[[1, 2, 3, 4, 5]]])
_show("Array with extra dimensions:", array_with_extra_dim)
squeezed = np.squeeze(array_with_extra_dim)
print("After removing extra dimensions:")
print(squeezed)
print("Shape:", squeezed.shape)
Axis Swapping and Moving
python
import numpy as np
def _show(title, arr):
    """Print *title*, the array, its shape and a trailing blank line."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)
    print()


# A 2x3x4 array holding 0..23
array_3d = np.arange(24).reshape(2, 3, 4)
_show("Original 3D array:", array_3d)

# swapaxes exchanges two axes: (2, 3, 4) -> (4, 3, 2)
swapped = array_3d.swapaxes(0, 2)
_show("Swap axis 0 and axis 2:", swapped)

# moveaxis relocates one axis and keeps the others in order:
# (2, 3, 4) -> (3, 4, 2)
moved = np.moveaxis(array_3d, 0, -1)
print("Move axis 0 to last:")
print(moved)
print("Shape:", moved.shape)
Practical Examples
Example 1: Image Data Processing
python
import numpy as np
# Simulate a small RGB image stored as (height, width, channels)
height, width, channels = 4, 6, 3
image = np.random.randint(0, 256, (height, width, channels), dtype=np.uint8)
print(f"Original image shape: {image.shape}")
print(f"Image data type: {image.dtype}")
print("Image data (first 2 rows):")
print(image[:2])
print()
# Convert to a different axis order (HWC -> CHW): the channel axis moves
# to the front
image_chw = np.transpose(image, (2, 0, 1))
print(f"Converted to CHW format: {image_chw.shape}")
print()
# Extract single channels; indexing the last axis with an integer drops it,
# leaving a (height, width) array
red_channel = image[:, :, 0]
green_channel = image[:, :, 1]
blue_channel = image[:, :, 2]
print(f"Red channel shape: {red_channel.shape}")
print("Red channel data:")
print(red_channel)
print()
# Recombine the channels in reverse order (RGB -> BGR) along a new last axis
bgr_image = np.stack([blue_channel, green_channel, red_channel], axis=2)
print(f"BGR image shape: {bgr_image.shape}")
print()
# Batch processing: stack `batch_size` copies of the image along a new
# leading axis, giving (batch_size, height, width, channels)
batch_size = 2
batch_images = np.stack([image] * batch_size, axis=0)
print(f"Batch images shape: {batch_images.shape}")
# Flatten the two spatial axes for machine learning: one row per pixel,
# one column per channel
flattened_image = image.reshape(-1, channels)
print(f"Flattened image shape: {flattened_image.shape}")
Example 2: Data Reorganization and Analysis
python
import numpy as np
# Simulate time series data: 30 days, 24 hours each, 3 sensors
days, hours, sensors = 30, 24, 3
data = np.random.normal(20, 5, (days, hours, sensors))  # Temperature data
print(f"Original data shape: {data.shape}")
print(f"Data range: {data.min():.1f} - {data.max():.1f}")
print()
# Reorganize data: move the sensor axis to the front to group by sensor
sensor_data = np.transpose(data, (2, 0, 1))  # (sensors, days, hours)
print(f"Grouped by sensor: {sensor_data.shape}")
# Daily averages for each sensor: averaging over the hours axis leaves
# (sensors, days)
daily_averages = np.mean(sensor_data, axis=2)
print(f"Daily averages shape: {daily_averages.shape}")
print("Sensor 1 first 5 days averages:", daily_averages[0, :5])
print()
# Reorganize to weekly data (assuming the series starts on a Monday).
# Only whole weeks fit the reshape, so the slice drops the trailing
# days % 7 days (2 of the 30 here).
weeks = days // 7
weekly_data = data[:weeks*7].reshape(weeks, 7, hours, sensors)
print(f"Weekly data shape: {weekly_data.shape}")
# Weekly averages: averaging over the day and hour axes leaves (weeks, sensors)
weekly_averages = np.mean(weekly_data, axis=(1, 2))
print(f"Weekly averages shape: {weekly_averages.shape}")
print("Weekly averages per sensor:")
for week in range(weeks):
    print(f" Week {week+1}: {weekly_averages[week]}")
print()
# Flatten the time axes for statistical analysis: one row per hourly reading
flat_data = data.reshape(-1, sensors)
print(f"Flattened data shape: {flat_data.shape}")
print("Overall statistics per sensor:")
for i in range(sensors):
    sensor_values = flat_data[:, i]
    print(f" Sensor {i+1}: mean={np.mean(sensor_values):.2f}, "
          f"std={np.std(sensor_values):.2f}")
Example 3: Matrix Operations and Linear Algebra
python
import numpy as np
def _show(title, matrix):
    """Print *title*, the matrix, its shape and a trailing blank line."""
    print(title)
    print(matrix)
    print(f"Shape: {matrix.shape}")
    print()


# A is 2x3 and B is 3x2, so A × B is defined and comes out 2x2
matrix_a = np.array([[1, 2, 3], [4, 5, 6]])
matrix_b = np.array([[7, 8], [9, 10], [11, 12]])
_show("Matrix A:", matrix_a)
_show("Matrix B:", matrix_b)

# Matrix multiplication with the @ operator
matrix_product = matrix_a @ matrix_b
_show("Matrix product A × B:", matrix_product)

# A^T × A (commonly used in least squares): (3x2) times (2x3) gives 3x3
ata = matrix_a.T @ matrix_a
_show("A^T × A:", ata)

# A matrix that equals its own transpose
symmetric = np.array([[1, 2, 3], [2, 4, 5], [3, 5, 6]])
print("Symmetric matrix:")
print(symmetric)
print("Is symmetric:", np.allclose(symmetric, symmetric.T))
Performance Optimization Tips
Memory Layout and Performance
python
import numpy as np
import time
# Create large arrays to test performance: same values, two memory layouts
size = 1000
array_c = np.random.random((size, size))
array_f = np.asfortranarray(array_c)  # Fortran (column-major) order
print(f"C-order array flag: {array_c.flags['C_CONTIGUOUS']}")
print(f"Fortran-order array flag: {array_f.flags['F_CONTIGUOUS']}")
print()
# Intervals are measured with time.perf_counter(), the clock the standard
# library provides for timing short durations; time.time() is wall-clock
# time and can be adjusted while the test runs.
# Test row access performance (sum the first 100 rows of each array)
start_time = time.perf_counter()
for i in range(100):
    _ = array_c[i, :].sum()  # Access rows
c_row_time = time.perf_counter() - start_time
start_time = time.perf_counter()
for i in range(100):
    _ = array_f[i, :].sum()  # Access rows
f_row_time = time.perf_counter() - start_time
print(f"C-order row access time: {c_row_time:.4f} seconds")
print(f"F-order row access time: {f_row_time:.4f} seconds")
print()
# Test column access performance (sum the first 100 columns of each array)
start_time = time.perf_counter()
for i in range(100):
    _ = array_c[:, i].sum()  # Access columns
c_col_time = time.perf_counter() - start_time
start_time = time.perf_counter()
for i in range(100):
    _ = array_f[:, i].sum()  # Access columns
f_col_time = time.perf_counter() - start_time
print(f"C-order column access time: {c_col_time:.4f} seconds")
print(f"F-order column access time: {f_col_time:.4f} seconds")
Chapter Summary
In this chapter, we learned:
- Basic concepts and attributes of array shapes
- Various methods for array reshaping
- Difference between flatten and ravel
- Multiple ways to transpose arrays
- Various concatenation methods (hstack, vstack, concatenate, stack, etc.)
- Different splitting techniques
- Adding and removing array dimensions
- Axis swapping and moving operations
- Shape operation techniques in practical applications
- Performance optimization considerations
Next Steps
In the next chapter, we'll learn NumPy's mathematical operations and functions, including basic operations, statistical functions, trigonometric functions, and more.
Exercises
- Create a 3x4 array, reshape it to 2x6, then transpose it
- Create three 2x3 arrays, concatenate them horizontally, vertically, and depth-wise
- Create a 4x6 array, split it into 2x2 subarrays
- Implement a function to convert images from HWC format to CHW format
- Create a batch processing function to combine multiple 1D arrays into a 2D batch array