Skip to content

Array Shape Manipulation

In this chapter, we'll learn how to manipulate NumPy array shapes: reshaping, flattening, transposing, concatenating, splitting, and adding or removing dimensions. These operations are essential skills in data processing.

Array Shape Basics

Understanding Array Shape

python
import numpy as np

# One sample array for each rank from 1 to 3
array_1d = np.array([1, 2, 3, 4, 5, 6])
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

# Report every array with its shape (length of each axis), its number of
# dimensions, and its total element count.
_samples = (("1D", array_1d), ("2D", array_2d), ("3D", array_3d))
for _position, (_label, _arr) in enumerate(_samples):
    if _position:
        print()  # blank line between consecutive reports
    print(f"{_label} array:")
    # A 1D array fits on the same line; higher ranks start on a fresh line
    _separator = " " if _arr.ndim == 1 else "\n"
    print(f"Array:{_separator}{_arr}")
    print(f"Shape: {_arr.shape}")
    print(f"Dimensions: {_arr.ndim}")
    print(f"Size: {_arr.size}")

Array Reshaping

Basic Reshape Operations

python
import numpy as np

# Twelve consecutive integers are the source for every reshape below
array_1d = np.arange(12)
print("Original 1D array:", array_1d)
print("Shape:", array_1d.shape)
print()

# Explicit target shapes: two 2D layouts and one 3D layout.
# Each shape's axis lengths multiply to 12, the number of elements.
array_2d_3x4 = np.reshape(array_1d, (3, 4))
array_2d_4x3 = np.reshape(array_1d, (4, 3))
array_3d = np.reshape(array_1d, (2, 2, 3))

for title, result in (
    ("Reshaped to 3x4:", array_2d_3x4),
    ("Reshaped to 4x3:", array_2d_4x3),
    ("Reshaped to 2x2x3:", array_3d),
):
    print(title, result, sep="\n")
    print("Shape:", result.shape)
    print()

# A -1 entry asks NumPy to infer that axis length from the element count
auto_reshape = np.reshape(array_1d, (3, -1))   # columns inferred: 12 / 3 = 4
auto_reshape2 = np.reshape(array_1d, (-1, 4))  # rows inferred: 12 / 4 = 3

for title, result in (
    ("Auto-calculate dimension (3, -1):", auto_reshape),
    ("Auto-calculate dimension (-1, 4):", auto_reshape2),
):
    print(title, result, sep="\n")
    print("Shape:", result.shape)

Reshape Considerations

python
import numpy as np

array = np.arange(12)
print("Original array:", array)

# reshape() must preserve the total element count
try:
    # Raises ValueError because 3*5 = 15 ≠ 12. The result is deliberately
    # not bound to a name: the call never returns a value.
    array.reshape(3, 5)
except ValueError as e:
    print(f"Error: {e}")

# Correct reshape: 3*4 = 12 matches the element count
correct_reshape = array.reshape(3, 4)
print("Correct reshape:")
print(correct_reshape)
print()

# reshape() returns a view whenever the memory layout allows it, as it does
# for this contiguous array. When it does not (for example, for some
# transposed or sliced inputs), NumPy returns a copy instead.
# np.shares_memory() tells the two cases apart.
original = np.arange(6)
reshaped = original.reshape(2, 3)

print("Original array:", original)
print("Reshaped array:")
print(reshaped)
print("Shares memory:", np.shares_memory(original, reshaped))

# Because reshaped is a view here, writing through it also changes original
reshaped[0, 0] = 999
print("After modifying reshaped:")
print("Original:", original)
print("Reshaped:")
print(reshaped)

Array Flattening

flatten() vs ravel()

python
import numpy as np


def _show_effect(label, source, derived):
    """Print the source array and the flattened result after an in-place write."""
    print(f"After modifying {label} result:")
    print("Original:", source)
    print(f"{label} result:", derived)


array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("Original 2D array:", array_2d, sep="\n")
print()

# flatten() always hands back an independent copy, so writing to the
# result leaves the source array untouched.
flattened = array_2d.flatten()
print("flatten() result:", flattened)
print("Shares memory:", np.shares_memory(array_2d, flattened))
flattened[0] = 999
_show_effect("flatten", array_2d, flattened)
print()

# ravel() returns a view when it can, so writing to the result is visible
# in the source array. Start from a fresh array for a clean comparison.
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
raveled = np.ravel(array_2d)
print("ravel() result:", raveled)
print("Shares memory:", np.shares_memory(array_2d, raveled))
raveled[0] = 888
_show_effect("ravel", array_2d, raveled)

Different Flattening Orders

python
import numpy as np

array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("Original array:", array_2d, sep="\n")
print()

# Row-major ("C") order reads one row after another and is the default;
# column-major ("F", Fortran) order reads one column after another.
c_order = array_2d.flatten(order='C')
f_order = array_2d.flatten(order='F')
print("C-style flatten (row-major):", c_order)
print("Fortran-style flatten (column-major):", f_order)

# The same two orders applied to a 3D array: in C order the last index
# changes fastest, in F order the first index changes fastest.
array_3d = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
print("\n3D array:", array_3d, sep="\n")
print("Shape:", array_3d.shape)

for order_code in ("C", "F"):
    print(f"{order_code}-style flatten:", array_3d.flatten(order=order_code))

Array Transposition

Basic Transpose Operations

python
import numpy as np

# A 2x3 array; its transpose is 3x2
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("Original array:", array_2d, sep="\n")
print("Shape:", array_2d.shape)

# Three equivalent spellings of the same transpose
transposed_T = array_2d.T                  # attribute
transposed_method = array_2d.transpose()   # method
transposed_func = np.transpose(array_2d)   # module-level function

for heading, flipped in (
    ("Using .T transpose:", transposed_T),
    ("Using transpose() method:", transposed_method),
    ("Using np.transpose() function:", transposed_func),
):
    print()
    print(heading, flipped, sep="\n")
    print("Shape:", flipped.shape)

Multidimensional Array Transpose

python
import numpy as np

# 24 consecutive integers arranged as 2 blocks of 3 rows by 4 columns
array_3d = np.arange(24).reshape(2, 3, 4)
print("Original 3D array:", array_3d, sep="\n")
print("Shape:", array_3d.shape)
print()

# With no axes given, transposing reverses the axis order:
# (0, 1, 2) -> (2, 1, 0), so shape (2, 3, 4) becomes (4, 3, 2)
default_transpose = np.transpose(array_3d)
print("Default transpose:", default_transpose, sep="\n")
print("Shape:", default_transpose.shape)
print()

# An explicit permutation: new axis i is taken from old axis axes[i],
# so (2, 0, 1) turns shape (2, 3, 4) into (4, 2, 3)
custom_transpose = np.transpose(array_3d, (2, 0, 1))
print("Custom transpose (2, 0, 1):", custom_transpose, sep="\n")
print("Shape:", custom_transpose.shape)

Array Concatenation

Basic Concatenation Operations

python
import numpy as np

# Three 1D arrays of equal length
array1 = np.array([1, 2, 3])
array2 = np.array([4, 5, 6])
array3 = np.array([7, 8, 9])

inputs = (array1, array2, array3)
for number, values in enumerate(inputs, start=1):
    print(f"Array {number}:", values)
print()

# hstack joins 1D arrays end to end into one longer 1D array
hstack_result = np.hstack(inputs)
print("Horizontal concatenation:", hstack_result)

# vstack treats each 1D array as a row, producing a 2D array
vstack_result = np.vstack(inputs)
print("Vertical concatenation:", vstack_result, sep="\n")
print()

# concatenate along the default axis 0 also joins 1D arrays end to end
concat_result = np.concatenate(inputs)
print("concatenate result:", concat_result)

2D Array Concatenation

python
import numpy as np

# Two 2x2 arrays to join in different directions
array2d_1 = np.array([[1, 2], [3, 4]])
array2d_2 = np.array([[5, 6], [7, 8]])

print("Array 1:", array2d_1, sep="\n")
print("Array 2:", array2d_2, sep="\n")

pair = (array2d_1, array2d_2)

# hstack places the arrays side by side (more columns);
# vstack places them one above the other (more rows).
hstack_2d = np.hstack(pair)
vstack_2d = np.vstack(pair)

# concatenate does the same with an explicit axis:
# axis=0 matches vstack, axis=1 matches hstack for 2D input.
concat_axis0 = np.concatenate(pair, axis=0)
concat_axis1 = np.concatenate(pair, axis=1)

for caption, joined in (
    ("Horizontal concatenation:", hstack_2d),
    ("Vertical concatenation:", vstack_2d),
    ("Concatenate along axis 0 (rows):", concat_axis0),
    ("Concatenate along axis 1 (columns):", concat_axis1),
):
    print()
    print(caption, joined, sep="\n")

Depth Concatenation and Stack

python
import numpy as np

# Two 2x2 arrays to combine along a new axis
array1 = np.array([[1, 2], [3, 4]])
array2 = np.array([[5, 6], [7, 8]])

print("Array 1:", array1, sep="\n")
print("Array 2:", array2, sep="\n")

pair = (array1, array2)

# dstack joins along the third axis: shape (2, 2) + (2, 2) -> (2, 2, 2),
# pairing up elements that sit at the same row and column.
dstack_result = np.dstack(pair)

# stack always inserts a brand-new axis at the requested position
stack_axis0 = np.stack(pair, axis=0)
stack_axis1 = np.stack(pair, axis=1)

for caption, combined in (
    ("Depth concatenation:", dstack_result),
    ("Stack along axis 0:", stack_axis0),
    ("Stack along axis 1:", stack_axis1),
):
    print()
    print(caption, combined, sep="\n")
    print("Shape:", combined.shape)

Array Splitting

Basic Split Operations

python
import numpy as np


def _print_parts(parts):
    """Print every piece of a split result with a 1-based part number."""
    for number, chunk in enumerate(parts, start=1):
        print(f"  Part {number}: {chunk}")


array_1d = np.arange(12)
print("Original array:", array_1d)
print()

# An integer asks for that many equal-sized pieces (12 / 3 = 4 elements each)
split_equal = np.split(array_1d, 3)
print("Split into 3 equal parts:")
_print_parts(split_equal)
print()

# A list of indices cuts before each index: [0:3], [3:7], [7:]
split_points = np.split(array_1d, [3, 7])
print("Split at indices 3 and 7:")
_print_parts(split_points)
print()

# array_split accepts a count that does not divide the length evenly;
# here 12 elements in 5 parts gives pieces of unequal size.
array_split_result = np.array_split(array_1d, 5)
print("Unequal split into 5 parts:")
_print_parts(array_split_result)

2D Array Splitting

python
import numpy as np

# Create a 4x6 array (4 rows, 6 columns)
array_2d = np.arange(24).reshape(4, 6)
print("Original 2D array:")
print(array_2d)
print("Shape:", array_2d.shape)
print()

# Horizontal split (along columns): the 6 columns are cut into 3 equal
# parts, so every part keeps all 4 rows and holds 2 columns.
hsplit_result = np.hsplit(array_2d, 3)
print("Horizontal split (3 parts of 2 columns each):")
for i, part in enumerate(hsplit_result):
    print(f"  Part {i+1}:")
    print(part)
print()

# Vertical split (along rows): the 4 rows are cut into 2 equal parts,
# so every part keeps all 6 columns and holds 2 rows.
vsplit_result = np.vsplit(array_2d, 2)
print("Vertical split (2 parts of 2 rows each):")
for i, part in enumerate(vsplit_result):
    print(f"  Part {i+1}:")
    print(part)

Array Dimension Operations

Adding and Removing Dimensions

python
import numpy as np


def _show(title, arr):
    """Print a heading, the array, and the array's shape."""
    print(title)
    print(arr)
    print("Shape:", arr.shape)


array_1d = np.array([1, 2, 3, 4, 5])
print("Original 1D array:", array_1d)
print("Shape:", array_1d.shape)
print()

# Adding a dimension by indexing with np.newaxis:
# in front gives one row of 5, at the end gives 5 rows of one.
array_row = array_1d[np.newaxis, :]
array_col = array_1d[:, np.newaxis]

# Adding a dimension with np.expand_dims: axis is the position of the
# new length-1 axis in the result.
expand_axis0 = np.expand_dims(array_1d, axis=0)
expand_axis1 = np.expand_dims(array_1d, axis=1)

for title, expanded in (
    ("Add row dimension:", array_row),
    ("Add column dimension:", array_col),
    ("expand_dims axis=0:", expand_axis0),
    ("expand_dims axis=1:", expand_axis1),
):
    _show(title, expanded)
    print()

# Removing dimensions: start from an array with two leading length-1 axes
array_with_extra_dim = np.array([[[1, 2, 3, 4, 5]]])
_show("Array with extra dimensions:", array_with_extra_dim)

# squeeze drops every axis whose length is 1
squeezed = array_with_extra_dim.squeeze()
_show("After removing extra dimensions:", squeezed)

Axis Swapping and Moving

python
import numpy as np

# 24 consecutive integers arranged with shape (2, 3, 4)
array_3d = np.arange(24).reshape(2, 3, 4)
print("Original 3D array:", array_3d, sep="\n")
print("Shape:", array_3d.shape)
print()

# Exchanging axes 0 and 2 turns shape (2, 3, 4) into (4, 3, 2)
swapped = array_3d.swapaxes(0, 2)
print("Swap axis 0 and axis 2:", swapped, sep="\n")
print("Shape:", swapped.shape)
print()

# Moving axis 0 to the end keeps the other axes in their original order,
# so shape (2, 3, 4) becomes (3, 4, 2)
moved = np.moveaxis(array_3d, source=0, destination=-1)
print("Move axis 0 to last:", moved, sep="\n")
print("Shape:", moved.shape)

Practical Examples

Example 1: Image Data Processing

python
import numpy as np

# Simulate RGB image data laid out as (height, width, channels)
height, width, channels = 4, 6, 3
image = np.random.randint(0, 256, (height, width, channels), dtype=np.uint8)

print(f"Original image shape: {image.shape}")
print(f"Image data type: {image.dtype}")
print("Image data (first 2 rows):")
print(image[:2])
print()

# Convert to a different axis order (HWC -> CHW): the channel axis moves
# to the front while height and width keep their relative order.
image_chw = np.transpose(image, (2, 0, 1))
print(f"Converted to CHW format: {image_chw.shape}")
print()

# Extract single channels by indexing the last axis
red_channel = image[:, :, 0]
green_channel = image[:, :, 1]
blue_channel = image[:, :, 2]

print(f"Red channel shape: {red_channel.shape}")
print("Red channel data:")
print(red_channel)
print()

# Recombine channels in reversed order (RGB -> BGR) along a new last axis
bgr_image = np.stack([blue_channel, green_channel, red_channel], axis=2)
print(f"BGR image shape: {bgr_image.shape}")
print()

# Batch processing: add a leading batch axis. The batch is built from
# batch_size copies of the same image, so changing batch_size changes the
# resulting shape (batch_size, height, width, channels).
batch_size = 2
batch_images = np.stack([image] * batch_size, axis=0)
print(f"Batch images shape: {batch_images.shape}")

# Flatten the two spatial axes for machine learning: one row per pixel,
# one column per channel -> (height * width, channels)
flattened_image = image.reshape(-1, channels)
print(f"Flattened image shape: {flattened_image.shape}")

Example 2: Data Reorganization and Analysis

python
import numpy as np

# Synthetic temperature readings: 30 days x 24 hours x 3 sensors,
# drawn from a normal distribution with mean 20 and standard deviation 5
days, hours, sensors = 30, 24, 3
data = np.random.normal(20, 5, (days, hours, sensors))

print(f"Original data shape: {data.shape}")
print(f"Data range: {data.min():.1f} - {data.max():.1f}")
print()

# Put the sensor axis first so each sensor owns one (days, hours) block
sensor_data = data.transpose(2, 0, 1)
print(f"Grouped by sensor: {sensor_data.shape}")

# Per-sensor daily mean: collapse the trailing hours axis
daily_averages = sensor_data.mean(axis=2)
print(f"Daily averages shape: {daily_averages.shape}")
print("Sensor 1 first 5 days averages:", daily_averages[0, :5])
print()

# Group whole weeks together (assuming the data starts on a Monday);
# days that do not fill a complete week are dropped before reshaping.
weeks = days // 7
full_week_days = weeks * 7
weekly_data = data[:full_week_days].reshape(weeks, 7, hours, sensors)
print(f"Weekly data shape: {weekly_data.shape}")

# Per-week, per-sensor mean: collapse the day-of-week and hour axes together
weekly_averages = weekly_data.mean(axis=(1, 2))
print(f"Weekly averages shape: {weekly_averages.shape}")
print("Weekly averages per sensor:")
for week_number, week_average in enumerate(weekly_averages, start=1):
    print(f"  Week {week_number}: {week_average}")
print()

# Merge days and hours into one axis: one row per reading, one column per sensor
flat_data = data.reshape(-1, sensors)
print(f"Flattened data shape: {flat_data.shape}")
print("Overall statistics per sensor:")
for sensor_number, readings in enumerate(flat_data.T, start=1):
    print(f"  Sensor {sensor_number}: mean={readings.mean():.2f}, "
          f"std={readings.std():.2f}")

Example 3: Matrix Operations and Linear Algebra

python
import numpy as np


def _show_matrix(title, matrix):
    """Print a heading, the matrix, its shape, and a trailing blank line."""
    print(title)
    print(matrix)
    print(f"Shape: {matrix.shape}")
    print()


# A is 2x3 and B is 3x2, so the product A × B is defined and is 2x2
matrix_a = np.array([[1, 2, 3], [4, 5, 6]])
matrix_b = np.array([[7, 8], [9, 10], [11, 12]])

_show_matrix("Matrix A:", matrix_a)
_show_matrix("Matrix B:", matrix_b)

# Matrix multiplication: inner dimensions (3 and 3) must agree
matrix_product = matrix_a @ matrix_b
_show_matrix("Matrix product A × B:", matrix_product)

# Transpose in matrix operations:
# A^T × A (commonly used in least squares) is 3x3 for a 2x3 matrix A
ata = matrix_a.T @ matrix_a
_show_matrix("A^T × A:", ata)

# A matrix is symmetric when it equals its own transpose
symmetric = np.array([[1, 2, 3], [2, 4, 5], [3, 5, 6]])
print("Symmetric matrix:", symmetric, sep="\n")
print("Is symmetric:", np.allclose(symmetric, symmetric.T))

Performance Optimization Tips

Memory Layout and Performance

python
import numpy as np
import time


def _time_row_sums(matrix, count=100):
    """Return the seconds taken to sum each of the first `count` rows of `matrix`."""
    # perf_counter is the high-resolution clock intended for timing short
    # intervals; time.time() is wall-clock time and may be too coarse to
    # resolve loops this short.
    start = time.perf_counter()
    for i in range(count):
        matrix[i, :].sum()
    return time.perf_counter() - start


def _time_column_sums(matrix, count=100):
    """Return the seconds taken to sum each of the first `count` columns of `matrix`."""
    start = time.perf_counter()
    for i in range(count):
        matrix[:, i].sum()
    return time.perf_counter() - start


# Create large arrays to test performance: the same values stored once in
# row-major (C) order and once in column-major (Fortran) order
size = 1000
array_c = np.random.random((size, size))
array_f = np.asfortranarray(array_c)  # Fortran order

print(f"C-order array flag: {array_c.flags['C_CONTIGUOUS']}")
print(f"Fortran-order array flag: {array_f.flags['F_CONTIGUOUS']}")
print()

# Test row access performance: a row is contiguous in memory only in the
# C-order array
c_row_time = _time_row_sums(array_c)
f_row_time = _time_row_sums(array_f)

print(f"C-order row access time: {c_row_time:.4f} seconds")
print(f"F-order row access time: {f_row_time:.4f} seconds")
print()

# Test column access performance: a column is contiguous in memory only
# in the Fortran-order array
c_col_time = _time_column_sums(array_c)
f_col_time = _time_column_sums(array_f)

print(f"C-order column access time: {c_col_time:.4f} seconds")
print(f"F-order column access time: {f_col_time:.4f} seconds")

Chapter Summary

In this chapter, we learned:

  • Basic concepts and attributes of array shapes
  • Various methods for array reshaping
  • Difference between flatten and ravel
  • Multiple ways to transpose arrays
  • Various concatenation methods (hstack, vstack, concatenate, stack, etc.)
  • Different splitting techniques
  • Adding and removing array dimensions
  • Axis swapping and moving operations
  • Shape operation techniques in practical applications
  • Performance optimization considerations

Next Steps

In the next chapter, we'll learn NumPy's mathematical operations and functions, including basic operations, statistical functions, trigonometric functions, and more.

Exercises

  1. Create a 3x4 array, reshape it to 2x6, then transpose it
  2. Create three 2x3 arrays, concatenate them horizontally, vertically, and depth-wise
  3. Create a 4x6 array and split it into six 2x2 subarrays (hint: combine a vertical split with a horizontal split)
  4. Implement a function to convert images from HWC format to CHW format
  5. Create a batch processing function to combine multiple 1D arrays into a 2D batch array

Content is for learning and research only.