NumPy Array Basics
In this chapter, we'll dive deep into NumPy arrays (ndarray), including array creation, attributes, data types, and other core concepts.
What is a NumPy Array
NumPy array (ndarray) is the core data structure of the NumPy library. It's a multidimensional array object with the following characteristics:
- Homogeneity: All elements in the array must be of the same data type
- Fixed Size: Array size is fixed after creation and cannot be dynamically changed
- Efficiency: Uses contiguous memory storage for fast operations
- Multidimensionality: Supports arrays of any dimension
Array Creation Methods
1. Creating from Python Lists
python
import numpy as np
# Build arrays from nested Python lists: each level of nesting becomes one
# array dimension.
# NOTE: no dtype is given, so integers get the platform default integer type
# (int64 on most platforms; int32 on Windows with NumPy < 2.0).
# 1D array
array_1d = np.array([1, 2, 3, 4, 5])
print("1D array:", array_1d)
print("Data type:", array_1d.dtype)
print("Shape:", array_1d.shape)
print()
# 2D array: two rows of three elements each
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("2D array:")
print(array_2d)
print("Data type:", array_2d.dtype)
print("Shape:", array_2d.shape)
print()
# 3D array: two blocks, each a 2x2 matrix
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print("3D array:")
print(array_3d)
print("Data type:", array_3d.dtype)
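print("Shape:", array_3d.shape)
Output (the integer dtype is platform-dependent: int32 is the default on Windows with NumPy < 2.0, int64 elsewhere):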
1D array: [1 2 3 4 5]
Data type: int32
Shape: (5,)
2D array:
[[1 2 3]
[4 5 6]]
Data type: int32
Shape: (2, 3)
3D array:
[[[1 2]
[3 4]]
[[5 6]
[7 8]]]
Data type: int32
Shape: (2, 2, 2)
2. Creating with Built-in Functions
Creating Special Arrays
python
import numpy as np
# Constructors for pre-filled arrays. A shape is either an int (1D) or a tuple.
# np.zeros, np.ones and np.eye produce float64 unless a dtype is passed.
# All-zeros array
zeros_1d = np.zeros(5)
zeros_2d = np.zeros((3, 4))
print("1D all-zeros array:", zeros_1d)
print("2D all-zeros array:")
print(zeros_2d)
print()
# All-ones array
ones_1d = np.ones(4)
ones_2d = np.ones((2, 3))
print("1D all-ones array:", ones_1d)
print("2D all-ones array:")
print(ones_2d)
print()
# Identity matrix: ones on the main diagonal, zeros elsewhere
identity = np.eye(3)
print("3x3 identity matrix:")
print(identity)
print()
# Array filled with specific value; the dtype is inferred from the fill value,
# so the integer 7 yields an integer array
full_array = np.full((2, 3), 7)
print("Array filled with 7:")
print(full_array)
Output:
1D all-zeros array: [0. 0. 0. 0. 0.]
2D all-zeros array:
[[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]]
1D all-ones array: [1. 1. 1. 1.]
2D all-ones array:
[[1. 1. 1.]
[1. 1. 1.]]
3x3 identity matrix:
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
Array filled with 7:
[[7 7 7]
[7 7 7]]
Creating Numeric Sequences
python
import numpy as np
# Arithmetic sequence: np.arange(start, stop, step) excludes the stop value
arange_array = np.arange(10) # 0 to 9
print("0 to 9:", arange_array)
arange_step = np.arange(2, 10, 2) # From 2 up to (but not including) 10, step 2
print("2 to 10, step 2:", arange_step)
# With a float step the element count can be off by one because of rounding;
# prefer np.linspace when the number of points matters.
arange_float = np.arange(0, 1, 0.1) # Float step
print("0 to 1, step 0.1:", arange_float)
print()
# Evenly spaced sequence: np.linspace takes the number of points, not a step
linspace_array = np.linspace(0, 10, 5) # 5 evenly spaced points from 0 to 10 (both ends included)
print("5 points from 0 to 10:", linspace_array)
linspace_exclude = np.linspace(0, 10, 5, endpoint=False) # Exclude endpoint
print("5 points from 0 to 10 (excluding endpoint):", linspace_exclude)
print()
# Log space
logspace_array = np.logspace(0, 2, 5) # 5 points from 10^0 to 10^2, evenly spaced on a log scale
print("Log space:", logspace_array)
Output:
0 to 9: [0 1 2 3 4 5 6 7 8 9]
2 to 10, step 2: [2 4 6 8]
0 to 1, step 0.1: [0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]
5 points from 0 to 10: [ 0. 2.5 5. 7.5 10. ]
5 points from 0 to 10 (excluding endpoint): [0. 2. 4. 6. 8.]
Log space: [ 1. 3.16227766 10. 31.6227766 100. ]
Creating Random Arrays
python
import numpy as np
# Set random seed for reproducible results
# (this seeds NumPy's legacy global random state, which the calls below share)
np.random.seed(42)
# Random numbers in the half-open interval [0, 1)
random_uniform = np.random.random(5)
print("0-1 random numbers:", random_uniform)
# Random integers in specified range
# NOTE: the upper bound is exclusive, so randint(1, 10, 5) draws from 1..9
random_int = np.random.randint(1, 10, 5)
print("1-10 random integers:", random_int)
# Normal distribution random numbers
random_normal = np.random.normal(0, 1, 5) # Mean 0, std 1
print("Normal distribution random numbers:", random_normal)
# Multidimensional random array: pass the shape as a tuple
random_2d = np.random.random((3, 3))
print("3x3 random array:")
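print(random_2d)
Output (illustrative — apart from the first line, the values shown may differ from an actual run with seed 42):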
0-1 random numbers: [0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
1-10 random integers: [6 4 8 8 3]
Normal distribution random numbers: [ 1.76405235 0.40015721 0.97873798 2.2408932 1.86755799]
3x3 random array:
[[0.95008842 0.4879643 0.22479665]
[0.19806286 0.76053071 0.16911084]
[0.08833981 0.68535982 0.95339335]]
Basic Array Attributes
python
import numpy as np
# Create sample array (3 rows x 4 columns of integers)
array = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print("Sample array:")
print(array)
print()
# Basic attributes
# ndim is the number of axes, shape the length of each axis, and size the
# product of the shape; nbytes equals size * itemsize.
print(f"Number of dimensions (ndim): {array.ndim}")
print(f"Array shape (shape): {array.shape}")
print(f"Total elements (size): {array.size}")
# dtype (and therefore itemsize and nbytes) follows the platform default
# integer type because no dtype was specified above.
print(f"Data type (dtype): {array.dtype}")
print(f"Bytes per element (itemsize): {array.itemsize}")
print(f"Total bytes (nbytes): {array.nbytes}")
print(f"Memory layout (flags): ")
print(array.flags)
Output:
Sample array:
[[ 1 2 3 4]
[ 5 6 7 8]
[ 9 10 11 12]]
Number of dimensions (ndim): 2
Array shape (shape): (3, 4)
Total elements (size): 12
Data type (dtype): int32
Bytes per element (itemsize): 4
Total bytes (nbytes): 48
Memory layout (flags):
C_CONTIGUOUS : True
F_CONTIGUOUS : False
OWNDATA : True
WRITEABLE : True
ALIGNED : True
WRITEBACKIFCOPY : False
UPDATEIFCOPY : False
Data Types Explained
NumPy Supported Data Types
python
import numpy as np
# Passing dtype= fixes the element type explicitly; itemsize reports the
# number of bytes each element occupies.
# Integer types
int8_array = np.array([1, 2, 3], dtype=np.int8) # 8-bit integer
int16_array = np.array([1, 2, 3], dtype=np.int16) # 16-bit integer
int32_array = np.array([1, 2, 3], dtype=np.int32) # 32-bit integer
int64_array = np.array([1, 2, 3], dtype=np.int64) # 64-bit integer
print("int8:", int8_array.dtype, "bytes:", int8_array.itemsize)
print("int16:", int16_array.dtype, "bytes:", int16_array.itemsize)
print("int32:", int32_array.dtype, "bytes:", int32_array.itemsize)
print("int64:", int64_array.dtype, "bytes:", int64_array.itemsize)
print()
# Float types
float16_array = np.array([1.0, 2.0, 3.0], dtype=np.float16) # Half precision
float32_array = np.array([1.0, 2.0, 3.0], dtype=np.float32) # Single precision
float64_array = np.array([1.0, 2.0, 3.0], dtype=np.float64) # Double precision
print("float16:", float16_array.dtype, "bytes:", float16_array.itemsize)
print("float32:", float32_array.dtype, "bytes:", float32_array.itemsize)
print("float64:", float64_array.dtype, "bytes:", float64_array.itemsize)
print()
# Boolean type (each element takes a full byte, not a single bit)
bool_array = np.array([True, False, True], dtype=np.bool_)
print("bool:", bool_array.dtype, "bytes:", bool_array.itemsize)
print("Boolean array:", bool_array)
print()
# Complex type (complex128 = two float64 values: real and imaginary parts)
complex_array = np.array([1+2j, 3+4j], dtype=np.complex128)
print("complex128:", complex_array.dtype, "bytes:", complex_array.itemsize)
print("Complex array:", complex_array)
Output:
int8: int8 bytes: 1
int16: int16 bytes: 2
int32: int32 bytes: 4
int64: int64 bytes: 8
float16: float16 bytes: 2
float32: float32 bytes: 4
float64: float64 bytes: 8
bool: bool bytes: 1
Boolean array: [ True False True]
complex128: complex128 bytes: 16
Complex array: [1.+2.j 3.+4.j]
Data Type Conversion
python
import numpy as np
# astype() returns a new array of the requested dtype; the source array is
# left unchanged.
# Create integer array
int_array = np.array([1, 2, 3, 4, 5])
print("Original integer array:", int_array, "type:", int_array.dtype)
# Convert to float
float_array = int_array.astype(np.float64)
print("Converted to float:", float_array, "type:", float_array.dtype)
# Convert to string (the resulting '<U..' width depends on the source integer type)
string_array = int_array.astype(np.str_)
print("Converted to string:", string_array, "type:", string_array.dtype)
# Float to integer (truncates decimal part, i.e. rounds toward zero — it does
# not round to nearest: 3.9 becomes 3)
float_data = np.array([1.7, 2.3, 3.9])
int_from_float = float_data.astype(np.int32)
print("Float array:", float_data)
print("Converted to integer:", int_from_float)
# String to number (every element must parse as an integer, else ValueError)
string_numbers = np.array(['1', '2', '3'])
numbers = string_numbers.astype(np.int32)
print("String array:", string_numbers)
print("Converted to numbers:", numbers)
Output:
Original integer array: [1 2 3 4 5] type: int32
Converted to float: [1. 2. 3. 4. 5.] type: float64
Converted to string: ['1' '2' '3' '4' '5'] type: <U11
Float array: [1.7 2.3 3.9]
Converted to integer: [1 2 3]
String array: ['1' '2' '3']
Converted to numbers: [1 2 3]
Array Shape Operations
Changing Array Shape
python
import numpy as np
# reshape() requires the new shape to hold exactly the same number of
# elements (12 here) and returns a view of the same data when possible.
# Create 1D array
array_1d = np.arange(12)
print("Original 1D array:", array_1d)
print("Shape:", array_1d.shape)
print()
# Reshape to 2D array
array_2d = array_1d.reshape(3, 4)
print("Reshaped to 3x4:")
print(array_2d)
print("Shape:", array_2d.shape)
print()
# Reshape to 3D array
array_3d = array_1d.reshape(2, 2, 3)
print("Reshaped to 2x2x3:")
print(array_3d)
print("Shape:", array_3d.shape)
print()
# Auto-calculate dimension (using -1): at most one axis may be -1, and its
# length is inferred from the total size (12 / 4 = 3)
auto_reshape = array_1d.reshape(4, -1) # Auto-calculate columns
print("Auto-calculate dimension (4, -1):")
print(auto_reshape)
print("Shape:", auto_reshape.shape)
Output:
Original 1D array: [ 0 1 2 3 4 5 6 7 8 9 10 11]
Shape: (12,)
Reshaped to 3x4:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
Shape: (3, 4)
Reshaped to 2x2x3:
[[[ 0 1 2]
[ 3 4 5]]
[[ 6 7 8]
[ 9 10 11]]]
Shape: (2, 2, 3)
Auto-calculate dimension (4, -1):
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]]
Shape: (4, 3)
Flattening Arrays
python
import numpy as np
# Create 2D array
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
print("Original 2D array:")
print(array_2d)
print()
# Flatten to 1D array (flatten() always returns an independent copy)
flattened = array_2d.flatten()
print("flatten():", flattened)
print("Original array unchanged:", array_2d)
print()
# Flatten to 1D array (ravel() returns a view when possible, otherwise a copy;
# this array is contiguous, so a view is returned here)
raveled = array_2d.ravel()
print("ravel():", raveled)
print("Modifying ravel result:")
# Writing through the view also modifies array_2d, since both share memory
raveled[0] = 999
print("ravel result:", raveled)
print("Original array:", array_2d) # Original array also changes
Output:
Original 2D array:
[[1 2 3]
[4 5 6]]
flatten(): [1 2 3 4 5 6]
Original array unchanged: [[1 2 3]
[4 5 6]]
ravel(): [1 2 3 4 5 6]
Modifying ravel result:
ravel result: [999 2 3 4 5 6]
Original array: [[999 2 3]
[ 4 5 6]]
Array Concatenation and Splitting
Array Concatenation
python
import numpy as np
# Create two arrays
array1 = np.array([1, 2, 3])
array2 = np.array([4, 5, 6])
print("Array 1:", array1)
print("Array 2:", array2)
print()
# Horizontal concatenation (along column direction); for 1D inputs this
# simply joins them end to end into one longer 1D array
horizontal = np.hstack([array1, array2])
print("Horizontal concatenation:", horizontal)
# Vertical concatenation (along row direction); each 1D input becomes one row
# of the 2D result, so the inputs must have equal length
vertical = np.vstack([array1, array2])
print("Vertical concatenation:")
print(vertical)
print()
# 2D array concatenation
array2d_1 = np.array([[1, 2], [3, 4]])
array2d_2 = np.array([[5, 6], [7, 8]])
print("2D array 1:")
print(array2d_1)
print("2D array 2:")
print(array2d_2)
print()
# Concatenate along axis 0 (row direction): rows of the second array are
# appended below the first; column counts must match
concat_axis0 = np.concatenate([array2d_1, array2d_2], axis=0)
print("Concatenate along axis 0:")
print(concat_axis0)
# Concatenate along axis 1 (column direction): columns of the second array
# are appended to the right; row counts must match
concat_axis1 = np.concatenate([array2d_1, array2d_2], axis=1)
print("Concatenate along axis 1:")
print(concat_axis1)
Output:
Array 1: [1 2 3]
Array 2: [4 5 6]
Horizontal concatenation: [1 2 3 4 5 6]
Vertical concatenation:
[[1 2 3]
[4 5 6]]
2D array 1:
[[1 2]
[3 4]]
2D array 2:
[[5 6]
[7 8]]
Concatenate along axis 0:
[[1 2]
[3 4]
[5 6]
[7 8]]
Concatenate along axis 1:
[[1 2 5 6]
[3 4 7 8]]
Array Splitting
python
import numpy as np
# Demonstrates np.split / np.hsplit / np.vsplit. Each returns a list of
# sub-arrays.
# Create array
array = np.arange(12)
print("Original array:", array)
# Equal split: an integer argument must divide the axis length exactly,
# otherwise np.split raises ValueError
split_equal = np.split(array, 3) # Split into 3 equal parts
print("Split into 3 equal parts:")
for i, part in enumerate(split_equal):
    print(f" Part {i+1}: {part}")
print()
# Split at specified points: a list argument gives the indices where each new
# part begins, producing array[:3], array[3:7] and array[7:]
split_points = np.split(array, [3, 7]) # Split at indices 3 and 7
print("Split at indices 3 and 7:")
for i, part in enumerate(split_points):
    print(f" Part {i+1}: {part}")
print()
# 2D array splitting
array_2d = np.arange(12).reshape(3, 4)
print("2D array:")
print(array_2d)
print()
# Horizontal split: cuts along the column axis
hsplit_result = np.hsplit(array_2d, 2) # Split into 2 parts of 2 columns each
print("Horizontal split:")
for i, part in enumerate(hsplit_result):
    print(f" Part {i+1}:")
    print(part)
print()
# Vertical split: cuts along the row axis
vsplit_result = np.vsplit(array_2d, 3) # Split into 3 parts of 1 row each
print("Vertical split:")
for i, part in enumerate(vsplit_result):
    print(f" Part {i+1}: {part}")
Output:
Original array: [ 0 1 2 3 4 5 6 7 8 9 10 11]
Split into 3 equal parts:
Part 1: [0 1 2 3]
Part 2: [4 5 6 7]
Part 3: [ 8 9 10 11]
Split at indices 3 and 7:
Part 1: [0 1 2]
Part 2: [3 4 5 6]
Part 3: [ 7 8 9 10 11]
2D array:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
Horizontal split:
Part 1:
[[0 1]
[4 5]
[8 9]]
Part 2:
[[ 2 3]
[ 6 7]
[10 11]]
Vertical split:
Part 1: [[0 1 2 3]]
Part 2: [[4 5 6 7]]
Part 3: [[ 8 9 10 11]]
Array Copying
View vs Copy
python
import numpy as np
# Create original array
original = np.array([1, 2, 3, 4, 5])
print("Original array:", original)
print()
# Create view (shared memory): a new array object over the same data buffer
view = original.view()
print("View:", view)
print("Shares memory:", np.shares_memory(original, view))
# Modify view: the write is visible through `original` as well
view[0] = 999
print("After modifying view:")
print("Original array:", original)
print("View:", view)
print()
# Create copy (independent memory); it is taken after the write above, so it
# starts out with 999 in position 0
copy = original.copy()
print("Copy:", copy)
print("Shares memory:", np.shares_memory(original, copy))
# Modify copy: `original` is not affected
copy[1] = 888
print("After modifying copy:")
print("Original array:", original)
print("Copy:", copy)
Output:
Original array: [1 2 3 4 5]
View: [1 2 3 4 5]
Shares memory: True
After modifying view:
Original array: [999 2 3 4 5]
View: [999 2 3 4 5]
Copy: [999 2 3 4 5]
Shares memory: False
After modifying copy:
Original array: [999 2 3 4 5]
Copy: [999 888 3 4 5]
Practical Examples
Example 1: Image Data Processing
python
import numpy as np
# Simulate a simple grayscale image (8x8 pixels)
# randint's upper bound is exclusive, so pixel values fall in 0..255
image = np.random.randint(0, 256, (8, 8), dtype=np.uint8)
print("Original image data:")
print(image)
print(f"Image shape: {image.shape}")
print(f"Data type: {image.dtype}")
print(f"Pixel value range: {image.min()} - {image.max()}")
print()
# Image processing operations
# 1. Increase brightness
# uint8 arithmetic wraps around (e.g. 250 + 50 -> 44), so adding directly to
# the image would corrupt bright pixels before np.clip ever sees them. Widen
# to int16 first, clip to the valid range, then narrow back to uint8.
brighter = np.clip(image.astype(np.int16) + 50, 0, 255).astype(np.uint8)
print("Pixel range after brightness increase:", f"{brighter.min()} - {brighter.max()}")
# 2. Image binarization: pixels above the threshold become 255, the rest 0
threshold = 128
binary = (image > threshold).astype(np.uint8) * 255
print(f"Unique values after binarization (threshold {threshold}):", np.unique(binary))
# 3. Calculate image statistics
print(f"Average pixel value: {np.mean(image):.2f}")
print(f"Standard deviation: {np.std(image):.2f}")
print(f"Median: {np.median(image)}")
Example 2: Scientific Computing
python
import numpy as np
# Create experimental data
# 100 points over 10 s is roughly 10 samples per second
time = np.linspace(0, 10, 100) # Time from 0 to 10 seconds, 100 data points
frequency = 2 # 2Hz
amplitude = 5 # Amplitude 5
noise_level = 0.5 # Noise level
# Generate noisy sine wave signal
signal = amplitude * np.sin(2 * np.pi * frequency * time)
# Gaussian noise with mean 0 and standard deviation noise_level, one sample
# per time point (no seed is set, so the statistics vary between runs)
noise = np.random.normal(0, noise_level, len(time))
noisy_signal = signal + noise
print(f"Time range: {time[0]:.2f} - {time[-1]:.2f} seconds")
print(f"Number of data points: {len(time)}")
print(f"Signal statistics:")
print(f" Mean: {np.mean(noisy_signal):.3f}")
print(f" Standard deviation: {np.std(noisy_signal):.3f}")
print(f" Maximum: {np.max(noisy_signal):.3f}")
print(f" Minimum: {np.min(noisy_signal):.3f}")
# Simple signal processing
# Calculate moving average to smooth signal: convolving with a uniform kernel
# of weights 1/window_size averages each run of window_size samples.
# mode='valid' keeps only positions where the window fully overlaps the
# signal, so the result has len(noisy_signal) - window_size + 1 samples.
window_size = 5
smoothed_signal = np.convolve(noisy_signal, np.ones(window_size)/window_size, mode='valid')
print(f"\nSmoothed signal length: {len(smoothed_signal)}")
print(f"Standard deviation after smoothing: {np.std(smoothed_signal):.3f}")
Performance Comparison
python
import sys
import time
import numpy as np
# Compare performance of NumPy arrays and Python lists
size = 1000000
# Python list operations
# time.perf_counter() is the high-resolution clock meant for measuring
# intervals; time.time() can be coarse enough to report a 0.0 interval, which
# would make the speedup division below fail.
start_time = time.perf_counter()
python_list = list(range(size))
python_result = [x * 2 for x in python_list]
list_time = time.perf_counter() - start_time
# NumPy array operations
start_time = time.perf_counter()
numpy_array = np.arange(size)
numpy_result = numpy_array * 2
numpy_time = time.perf_counter() - start_time
print(f"Data size: {size:,} elements")
print(f"Python list time: {list_time:.4f} seconds")
print(f"NumPy array time: {numpy_time:.4f} seconds")
print(f"NumPy speedup: {list_time/numpy_time:.1f}x")
# Memory usage comparison
# sys.getsizeof(python_list) counts only the list's own pointer array, not the
# int objects it refers to. Measure an evenly spaced sample of about 100
# elements and scale the average up to the full length, so the estimate covers
# every element rather than just the sampled ones.
sample = python_list[::max(1, size // 100)]
average_element_size = sum(sys.getsizeof(x) for x in sample) / len(sample)
list_memory = sys.getsizeof(python_list) + int(average_element_size * len(python_list)) # Estimate
numpy_memory = numpy_array.nbytes
print(f"\nPython list memory (estimated): {list_memory:,} bytes")
print(f"NumPy array memory: {numpy_memory:,} bytes")
print(f"Memory efficiency improvement: {list_memory/numpy_memory:.1f}x")
Chapter Summary
In this chapter, we learned:
- Basic concepts and characteristics of NumPy arrays
- Multiple array creation methods
- Basic array attributes and information retrieval
- Data type system and type conversion
- Array shape operations (reshaping, flattening)
- Array concatenation and splitting
- Difference between views and copies
- Practical application examples
- Performance advantage analysis
Next Steps
In the next chapter, we'll learn NumPy array indexing and slicing operations, which are fundamental skills for data manipulation.
Exercises
- Create a 5x5 array filled with numbers 1 to 25, then reshape it to 25x1
- Create two 3x3 random arrays, concatenate them horizontally and vertically
- Create an array with 100 random numbers, split it into 10 equal parts
- Compare memory usage of different data types (int8, int32, float32, float64)
- Create an array simulating temperature data, implement data standardization (mean 0, std 1)