PostgreSQL SELECT
Overview
The SELECT statement is used to query data from one or more tables. It is the most commonly used SQL statement and forms the foundation of data retrieval in PostgreSQL.
Basic SELECT Syntax
sql
SELECT column1, column2, ...
FROM table_name;Select All Columns
sql
-- Select all columns
SELECT * FROM users;
-- Select all from specific table
SELECT * FROM products;Select Specific Columns
sql
-- Select specific columns
SELECT name, email FROM users;
-- Select with multiple columns
SELECT id, name, email, created_at FROM users;Column Aliases
sql
-- Using AS keyword
SELECT name AS customer_name, email AS contact_email FROM users;
-- Without AS keyword
SELECT name customer_name, email contact_email FROM users;
-- Alias with spaces (use quotes)
SELECT name AS "Customer Name", email AS "Email Address" FROM users;Expressions in SELECT
sql
-- Arithmetic operations
SELECT product_name, price, price * 1.1 AS price_with_tax FROM products;
-- String concatenation
SELECT first_name || ' ' || last_name AS full_name FROM users;
-- Using functions
SELECT UPPER(name) AS name_upper, LENGTH(email) AS email_length FROM users;DISTINCT - Remove Duplicates
What is DISTINCT?
The DISTINCT keyword is used to remove duplicate rows from query results. It returns only unique values from the specified columns.
Basic Syntax
sql
SELECT DISTINCT column1, column2, ...
FROM table_name;Single Column DISTINCT
sql
-- Remove duplicates
SELECT DISTINCT country FROM users;
-- Get unique product categories
SELECT DISTINCT category FROM products ORDER BY category;
-- Count unique values
SELECT COUNT(DISTINCT customer_id) AS unique_customers FROM orders;
SELECT COUNT(DISTINCT product_id) AS unique_products FROM order_items;Multiple Column DISTINCT
When using DISTINCT with multiple columns, PostgreSQL considers the combination of all specified columns to determine uniqueness.
sql
-- Multiple column deduplication
SELECT DISTINCT status, country FROM users;
-- Get unique combinations of city and state
SELECT DISTINCT city, state FROM customers ORDER BY state, city;
-- Get unique product-category combinations
SELECT DISTINCT product_name, category FROM products;DISTINCT with Aggregate Functions
sql
-- Count unique customers
SELECT COUNT(DISTINCT customer_id) AS unique_customers FROM orders;
-- Count unique products in each category
SELECT
category,
COUNT(DISTINCT product_id) AS unique_products
FROM products
GROUP BY category;
-- Multiple distinct counts
SELECT
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(DISTINCT product_id) AS unique_products,
COUNT(*) AS total_orders
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id;DISTINCT ON (PostgreSQL-Specific)
DISTINCT ON returns the first row for each unique value in the specified column(s).
sql
-- Get the first employee from each department
SELECT DISTINCT ON (department)
department,
employee_name,
salary
FROM employees
ORDER BY department, salary DESC;
-- Get the most recent order for each customer
SELECT DISTINCT ON (customer_id)
customer_id,
order_id,
order_date,
total_amount
FROM orders
ORDER BY customer_id, order_date DESC;
-- Get the highest-priced product in each category
SELECT DISTINCT ON (category)
category,
product_name,
price
FROM products
ORDER BY category, price DESC;DISTINCT Performance Optimization
sql
-- 1. Create indexes on frequently queried columns
CREATE INDEX idx_customers_city ON customers(city);
SELECT DISTINCT city FROM customers;
-- 2. DISTINCT vs GROUP BY
-- Using DISTINCT
SELECT DISTINCT category FROM products;
-- Using GROUP BY (may be faster with indexes)
SELECT category FROM products GROUP BY category;
-- 3. Avoid DISTINCT on large result sets
-- Inefficient
SELECT DISTINCT * FROM large_table;
-- Better: Be specific about columns
SELECT DISTINCT column1, column2 FROM large_table;DISTINCT with JOINs
sql
-- Get unique customers who bought specific products
SELECT DISTINCT c.customer_id, c.customer_name
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
WHERE oi.product_id IN (1, 2, 3);
-- Get unique product categories sold in last month
SELECT DISTINCT p.category
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
WHERE o.order_date >= CURRENT_DATE - INTERVAL '1 month';NULL Handling
DISTINCT treats NULL values as equal:
sql
-- If multiple rows have NULL in column, only one NULL is returned
SELECT DISTINCT phone_number FROM customers;
-- To exclude NULLs
SELECT DISTINCT phone_number
FROM customers
WHERE phone_number IS NOT NULL;Common Mistakes
sql
-- Mistake 1: Using DISTINCT unnecessarily
SELECT DISTINCT id FROM users; -- Primary key is already unique
-- Mistake 2: DISTINCT with ORDER BY
-- Error: ORDER BY column must be in SELECT
SELECT DISTINCT city FROM customers ORDER BY state;
-- Correct: Include all ORDER BY columns
SELECT DISTINCT city, state FROM customers ORDER BY state, city;Calculated Fields
sql
-- Mathematical calculations
SELECT
product_name,
price,
quantity,
price * quantity AS total_value
FROM inventory;
-- Conditional expressions
SELECT
name,
age,
CASE
WHEN age < 18 THEN 'Minor'
WHEN age >= 18 AND age < 65 THEN 'Adult'
ELSE 'Senior'
END AS age_group
FROM users;Selecting from Multiple Tables
sql
-- Simple join in SELECT
SELECT users.name, orders.order_number
FROM users, orders
WHERE users.id = orders.user_id;
-- Better: Use explicit JOIN
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id;Subqueries in SELECT
sql
-- Scalar subquery
SELECT
name,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count
FROM users;
-- Subquery with aggregate
SELECT
product_name,
price,
(SELECT AVG(price) FROM products) AS avg_price
FROM products;SELECT with Functions
sql
-- Aggregate functions
SELECT COUNT(*) FROM users;
SELECT AVG(price) FROM products;
SELECT SUM(total) FROM orders;
SELECT MIN(price), MAX(price) FROM products;
-- String functions
SELECT UPPER(name), LOWER(email) FROM users;
SELECT CONCAT(first_name, ' ', last_name) AS full_name FROM users;
-- Date functions
SELECT CURRENT_DATE, CURRENT_TIME, CURRENT_TIMESTAMP;
SELECT name, AGE(birth_date) AS age FROM users;SELECT with LIMIT
sql
-- Limit number of rows
SELECT * FROM users LIMIT 10;
-- Limit with offset (pagination)
SELECT * FROM users LIMIT 10 OFFSET 20;
-- Alternative syntax
SELECT * FROM users OFFSET 20 LIMIT 10;SELECT with ORDER BY
sql
-- Order by single column
SELECT * FROM users ORDER BY name;
-- Order descending
SELECT * FROM products ORDER BY price DESC;
-- Order by multiple columns
SELECT * FROM users ORDER BY country, name;
-- Order with expressions
SELECT * FROM products ORDER BY price * quantity DESC;SELECT with WHERE
sql
-- Basic filtering
SELECT * FROM users WHERE status = 'active';
-- Multiple conditions
SELECT * FROM products WHERE price > 50 AND category = 'Electronics';
-- Pattern matching
SELECT * FROM users WHERE email LIKE '%@gmail.com';
-- Range
SELECT * FROM products WHERE price BETWEEN 10 AND 100;
-- List
SELECT * FROM users WHERE country IN ('USA', 'UK', 'Canada');SELECT with GROUP BY
sql
-- Group and count
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country;
-- Group with multiple aggregates
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price
FROM products
GROUP BY category;SELECT with HAVING
sql
-- Filter groups
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country
HAVING COUNT(*) > 100;
-- Having with multiple conditions
SELECT
category,
AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50 AND COUNT(*) > 10;Complete SELECT Example
sql
-- Complex query combining multiple clauses
SELECT
u.name,
u.email,
COUNT(o.id) AS order_count,
SUM(o.total) AS total_spent,
AVG(o.total) AS avg_order_value
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'
AND u.created_at >= '2024-01-01'
GROUP BY u.id, u.name, u.email
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC
LIMIT 100;Best Practices
sql
-- 1. Specify columns instead of SELECT *
SELECT id, name, email FROM users; -- Good
SELECT * FROM users; -- Avoid in production
-- 2. Use table aliases for clarity
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 3. Use meaningful column aliases
SELECT
COUNT(*) AS total_users,
AVG(age) AS average_age
FROM users;
-- 4. Format for readability
SELECT
u.id,
u.name,
u.email,
COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email;Common Patterns
Pagination
sql
-- Page 1 (rows 1-10)
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 0;
-- Page 2 (rows 11-20)
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 10;
-- Page 3 (rows 21-30)
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 20;Top N Records
sql
-- Top 10 most expensive products
SELECT * FROM products ORDER BY price DESC LIMIT 10;
-- Top 5 customers by total spending
SELECT
user_id,
SUM(total) AS total_spent
FROM orders
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 5;Conditional Selection
sql
-- Select different columns based on condition
SELECT
name,
CASE
WHEN status = 'active' THEN email
ELSE 'N/A'
END AS contact
FROM users;Summary
The SELECT statement is fundamental to PostgreSQL:
- Basic:
SELECT columns FROM table - Filtering: Use WHERE clause
- Sorting: Use ORDER BY clause
- Grouping: Use GROUP BY with aggregates
- Limiting: Use LIMIT and OFFSET
- Aliases: Improve readability
- Functions: Transform and aggregate data