Database Design for Web Apps: From Normalization, Indexing to Query Optimization
The database is the "backbone" of most web apps. Wrong design from the start will lead to:
- Slow queries as data grows.
- Hard-to-debug bugs due to inconsistent data.
- Headache-inducing migration/scaling later.
This article covers key principles and patterns for effective database design for web applications.
Database Normalization
First Normal Form (1NF)
Each cell contains only one atomic value:
-- ❌ Violates 1NF
CREATE TABLE users (
id INT PRIMARY KEY,
name VARCHAR(100),
phone_numbers VARCHAR(255) -- "123-456-7890, 098-765-4321"
);
-- ✅ Follows 1NF
CREATE TABLE users (
id INT PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE user_phones (
id INT PRIMARY KEY,
user_id INT REFERENCES users(id),
phone_number VARCHAR(20)
);
Second Normal Form (2NF)
Eliminate partial dependencies:
-- ❌ Violates 2NF
CREATE TABLE order_items (
order_id INT,
product_id INT,
product_name VARCHAR(100), -- Depends on product_id
product_price DECIMAL(10,2), -- Depends on product_id
quantity INT,
PRIMARY KEY (order_id, product_id)
);
-- ✅ Follows 2NF
CREATE TABLE products (
id INT PRIMARY KEY,
name VARCHAR(100),
price DECIMAL(10,2)
);
CREATE TABLE order_items (
order_id INT,
product_id INT REFERENCES products(id),
quantity INT,
PRIMARY KEY (order_id, product_id)
);
Third Normal Form (3NF)
Eliminate transitive dependencies:
-- ❌ Violates 3NF
CREATE TABLE employees (
id INT PRIMARY KEY,
name VARCHAR(100),
department_id INT,
department_name VARCHAR(100), -- Indirectly depends via department_id
department_budget DECIMAL(12,2) -- Indirectly depends via department_id
);
-- ✅ Follows 3NF
CREATE TABLE departments (
id INT PRIMARY KEY,
name VARCHAR(100),
budget DECIMAL(12,2)
);
CREATE TABLE employees (
id INT PRIMARY KEY,
name VARCHAR(100),
department_id INT REFERENCES departments(id)
);
When to Denormalize
Read-heavy workloads
-- Normalized (many JOINs)
SELECT
u.name,
p.title,
c.content,
COUNT(pl.id) as likes_count
FROM users u
JOIN posts p ON u.id = p.user_id
JOIN comments c ON p.id = c.post_id
LEFT JOIN post_likes pl ON p.id = pl.post_id
GROUP BY u.id, p.id, c.id;
-- Denormalized (fewer JOINs)
CREATE TABLE post_stats (
post_id INT PRIMARY KEY,
likes_count INT DEFAULT 0,
comments_count INT DEFAULT 0,
shares_count INT DEFAULT 0,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Indexing Strategies
Single Column Indexes
-- Index for frequent WHERE clauses
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_posts_created_at ON posts(created_at);
CREATE INDEX idx_orders_status ON orders(status);
Composite Indexes
-- Index for multi-column queries
CREATE INDEX idx_user_posts ON posts(user_id, created_at DESC);
CREATE INDEX idx_product_category ON products(category_id, price, name);
-- Query uses index effectively
SELECT * FROM posts
WHERE user_id = 123
ORDER BY created_at DESC
LIMIT 10;
Partial Indexes
-- Index only for records satisfying condition
CREATE INDEX idx_active_users ON users(email) WHERE active = true;
CREATE INDEX idx_published_posts ON posts(created_at) WHERE status = 'published';
Covering Indexes
-- Index contains all necessary columns
CREATE INDEX idx_user_profile_covering
ON users(id)
INCLUDE (name, email, avatar_url, created_at);
-- Query doesn't need to access table
SELECT id, name, email, avatar_url, created_at
FROM users
WHERE id = 123;
Relationship Design Patterns
One-to-Many
CREATE TABLE categories (
id INT PRIMARY KEY,
name VARCHAR(100),
slug VARCHAR(100) UNIQUE
);
CREATE TABLE products (
id INT PRIMARY KEY,
name VARCHAR(200),
category_id INT NOT NULL REFERENCES categories(id),
price DECIMAL(10,2),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Many-to-Many
CREATE TABLE users (
id INT PRIMARY KEY,
username VARCHAR(50) UNIQUE,
email VARCHAR(100) UNIQUE
);
CREATE TABLE roles (
id INT PRIMARY KEY,
name VARCHAR(50) UNIQUE,
permissions TEXT[]
);
-- Junction table
CREATE TABLE user_roles (
user_id INT REFERENCES users(id) ON DELETE CASCADE,
role_id INT REFERENCES roles(id) ON DELETE CASCADE,
assigned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
assigned_by INT REFERENCES users(id),
PRIMARY KEY (user_id, role_id)
);
Self-referencing (Tree structures)
-- Adjacency List Model
CREATE TABLE categories (
id INT PRIMARY KEY,
name VARCHAR(100),
parent_id INT REFERENCES categories(id),
level INT DEFAULT 0,
path VARCHAR(500) -- "/electronics/computers/laptops"
);
-- Nested Set Model (for read-heavy scenarios)
CREATE TABLE categories_nested (
id INT PRIMARY KEY,
name VARCHAR(100),
lft INT NOT NULL,
rgt INT NOT NULL,
level INT DEFAULT 0
);
Data Types and Storage Optimization
Choosing right data types
-- ❌ Wasted storage
CREATE TABLE users (
id BIGINT, -- INT is enough for most cases
name VARCHAR(500), -- VARCHAR(100) usually enough
age DECIMAL(10,2), -- SMALLINT enough
is_active VARCHAR(10) -- BOOLEAN better
);
-- ✅ Optimized
CREATE TABLE users (
id INT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
age SMALLINT CHECK (age >= 0 AND age <= 150),
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
JSON columns (PostgreSQL, MySQL 5.7+)
CREATE TABLE user_preferences (
user_id INT PRIMARY KEY REFERENCES users(id),
settings JSONB, -- PostgreSQL
metadata JSON, -- MySQL
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Index on JSON fields
CREATE INDEX idx_user_preferences_theme
ON user_preferences USING GIN ((settings->>'theme'));
-- Query JSON data
SELECT * FROM user_preferences
WHERE settings->>'theme' = 'dark'
AND settings->'notifications'->>'email' = 'true';
Constraints and Data Integrity
Check Constraints
CREATE TABLE products (
id INT PRIMARY KEY,
name VARCHAR(200) NOT NULL,
price DECIMAL(10,2) CHECK (price > 0),
discount_percent INT CHECK (discount_percent >= 0 AND discount_percent <= 100),
status VARCHAR(20) CHECK (status IN ('draft', 'active', 'inactive', 'discontinued')),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Unique Constraints
CREATE TABLE users (
id INT PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(100) NOT NULL,
phone VARCHAR(20),
CONSTRAINT uk_users_username UNIQUE (username),
CONSTRAINT uk_users_email UNIQUE (email),
CONSTRAINT uk_users_phone UNIQUE (phone) -- Can be NULL
);
Foreign Key Constraints with Actions
CREATE TABLE posts (
id INT PRIMARY KEY,
title VARCHAR(200) NOT NULL,
user_id INT NOT NULL,
category_id INT,
CONSTRAINT fk_posts_user
FOREIGN KEY (user_id) REFERENCES users(id)
ON DELETE CASCADE,
CONSTRAINT fk_posts_category
FOREIGN KEY (category_id) REFERENCES categories(id)
ON DELETE SET NULL
);
Performance Best Practices
Query Optimization
-- ❌ Avoid SELECT *
SELECT * FROM posts WHERE user_id = 123;
-- ✅ Select only necessary columns
SELECT id, title, created_at FROM posts WHERE user_id = 123;
-- ❌ Avoid functions in WHERE
SELECT * FROM users WHERE UPPER(name) = 'JOHN';
-- ✅ Use functional index or store normalized data
CREATE INDEX idx_users_name_upper ON users(UPPER(name));
-- or
ALTER TABLE users ADD COLUMN name_normalized VARCHAR(100)
GENERATED ALWAYS AS (UPPER(name)) STORED;
Efficient Pagination
-- ❌ OFFSET has poor performance with large datasets
SELECT * FROM posts ORDER BY created_at DESC LIMIT 20 OFFSET 10000;
-- ✅ Cursor-based pagination
SELECT * FROM posts
WHERE created_at < '2023-12-01 10:00:00'
ORDER BY created_at DESC
LIMIT 20;
Backup and Recovery Strategy
Logical Backups
# PostgreSQL
pg_dump -h localhost -U username -d database_name > backup.sql
# MySQL
mysqldump -u username -p database_name > backup.sql
Point-in-time Recovery
-- PostgreSQL WAL archiving
archive_mode = on
archive_command = 'cp %p /path/to/archive/%f'
wal_level = replica
Monitoring and Maintenance
Query Performance Monitoring
-- PostgreSQL - Enable query logging
log_statement = 'all'
log_min_duration_statement = 1000 -- Log queries > 1 second
-- Analyze slow queries
SELECT query, mean_time, calls, total_time
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 10;
Index Maintenance
-- PostgreSQL - Reindex
REINDEX INDEX idx_users_email;
REINDEX TABLE users;
-- Check index usage
SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC;
Conclusion
Effective database design requires:
- Understanding business requirements and access patterns
- Balancing normalization and performance
- Using indexes smartly
- Implementing proper constraints to ensure data integrity
- Monitoring and optimizing continuously
Designing the database correctly from the start will help your application scale well and be easier to maintain in the future.
Tags: Database, PostgreSQL, SQL, Backend, Performance, Design Patterns


