JavaScript Regular Expressions
Regular Expressions (RegExp) are powerful text processing tools used for matching, finding, replacing, and validating string patterns. In JavaScript, regular expressions are widely used in form validation, data cleaning, and text parsing. Mastering regular expressions is crucial for complex string operations.
What are Regular Expressions
Regular expressions are special string patterns that describe a series of strings conforming to certain syntax rules. They provide a concise and flexible way to match, find, and manage text.
// Simple regex example
const pattern = /hello/;
const text = "hello world";
console.log(pattern.test(text)); // true
Creating Regular Expressions
Two ways to create regular expressions in JavaScript:
1. Literal Syntax (Recommended)
const regex = /pattern/flags;
const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
const phonePattern = /^\d{3}-\d{3}-\d{4}$/;
2. RegExp Constructor
const regex = new RegExp("pattern", "flags");
const emailPattern = new RegExp("^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$");
// Dynamic regex creation (escape regex metacharacters first if the term comes from user input)
const searchTerm = "JavaScript";
const searchRegex = new RegExp(searchTerm, "gi");
Regular Expression Flags
Flags modify regex behavior:
// g - Global match
const text = "hello hello hello";
console.log(text.match(/hello/g)); // ["hello", "hello", "hello"]
// i - Case insensitive
console.log(/hello/i.test("HELLO")); // true
// m - Multiline
const multiline = /^hello/m;
console.log(multiline.test("world\nhello")); // true
// s - dotAll mode (ES2018)
console.log(/hello.world/s.test("hello\nworld")); // true
// u - Unicode mode
console.log(/\u{1F600}/u.test("😀")); // true
// y - Sticky match
const sticky = /hello/y;
console.log(sticky.test("hello world")); // true
Pattern Syntax
Character Classes
// Simple character class
const vowel = /[aeiou]/;
console.log(vowel.test("hello")); // true
// Range character class
const digit = /[0-9]/;
const letter = /[a-zA-Z]/;
// Negated character class
const nonDigit = /[^0-9]/;
// Predefined character classes
console.log(/\d/.test("123")); // true (digit)
console.log(/\D/.test("abc")); // true (non-digit)
console.log(/\w/.test("hello")); // true (word character)
console.log(/\W/.test("!?")); // true (non-word character)
console.log(/\s/.test(" ")); // true (whitespace)
console.log(/\S/.test("a")); // true (non-whitespace)
Quantifiers
// Exact quantity
console.log(/a{3}/.test("aaab")); // true
// Range quantity
console.log(/a{2,4}/.test("aaa")); // true
// At least
console.log(/a{2,}/.test("aaaa")); // true
// Zero or one
console.log(/colou?r/.test("color")); // true
console.log(/colou?r/.test("colour")); // true
// Zero or more
console.log(/a*/.test("b")); // true
console.log(/a*/.test("aaa")); // true
// One or more
console.log(/a+/.test("aaa")); // true
console.log(/a+/.test("b")); // false
Boundary Matching
// Word boundary
const wordBoundary = /\bhello\b/;
console.log(wordBoundary.test("hello world")); // true
console.log(wordBoundary.test("sayhello")); // false
// Line start
console.log(/^hello/.test("hello world")); // true
console.log(/^hello/.test("say hello")); // false
// Line end
console.log(/world$/.test("hello world")); // true
// Entire string
console.log(/^hello world$/.test("hello world")); // true
Grouping and Capturing
// Simple grouping
const phone = /(\d{3})-(\d{3})-(\d{4})/;
const match = "123-456-7890".match(phone);
console.log(match[0]); // "123-456-7890"
console.log(match[1]); // "123"
console.log(match[2]); // "456"
console.log(match[3]); // "7890"
// Non-capturing group
const nonCapturing = /(?:https?):\/\/[^\s]+/;
// Named capture groups (ES2018)
const namedGroups = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const dateMatch = "2024-01-15".match(namedGroups);
console.log(dateMatch.groups.year); // "2024"
console.log(dateMatch.groups.month); // "01"
console.log(dateMatch.groups.day); // "15"
Alternation and Lookahead
// Alternation
console.log(/cat|dog/.test("I have a cat")); // true
// Positive lookahead
console.log(/password(?=\d)/.test("password123")); // true
console.log(/password(?=\d)/.test("password")); // false
// Negative lookahead
console.log(/password(?!\d)/.test("password")); // true
console.log(/password(?!\d)/.test("password123")); // false
// Positive lookbehind (ES2018)
console.log(/(?<=\d)\$/.test("100$")); // true
console.log(/(?<=\d)\$/.test("$100")); // false
// Negative lookbehind (ES2018)
console.log(/(?<!\d)\$/.test("$100")); // true
RegExp Object Methods
test()
const regex = /hello/;
console.log(regex.test("hello world")); // true
console.log(regex.test("goodbye")); // false
exec()
const regex = /(\d{4})-(\d{2})-(\d{2})/;
const result = regex.exec("Today is 2024-01-15");
console.log(result[0]); // "2024-01-15"
console.log(result[1]); // "2024"
console.log(result.index); // 9
String Methods with Regex
match()
const text = "Phone numbers: 123-456-7890 and 098-765-4321";
// Without global flag
const single = text.match(/\d{3}-\d{3}-\d{4}/);
console.log(single); // ["123-456-7890"]
// With global flag
const all = text.match(/\d{3}-\d{3}-\d{4}/g);
console.log(all); // ["123-456-7890", "098-765-4321"]
search()
const text = "Hello world, welcome to JavaScript";
console.log(text.search(/world/)); // 6
console.log(text.search(/javascript/i)); // 24
console.log(text.search(/python/)); // -1
replace()
const text = "Hello world, hello JavaScript";
// Basic replace
console.log(text.replace(/hello/i, "Hi")); // "Hi world, hello JavaScript"
// Global replace
console.log(text.replace(/hello/gi, "Hi")); // "Hi world, Hi JavaScript"
// Using capture groups
const phone = "Call me at 123-456-7890";
console.log(phone.replace(/(\d{3})-(\d{3})-(\d{4})/, "($1) $2-$3"));
// "Call me at (123) 456-7890"
// Function replacement
const prices = "Price is $29.99 and $15.50";
console.log(prices.replace(/\$(\d+\.\d+)/g, (match, price) => {
return `$${(parseFloat(price) * 1.1).toFixed(2)}`;
})); // "Price is $32.99 and $17.05"
split()
const text = "apple,banana;orange:grape";
console.log(text.split(/[,;:]/)); // ["apple", "banana", "orange", "grape"]
Common Regex Patterns
Form Validation
// Email validation
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
console.log(emailRegex.test("user@example.com")); // true
// Phone number
const phoneRegex = /^\d{3}-\d{3}-\d{4}$/;
console.log(phoneRegex.test("123-456-7890")); // true
// Password strength (8+ chars, upper/lower case, digit)
const passwordRegex = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d@$!%*?&]{8,}$/;
console.log(passwordRegex.test("Password123")); // true
// Zip code
const zipCodeRegex = /^\d{5}(-\d{4})?$/;
console.log(zipCodeRegex.test("12345")); // true
console.log(zipCodeRegex.test("12345-6789")); // true
Data Extraction
// Extract URL parameters
const url = "https://example.com?name=John&age=25";
const paramRegex = /[?&]([^=]+)=([^&]*)/g;
const params = {};
let match;
while ((match = paramRegex.exec(url)) !== null) {
params[match[1]] = match[2];
}
console.log(params); // { name: "John", age: "25" }
// Extract HTML tag content
const html = "<p>Hello</p><div>World</div>";
const tagRegex = /<(\w+)>(.*?)<\/\1>/g;
Performance Optimization
1. Avoid Catastrophic Backtracking
// Bad pattern (nested quantifiers can cause catastrophic, exponential-time backtracking)
const badPattern = /(a+)+b/;
// Good pattern
const goodPattern = /a+b/;
2. Use Specific Quantifiers
// Vague quantifier
const vague = /\w*/;
// Specific quantifier
const specific = /\w{0,100}/;
3. Avoid Unnecessary Capturing
// Unnecessary capturing
const unnecessary = /(\w+)@(\w+)\.(\w+)/;
// Non-capturing groups
const nonCapturing = /(?:\w+)@(?:\w+)\.(?:\w+)/;
Practical Example: Form Validator
/**
 * Validates form field values against a fixed table of named regex patterns.
 */
class FormValidator {
  /**
   * Named validation patterns. None of them carries the `g` or `y` flag, so
   * `RegExp.prototype.test` keeps no state between calls on these objects.
   */
  static patterns = {
    email: /^[^\s@]+@[^\s@]+\.[^\s@]+$/,
    phone: /^\d{3}-\d{3}-\d{4}$/,
    zipCode: /^\d{5}(-\d{4})?$/,
    password: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[a-zA-Z\d@$!%*?&]{8,}$/,
    url: /^https?:\/\/[^\s/$.?#].[^\s]*$/i,
    creditCard: /^\d{4}-\d{4}-\d{4}-\d{4}$/,
  };

  /**
   * Tests a single value against one named pattern.
   *
   * @param {string} field - Name of the field; echoed back in the result.
   * @param {string} value - Value to test (coerced to a string by `test`).
   * @param {string} patternName - Key of an entry in `patterns`.
   * @returns {{isValid: boolean, field: string, value: string, pattern: string}}
   * @throws {Error} If `patternName` is not a key defined in `patterns`.
   */
  static validate(field, value, patternName) {
    // Own-property lookup only: a plain `patterns[patternName]` would also
    // resolve inherited members such as "toString" or "constructor", which
    // are truthy but are not regexes and would crash on `.test`.
    const pattern = Object.hasOwn(this.patterns, patternName)
      ? this.patterns[patternName]
      : undefined;
    if (!pattern) {
      throw new Error(`Unknown pattern: ${patternName}`);
    }
    return {
      isValid: pattern.test(value),
      field,
      value,
      pattern: patternName,
    };
  }

  /**
   * Validates every entry of a form description.
   *
   * @param {Object<string, {value: string, pattern: string}>} formData -
   *   Maps each field name to its value and the name of the pattern to apply.
   * @returns {Array<{isValid: boolean, field: string, value: string, pattern: string}>}
   *   One result per field, in `Object.entries` order.
   * @throws {Error} If any entry names an unknown pattern.
   */
  static validateAll(formData) {
    return Object.entries(formData).map(([field, { value, pattern }]) =>
      this.validate(field, value, pattern),
    );
  }
}
// Usage
const formData = {
email: { value: "user@example.com", pattern: "email" },
phone: { value: "123-456-7890", pattern: "phone" },
password: { value: "Password123", pattern: "password" }
};
const results = FormValidator.validateAll(formData);
console.log(results);
Text Processor
/**
 * Small collection of regex-based text utilities.
 */
class TextProcessor {
  /**
   * Wraps every case-insensitive occurrence of the given keywords in a tag.
   *
   * All keywords are matched in a single pass over the original text, so a
   * keyword can never match inside markup inserted for another keyword
   * (e.g. the keyword "mark" does not corrupt an inserted `<mark>` tag).
   *
   * @param {string} text - Text to search.
   * @param {string|string[]} keywords - One keyword or a list of keywords,
   *   treated as literal text (regex metacharacters are escaped).
   * @param {string} [tag="mark"] - Tag name used for the wrapper element.
   * @returns {string} The text with matches wrapped in `<tag>…</tag>`.
   */
  static highlight(text, keywords, tag = "mark") {
    const keywordList = Array.isArray(keywords) ? keywords : [keywords];
    const alternatives = keywordList
      .filter((keyword) => keyword != null)
      .map(String)
      // An empty keyword would match at every position and insert empty tags.
      .filter((keyword) => keyword !== "")
      // Longest first, so "foobar" wins over "foo" at the same position.
      .sort((a, b) => b.length - a.length)
      .map((keyword) => keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));

    if (alternatives.length === 0) {
      return text;
    }
    const regex = new RegExp(`(${alternatives.join("|")})`, "gi");
    // Function replacer: `$` sequences in `tag` are not interpreted.
    return text.replace(regex, (matched) => `<${tag}>${matched}</${tag}>`);
  }

  /**
   * Finds all http/https URLs in a text.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]} Matched URLs in order of appearance; empty if none.
   *   Each match runs to the next whitespace, so trailing punctuation that
   *   directly follows a URL is included in the match.
   */
  static extractLinks(text) {
    const urlRegex = /https?:\/\/[^\s/$.?#].[^\s]*/gi;
    return text.match(urlRegex) ?? [];
  }

  /**
   * Removes every `<...>` sequence from a string.
   *
   * This is a plain pattern removal, not an HTML parser or sanitizer.
   *
   * @param {string} html - Markup to strip.
   * @returns {string} The input without `<...>` sequences.
   */
  static stripHtml(html) {
    return html.replace(/<[^>]*>/g, "");
  }

  /**
   * Rewrites each run of ten consecutive digits as `ddd-ddd-dddd`.
   *
   * The pattern has no boundary checks, so ten digits inside a longer digit
   * run are reformatted as well.
   *
   * @param {string} text - Text containing phone numbers.
   * @returns {string} The text with ten-digit runs hyphenated.
   */
  static formatPhone(text) {
    return text.replace(/(\d{3})(\d{3})(\d{4})/g, "$1-$2-$3");
  }

  /**
   * Counts how often each word occurs, ignoring case.
   *
   * @param {string} text - Text to analyse; words are runs of `\w` characters.
   * @returns {Object<string, number>} Lower-cased word mapped to its count.
   */
  static wordFrequency(text) {
    // Count in a Map: indexing a plain object would pick up inherited
    // members for words such as "constructor" and produce garbage counts.
    const counts = new Map();
    for (const word of text.toLowerCase().match(/\b\w+\b/g) ?? []) {
      counts.set(word, (counts.get(word) ?? 0) + 1);
    }
    return Object.fromEntries(counts);
  }
}
// Usage
console.log(TextProcessor.highlight("Hello world", ["Hello"]));
console.log(TextProcessor.extractLinks("Visit https://example.com"));
console.log(TextProcessor.formatPhone("1234567890"));
console.log(TextProcessor.wordFrequency("hello world hello"));
Summary
Key points about JavaScript regular expressions:
- Creation: Literal syntax, RegExp constructor
- Flags: g (global), i (case insensitive), m (multiline), s (dotAll), u (Unicode), y (sticky)
- Pattern Syntax: Character classes, quantifiers, boundaries, grouping, alternation, lookahead/behind
- Methods: test(), exec(), match(), search(), replace(), split()
- Advanced Features: Unicode property escapes, named capture groups, backreferences
- Common Patterns: Form validation, data extraction, text processing
- Performance: Avoid backtracking, pre-compile, use appropriate flags
- Best Practices: Error handling, pre-compilation, appropriate pattern selection
Mastering regular expressions is a key skill for complex text operations. In the next chapter, we will learn about JavaScript error handling.