Go Regular Expressions

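Regular expressions are a powerful tool for text pattern matching. Go provides regular expression support through the standard-library regexp package, which implements the RE2 syntax: matching runs in linear time, but lookaround assertions such as (?=...) and backreferences are not supported.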

📋 Regular Expression Basics

Basic Matching and Finding

package main

import (
    "fmt"
    "regexp"
)

// basicMatching demonstrates three entry points of the regexp package: the
// one-shot regexp.MatchString helper, a precompiled pattern reused across
// several inputs, and FindString / FindAllString for extracting matches.
func basicMatching() {
    fmt.Println("=== 基本正则匹配 ===")

    // One-shot match; the pattern is compiled inside MatchString.
    const literal = "Go"
    sentence := "Go is a programming language"

    // The error is discarded, as in the original demo: the pattern is a
    // fixed literal.
    ok, _ := regexp.MatchString(literal, sentence)
    fmt.Printf("'%s' 匹配 '%s': %v\n", literal, sentence, ok)

    // Precompiled pattern: the standalone word "Go" or "go".
    wordRe := regexp.MustCompile(`\b[Gg]o\b`)

    samples := []string{
        "Go is great",
        "I love go programming",
        "going somewhere",
        "Let's go!",
    }

    for _, sample := range samples {
        if !wordRe.MatchString(sample) {
            fmt.Printf("❌ '%s'\n", sample)
            continue
        }
        fmt.Printf("✅ '%s'\n", sample)
    }

    // Extraction: first match versus all matches (-1 means no limit).
    digitsRe := regexp.MustCompile(`\d+`)
    fruitText := "我有 3 个苹果和 15 个橙子"

    firstNumber := digitsRe.FindString(fruitText)
    allNumbers := digitsRe.FindAllString(fruitText, -1)

    fmt.Printf("第一个数字: %s\n", firstNumber)
    fmt.Printf("所有数字: %v\n", allNumbers)
}

// main runs the basic matching demo.
func main() {
    basicMatching()
}

Capture Groups and Replacement

package main

import (
    "fmt"
    "regexp"
)

// captureAndReplace demonstrates numbered and named capture groups, plain
// regexp replacement, and replacement that reorders groups via $N references.
func captureAndReplace() {
    fmt.Println("=== 捕获组和替换 ===")

    // Numbered groups: index 0 is the whole match, 1..3 are year, month, day.
    isoDate := regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
    today := "今天是 2023-12-25"

    if parts := isoDate.FindStringSubmatch(today); parts != nil {
        fmt.Printf("完整匹配: %s\n", parts[0])
        fmt.Printf("年: %s, 月: %s, 日: %s\n", parts[1], parts[2], parts[3])
    }

    // Named groups: SubexpNames is index-aligned with the submatch slice;
    // entry 0 (the whole match) and unnamed groups have an empty name.
    labelled := regexp.MustCompile(`(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})`)
    if fields := labelled.FindStringSubmatch("生日: 1990-05-15"); fields != nil {
        for idx, label := range labelled.SubexpNames() {
            if idx == 0 || label == "" {
                continue
            }
            fmt.Printf("%s: %s\n", label, fields[idx])
        }
    }

    // String replacement.
    fmt.Println("\n字符串替换:")

    // Plain replacement of a whole word.
    wholeCat := regexp.MustCompile(`\bcat\b`)
    sentence := "The cat sat on the mat. Another cat was nearby."
    withDogs := wholeCat.ReplaceAllString(sentence, "dog")

    fmt.Printf("原文: %s\n", sentence)
    fmt.Printf("替换后: %s\n", withDogs)

    // Group-based replacement: YYYY-MM-DD becomes MM/DD/YYYY.
    twoDates := "日期: 2023-12-25 和 2023-01-01"
    usStyle := isoDate.ReplaceAllString(twoDates, "$2/$3/$1")

    fmt.Printf("原格式: %s\n", twoDates)
    fmt.Printf("新格式: %s\n", usStyle)
}

// main runs the capture group and replacement demo.
func main() {
    captureAndReplace()
}

🎯 Data Validation

Common Validation Patterns

package main

import (
    "fmt"
    "regexp"
)

// Validator checks string values against named validation rules.
//
// Go's regexp package implements RE2 syntax, which has no lookahead
// assertions such as (?=...). A rule therefore consists of one primary
// pattern plus an optional list of additional patterns that must all match.
type Validator struct {
    // patterns maps a data type name to its primary, fully anchored pattern.
    patterns map[string]*regexp.Regexp
    // required maps a data type name to extra patterns that must each match
    // somewhere in the value, in addition to the primary pattern.
    required map[string][]*regexp.Regexp
}

// NewValidator returns a Validator with rules for "email", "phone"
// (mainland China mobile numbers), "password", "url" and "ipv4".
//
// A password must be at least 8 characters drawn from letters, digits and
// @$!%*?&, and must contain at least one lowercase letter, one uppercase
// letter, one digit and one of those special characters.
func NewValidator() *Validator {
    return &Validator{
        patterns: map[string]*regexp.Regexp{
            "email": regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`),
            "phone": regexp.MustCompile(`^1[3-9]\d{9}$`), // mainland China mobile number
            // Allowed character set and minimum length only; the
            // "contains at least one of each class" checks live in required.
            "password": regexp.MustCompile(`^[A-Za-z\d@$!%*?&]{8,}$`),
            "url":      regexp.MustCompile(`^https?:\/\/[^\s/$.?#].[^\s]*$`),
            "ipv4":     regexp.MustCompile(`^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`),
        },
        required: map[string][]*regexp.Regexp{
            // These replace the lookaheads of the PCRE-style password rule.
            "password": {
                regexp.MustCompile(`[a-z]`),
                regexp.MustCompile(`[A-Z]`),
                regexp.MustCompile(`\d`),
                regexp.MustCompile(`[@$!%*?&]`),
            },
        },
    }
}

// Validate reports whether value satisfies the rule registered for dataType.
// It returns false when dataType is unknown.
func (v *Validator) Validate(dataType, value string) bool {
    pattern, exists := v.patterns[dataType]
    if !exists || !pattern.MatchString(value) {
        return false
    }
    // Looking up a missing key (or a nil map) yields a nil slice, so data
    // types without extra requirements simply skip this loop.
    for _, re := range v.required[dataType] {
        if !re.MatchString(value) {
            return false
        }
    }
    return true
}

// validationDemo runs sample values through the validator and prints a
// pass/fail mark for each one.
//
// The samples are kept in a slice rather than a map: Go does not define the
// iteration order of a map, so ranging over one would print the sections in
// a different order from run to run.
func validationDemo() {
    fmt.Println("=== 数据验证演示 ===")

    validator := NewValidator()

    testData := []struct {
        dataType string
        values   []string
    }{
        {"email", []string{
            "user@example.com",  // expected: valid
            "invalid-email",     // expected: invalid
            "test@domain.co.uk", // expected: valid
        }},
        {"phone", []string{
            "13812345678", // expected: valid
            "1234567890",  // expected: invalid
            "15987654321", // expected: valid
        }},
        {"password", []string{
            "Password123!",     // expected: valid
            "password",         // expected: invalid
            "ComplexPass@2023", // expected: valid
        }},
        {"url", []string{
            "https://www.example.com", // expected: valid
            "invalid-url",             // expected: invalid
            "http://api.service.org",  // expected: valid
        }},
    }

    for _, group := range testData {
        fmt.Printf("\n%s 验证:\n", group.dataType)
        for _, value := range group.values {
            status := "❌"
            if validator.Validate(group.dataType, value) {
                status = "✅"
            }
            fmt.Printf("  %s '%s'\n", status, value)
        }
    }
}

// main runs the data validation demo.
func main() {
    validationDemo()
}

🔧 Text Processing

Text Cleaning and Processing

package main

import (
    "fmt"
    "regexp"
    "strings"
)

// TextProcessor bundles the precompiled patterns used for cleaning text,
// extracting data from it and masking sensitive values. Create one with
// NewTextProcessor; the zero value has nil patterns and is not usable.
type TextProcessor struct {
    htmlTags   *regexp.Regexp
    whitespace *regexp.Regexp
    numbers    *regexp.Regexp
    emails     *regexp.Regexp
    phones     *regexp.Regexp
}

// NewTextProcessor compiles every pattern once so that the methods never
// compile a regular expression per call.
func NewTextProcessor() *TextProcessor {
    return &TextProcessor{
        htmlTags:   regexp.MustCompile(`<[^>]*>`),
        whitespace: regexp.MustCompile(`\s+`),
        numbers:    regexp.MustCompile(`\d+`),
        emails:     regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`),
        phones:     regexp.MustCompile(`1[3-9]\d{9}`), // mainland China mobile number
    }
}

// RemoveHTMLTags deletes everything that looks like an HTML tag (<...>) and
// keeps the text between tags unchanged.
func (tp *TextProcessor) RemoveHTMLTags(text string) string {
    return tp.htmlTags.ReplaceAllString(text, "")
}

// NormalizeWhitespace collapses each run of whitespace into a single space
// and trims leading and trailing whitespace.
func (tp *TextProcessor) NormalizeWhitespace(text string) string {
    return strings.TrimSpace(tp.whitespace.ReplaceAllString(text, " "))
}

// ExtractNumbers returns every run of digits in text, in order of
// appearance. A decimal such as "299.99" yields two entries, "299" and "99".
// The result is nil when text contains no digits.
func (tp *TextProcessor) ExtractNumbers(text string) []string {
    return tp.numbers.FindAllString(text, -1)
}

// ExtractEmails returns every e-mail-shaped substring of text, in order of
// appearance. The result is nil when none is found.
func (tp *TextProcessor) ExtractEmails(text string) []string {
    return tp.emails.FindAllString(text, -1)
}

// MaskSensitiveData hides e-mail addresses and phone numbers in text.
//
// An address keeps the first two characters of its user part and its domain
// ("alice@example.com" becomes "al***@example.com"); an address whose user
// part has two characters or fewer is replaced by "***@***" entirely. A
// phone number keeps its first three and last four digits.
func (tp *TextProcessor) MaskSensitiveData(text string) string {
    // Mask e-mail addresses first. The pattern's character classes exclude
    // '@', so every match contains exactly one '@' and its user part is ASCII.
    emailMasked := tp.emails.ReplaceAllStringFunc(text, func(email string) string {
        at := strings.IndexByte(email, '@')
        if at > 2 {
            return email[:2] + "***@" + email[at+1:]
        }
        return "***@***"
    })

    // Then mask phone numbers; a match is always exactly 11 ASCII digits.
    return tp.phones.ReplaceAllStringFunc(emailMasked, func(phone string) string {
        return phone[:3] + "****" + phone[7:]
    })
}

// textProcessingDemo walks through the TextProcessor features in order:
// stripping HTML, normalizing whitespace, extracting e-mails and numbers,
// and masking sensitive data. Each step prints its input and result.
func textProcessingDemo() {
    fmt.Println("=== 文本处理演示 ===")

    tp := NewTextProcessor()

    // HTML cleanup.
    markup := `<p>联系我们:<strong>邮箱</strong> admin@company.com</p>
                 <div>电话:<span>13812345678</span></div>`

    fmt.Printf("原文: %s\n", markup)

    withoutTags := tp.RemoveHTMLTags(markup)
    fmt.Printf("移除HTML: %s\n", withoutTags)

    fmt.Printf("规范空白: %s\n", tp.NormalizeWhitespace(withoutTags))

    // Data extraction.
    fmt.Println("\n数据提取:")

    contact := "联系方式: john@example.com, admin@company.org, 电话: 13812345678, 价格: 299.99"

    foundEmails := tp.ExtractEmails(contact)
    foundNumbers := tp.ExtractNumbers(contact)

    fmt.Printf("原文: %s\n", contact)
    fmt.Printf("邮箱: %v\n", foundEmails)
    fmt.Printf("数字: %v\n", foundNumbers)

    // Masking of sensitive data.
    fmt.Println("\n敏感数据脱敏:")
    private := "用户邮箱: alice@example.com, 手机: 13987654321"

    fmt.Printf("原文: %s\n", private)
    fmt.Printf("脱敏后: %s\n", tp.MaskSensitiveData(private))
}

// main runs the text processing demo.
func main() {
    textProcessingDemo()
}

🎯 Practical Application Examples

Log Parser

package main

import (
    "fmt"
    "regexp"
    "strconv"
    "strings"
    "time"
)

// Timestamp layouts accepted by the two parsers, in time.Parse notation.
const (
    accessTimeLayout = "02/Jan/2006:15:04:05"
    errorTimeLayout  = "2006-01-02 15:04:05"
)

// LogEntry is one parsed log line in a source-independent form.
type LogEntry struct {
    Timestamp time.Time
    Level     string // "ACCESS" for access logs, otherwise the level read from the error log
    IP        string // client IP; empty when the line carries none
    Message   string // request string (access log) or message text (error log)
    Status    int    // HTTP status for access logs; 500 for ERROR-level error logs, 0 otherwise
}

// LogParser holds the precompiled patterns for the supported log formats.
// Create one with NewLogParser.
type LogParser struct {
    accessLogRe *regexp.Regexp
    errorLogRe  *regexp.Regexp
    ipRe        *regexp.Regexp
}

// NewLogParser compiles the patterns for both log formats.
func NewLogParser() *LogParser {
    return &LogParser{
        // Simplified access log format: IP [time] "request" status
        accessLogRe: regexp.MustCompile(`^(\S+) \[([^\]]+)\] "([^"]*)" (\d+)`),

        // Error log format: [time] LEVEL: message
        errorLogRe: regexp.MustCompile(`^\[([^\]]+)\] (\w+): (.+)`),

        // Dotted-quad IP address (octet ranges are not checked)
        ipRe: regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`),
    }
}

// isErrorLevel reports whether level names the ERROR level, ignoring case.
func isErrorLevel(level string) bool {
    return strings.EqualFold(level, "ERROR")
}

// ParseAccessLog parses one line of the simplified access log format
// `IP [02/Jan/2006:15:04:05] "request" status`.
//
// It returns an error when the line does not match the format, when the
// timestamp is not in the expected layout, or when the status code does not
// fit in an int. The returned entry is nil whenever the error is non-nil.
func (lp *LogParser) ParseAccessLog(line string) (*LogEntry, error) {
    matches := lp.accessLogRe.FindStringSubmatch(line)
    if matches == nil {
        return nil, fmt.Errorf("无法解析访问日志")
    }

    timestamp, err := time.Parse(accessTimeLayout, matches[2])
    if err != nil {
        return nil, fmt.Errorf("无法解析访问日志: 时间 %q 无效: %w", matches[2], err)
    }

    // The pattern guarantees digits only, so Atoi can fail only on overflow.
    status, err := strconv.Atoi(matches[4])
    if err != nil {
        return nil, fmt.Errorf("无法解析访问日志: 状态码 %q 无效: %w", matches[4], err)
    }

    return &LogEntry{
        Timestamp: timestamp,
        Level:     "ACCESS",
        IP:        matches[1],
        Message:   matches[3],
        Status:    status,
    }, nil
}

// ParseErrorLog parses one line of the error log format
// `[2006-01-02 15:04:05] LEVEL: message`.
//
// The first IP address found in the message, if any, becomes the entry's IP.
// ERROR-level lines get Status 500; every other level gets Status 0, so a
// warning is not mistaken for a failed request by AnalyzeLogs.
//
// It returns an error when the line does not match the format or when the
// timestamp is not in the expected layout. The returned entry is nil
// whenever the error is non-nil.
func (lp *LogParser) ParseErrorLog(line string) (*LogEntry, error) {
    matches := lp.errorLogRe.FindStringSubmatch(line)
    if matches == nil {
        return nil, fmt.Errorf("无法解析错误日志")
    }

    timestamp, err := time.Parse(errorTimeLayout, matches[1])
    if err != nil {
        return nil, fmt.Errorf("无法解析错误日志: 时间 %q 无效: %w", matches[1], err)
    }

    level, message := matches[2], matches[3]

    status := 0
    if isErrorLevel(level) {
        status = 500
    }

    return &LogEntry{
        Timestamp: timestamp,
        Level:     level,
        // FindString yields "" when the message contains no IP address.
        IP:      lp.ipRe.FindString(message),
        Message: message,
        Status:  status,
    }, nil
}

// AnalyzeLogs summarizes entries. The returned map has these keys:
//
//   - "total" (int): number of entries
//   - "levels" (map[string]int): entry count per Level
//   - "status_codes" (map[string]int): entry count per Status, keyed by the
//     decimal status; entries with Status 0 (no HTTP status) are left out
//   - "unique_ips" (map[string]bool): set of non-empty IPs seen
//   - "unique_ip_count" (int): size of that set
//   - "errors" (int): entries with Status >= 400 or an ERROR level
func (lp *LogParser) AnalyzeLogs(entries []LogEntry) map[string]interface{} {
    levels := make(map[string]int)
    statusCodes := make(map[string]int)
    uniqueIPs := make(map[string]bool)
    errorCount := 0

    for i := range entries {
        entry := &entries[i]

        levels[entry.Level]++

        if entry.Status != 0 {
            statusCodes[strconv.Itoa(entry.Status)]++
        }

        if entry.IP != "" {
            uniqueIPs[entry.IP] = true
        }

        if entry.Status >= 400 || isErrorLevel(entry.Level) {
            errorCount++
        }
    }

    return map[string]interface{}{
        "total":           len(entries),
        "levels":          levels,
        "status_codes":    statusCodes,
        "unique_ips":      uniqueIPs,
        "unique_ip_count": len(uniqueIPs),
        "errors":          errorCount,
    }
}

// logParsingDemo parses a handful of sample access and error log lines,
// prints each line that parses successfully, and then prints aggregate
// statistics for all parsed entries. Lines that fail to parse are skipped
// without any output.
func logParsingDemo() {
    fmt.Println("=== 日志解析演示 ===")

    lp := NewLogParser()

    // Sample log data.
    accessLines := []string{
        `192.168.1.100 [25/Dec/2023:10:00:00] "GET /index.html" 200`,
        `192.168.1.101 [25/Dec/2023:10:01:00] "POST /api/login" 200`,
        `192.168.1.102 [25/Dec/2023:10:02:00] "GET /admin" 403`,
        `192.168.1.100 [25/Dec/2023:10:03:00] "GET /notfound" 404`,
    }

    errorLines := []string{
        `[2023-12-25 10:05:00] ERROR: Database connection failed from 192.168.1.103`,
        `[2023-12-25 10:06:00] WARN: High memory usage detected`,
        `[2023-12-25 10:07:00] ERROR: Auth failed for 192.168.1.104`,
    }

    var parsed []LogEntry

    // Access log lines.
    fmt.Println("解析访问日志:")
    for _, raw := range accessLines {
        rec, err := lp.ParseAccessLog(raw)
        if err != nil {
            continue
        }
        parsed = append(parsed, *rec)
        fmt.Printf("✅ %s [%d] %s from %s\n",
            rec.Level, rec.Status, rec.Message, rec.IP)
    }

    // Error log lines.
    fmt.Println("\n解析错误日志:")
    for _, raw := range errorLines {
        rec, err := lp.ParseErrorLog(raw)
        if err != nil {
            continue
        }
        parsed = append(parsed, *rec)
        fmt.Printf("✅ %s: %s\n", rec.Level, rec.Message)
    }

    // Aggregate statistics.
    fmt.Println("\n=== 统计分析 ===")
    summary := lp.AnalyzeLogs(parsed)

    fmt.Printf("总日志数: %v\n", summary["total"])
    fmt.Printf("错误数: %v\n", summary["errors"])
    fmt.Printf("唯一IP数: %v\n", summary["unique_ip_count"])

    // Both distributions are maps, so their print order is not fixed.
    fmt.Println("\n级别分布:")
    for name, n := range summary["levels"].(map[string]int) {
        fmt.Printf("  %s: %d\n", name, n)
    }

    fmt.Println("\n状态码分布:")
    for status, n := range summary["status_codes"].(map[string]int) {
        fmt.Printf("  %s: %d\n", status, n)
    }
}

// main runs the log parsing demo.
func main() {
    logParsingDemo()
}

🎓 Summary

In this chapter, we learned about Go regular expressions:

  • Basic matching: pattern compilation, string matching, find operations
  • Capture groups: numbered groups, named groups, pattern replacement
  • Data validation: common validation for email, phone, password, etc.
  • Text processing: HTML cleaning, data extraction, sensitive data masking
  • Practical applications: log parser implementation

Regular expressions are a powerful tool for processing text data, widely used in data validation, text parsing, log analysis, and more.


Next, we will learn about Go Type Assertion to understand the dynamic checking mechanism for interface types.

::: tip Regular Expression Tips

  • Prefer compiled regular expressions for better performance
  • Use capture groups reasonably and avoid unnecessary complexity
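  • Watch regex performance: Go's RE2-based engine matches in time linear in the input and never backtracks catastrophically, but compiling patterns repeatedly or scanning very large inputs still costs time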
  • For complex validation, consider combining multiple simple patterns

:::