# Go Regular Expressions
Regular expressions are a powerful tool for text pattern matching. Go provides full regular expression support through the regexp package.
## 📋 Regular Expression Basics
### Basic Matching and Finding
package main
import (
"fmt"
"regexp"
)
// basicMatching demonstrates a one-off match, matching with a compiled
// pattern, and finding the first match versus every match.
func basicMatching() {
	fmt.Println("=== 基本正则匹配 ===")

	// One-off match without keeping a compiled pattern around.
	const word = "Go"
	sentence := "Go is a programming language"
	// The error is discarded because the pattern is a fixed, valid literal.
	found, _ := regexp.MatchString(word, sentence)
	fmt.Printf("'%s' 匹配 '%s': %v\n", word, sentence, found)

	// Compiled pattern: the standalone word "Go" or "go".
	wordRe := regexp.MustCompile(`\b[Gg]o\b`)
	for _, sample := range []string{
		"Go is great",
		"I love go programming",
		"going somewhere",
		"Let's go!",
	} {
		if !wordRe.MatchString(sample) {
			fmt.Printf("❌ '%s'\n", sample)
			continue
		}
		fmt.Printf("✅ '%s'\n", sample)
	}

	// First run of digits, then every run of digits.
	digitsRe := regexp.MustCompile(`\d+`)
	fruit := "我有 3 个苹果和 15 个橙子"
	fmt.Printf("第一个数字: %s\n", digitsRe.FindString(fruit))
	fmt.Printf("所有数字: %v\n", digitsRe.FindAllString(fruit, -1))
}
func main() {
basicMatching()
}
### Capture Groups and Replacement
package main
import (
"fmt"
"regexp"
)
// captureAndReplace demonstrates numbered and named capture groups and
// pattern-based replacement, printing each result.
func captureAndReplace() {
	fmt.Println("=== 捕获组和替换 ===")

	// Numbered groups: pull year, month and day out of a YYYY-MM-DD date.
	isoDate := regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2})`)
	if parts := isoDate.FindStringSubmatch("今天是 2023-12-25"); parts != nil {
		fmt.Printf("完整匹配: %s\n", parts[0])
		fmt.Printf("年: %s, 月: %s, 日: %s\n", parts[1], parts[2], parts[3])
	}

	// Named groups: SubexpNames is index-aligned with the submatch slice;
	// index 0 is the whole match and unnamed groups have an empty name.
	namedDate := regexp.MustCompile(`(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})`)
	if parts := namedDate.FindStringSubmatch("生日: 1990-05-15"); parts != nil {
		for idx, group := range namedDate.SubexpNames() {
			if idx == 0 || group == "" {
				continue
			}
			fmt.Printf("%s: %s\n", group, parts[idx])
		}
	}

	fmt.Println("\n字符串替换:")

	// Plain replacement of the whole word "cat".
	wholeCat := regexp.MustCompile(`\bcat\b`)
	sentence := "The cat sat on the mat. Another cat was nearby."
	fmt.Printf("原文: %s\n", sentence)
	fmt.Printf("替换后: %s\n", wholeCat.ReplaceAllString(sentence, "dog"))

	// Replacement with group references: YYYY-MM-DD -> MM/DD/YYYY.
	dates := "日期: 2023-12-25 和 2023-01-01"
	fmt.Printf("原格式: %s\n", dates)
	fmt.Printf("新格式: %s\n", isoDate.ReplaceAllString(dates, "$2/$3/$1"))
}
func main() {
captureAndReplace()
}
## 🎯 Data Validation
### Common Validation Patterns
package main
import (
"fmt"
"regexp"
)
// Validator checks strings against a fixed set of named formats.
type Validator struct {
	// patterns maps a data type name to the pattern the whole value must match.
	patterns map[string]*regexp.Regexp
	// required maps a data type name to extra patterns that must each match
	// somewhere in the value. Go's regexp package has no lookahead, so
	// "must contain at least one X" rules are kept as separate patterns
	// instead of (?=...) assertions, which MustCompile would reject.
	required map[string][]*regexp.Regexp
}

// NewValidator returns a Validator that knows the "email", "phone",
// "password", "url" and "ipv4" data types.
//
// A password must be at least 8 characters drawn from letters, digits and
// @$!%*?&, and must contain a lowercase letter, an uppercase letter, a digit
// and one of those special characters.
func NewValidator() *Validator {
	return &Validator{
		patterns: map[string]*regexp.Regexp{
			"email":    regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`),
			"phone":    regexp.MustCompile(`^1[3-9]\d{9}$`), // Chinese mobile number
			"password": regexp.MustCompile(`^[A-Za-z\d@$!%*?&]{8,}$`),
			"url":      regexp.MustCompile(`^https?:\/\/[^\s/$.?#].[^\s]*$`),
			"ipv4":     regexp.MustCompile(`^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`),
		},
		required: map[string][]*regexp.Regexp{
			"password": {
				regexp.MustCompile(`[a-z]`),
				regexp.MustCompile(`[A-Z]`),
				regexp.MustCompile(`\d`),
				regexp.MustCompile(`[@$!%*?&]`),
			},
		},
	}
}

// Validate reports whether value is well-formed for dataType. It returns
// false for a dataType the Validator does not know.
func (v *Validator) Validate(dataType, value string) bool {
	pattern, ok := v.patterns[dataType]
	if !ok || !pattern.MatchString(value) {
		return false
	}
	// Indexing a missing key yields a nil slice, so data types without extra
	// requirements skip this loop.
	for _, must := range v.required[dataType] {
		if !must.MatchString(value) {
			return false
		}
	}
	return true
}
// validationDemo runs sample inputs through each validator and prints one
// ✅/❌ line per input. The trailing comments mark the expected outcome.
func validationDemo() {
	fmt.Println("=== 数据验证演示 ===")
	validator := NewValidator()

	// A slice rather than a map: ranging over a map visits keys in an
	// unspecified order, which made the section order differ between runs.
	testData := []struct {
		dataType string
		values   []string
	}{
		{dataType: "email", values: []string{
			"user@example.com",  // ✅
			"invalid-email",     // ❌
			"test@domain.co.uk", // ✅
		}},
		{dataType: "phone", values: []string{
			"13812345678", // ✅
			"1234567890",  // ❌
			"15987654321", // ✅
		}},
		{dataType: "password", values: []string{
			"Password123!",     // ✅
			"password",         // ❌
			"ComplexPass@2023", // ✅
		}},
		{dataType: "url", values: []string{
			"https://www.example.com", // ✅
			"invalid-url",             // ❌
			"http://api.service.org",  // ✅
		}},
	}

	for _, group := range testData {
		fmt.Printf("\n%s 验证:\n", group.dataType)
		for _, value := range group.values {
			status := "❌"
			if validator.Validate(group.dataType, value) {
				status = "✅"
			}
			fmt.Printf(" %s '%s'\n", status, value)
		}
	}
}
func main() {
validationDemo()
}
## 🔧 Text Processing
### Text Cleaning and Processing
package main
import (
"fmt"
"regexp"
"strings"
)
// TextProcessor bundles precompiled patterns for cleaning text, extracting
// data from it and masking sensitive values. Create one with
// NewTextProcessor; the zero value has nil patterns and is not usable.
type TextProcessor struct {
	htmlTags   *regexp.Regexp
	whitespace *regexp.Regexp
	numbers    *regexp.Regexp
	emails     *regexp.Regexp
	phones     *regexp.Regexp
}

// NewTextProcessor returns a TextProcessor with every pattern compiled once,
// so no method compiles a regexp per call.
func NewTextProcessor() *TextProcessor {
	return &TextProcessor{
		htmlTags:   regexp.MustCompile(`<[^>]*>`),
		whitespace: regexp.MustCompile(`\s+`),
		numbers:    regexp.MustCompile(`\d+`),
		emails:     regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`),
		phones:     regexp.MustCompile(`1[3-9]\d{9}`),
	}
}

// RemoveHTMLTags deletes every <...> span from text.
func (tp *TextProcessor) RemoveHTMLTags(text string) string {
	return tp.htmlTags.ReplaceAllString(text, "")
}

// NormalizeWhitespace collapses each run of whitespace into a single space
// and trims leading and trailing whitespace.
func (tp *TextProcessor) NormalizeWhitespace(text string) string {
	return strings.TrimSpace(tp.whitespace.ReplaceAllString(text, " "))
}

// ExtractNumbers returns every run of digits in text, in order of
// appearance. A decimal such as "299.99" yields two runs, "299" and "99".
func (tp *TextProcessor) ExtractNumbers(text string) []string {
	return tp.numbers.FindAllString(text, -1)
}

// ExtractEmails returns every email-like substring of text, in order of
// appearance.
func (tp *TextProcessor) ExtractEmails(text string) []string {
	return tp.emails.FindAllString(text, -1)
}

// MaskSensitiveData returns text with email addresses and 11-digit mobile
// numbers partially hidden. Emails are masked first, then phone numbers.
func (tp *TextProcessor) MaskSensitiveData(text string) string {
	masked := tp.emails.ReplaceAllStringFunc(text, maskEmail)
	return tp.phones.ReplaceAllStringFunc(masked, maskPhone)
}

// maskEmail keeps the first two characters of the user part and the full
// domain. Addresses whose user part has two characters or fewer are hidden
// entirely as "***@***".
func maskEmail(email string) string {
	parts := strings.Split(email, "@")
	if len(parts) != 2 || len(parts[0]) <= 2 {
		return "***@***"
	}
	return parts[0][:2] + "***@" + parts[1]
}

// maskPhone replaces the 4th to 7th digits of an 11-digit match with
// asterisks, keeping the first three and last four digits.
func maskPhone(phone string) string {
	return phone[:3] + "****" + phone[7:]
}
// textProcessingDemo walks through HTML cleanup, whitespace normalization,
// data extraction and masking with a TextProcessor, printing each step.
func textProcessingDemo() {
	fmt.Println("=== 文本处理演示 ===")
	tp := NewTextProcessor()

	// HTML cleanup: strip the tags first, then collapse leftover whitespace.
	markup := `<p>联系我们:<strong>邮箱</strong> admin@company.com</p>
<div>电话:<span>13812345678</span></div>`
	fmt.Printf("原文: %s\n", markup)
	stripped := tp.RemoveHTMLTags(markup)
	fmt.Printf("移除HTML: %s\n", stripped)
	fmt.Printf("规范空白: %s\n", tp.NormalizeWhitespace(stripped))

	// Data extraction.
	fmt.Println("\n数据提取:")
	contact := "联系方式: john@example.com, admin@company.org, 电话: 13812345678, 价格: 299.99"
	fmt.Printf("原文: %s\n", contact)
	fmt.Printf("邮箱: %v\n", tp.ExtractEmails(contact))
	fmt.Printf("数字: %v\n", tp.ExtractNumbers(contact))

	// Masking of sensitive data.
	fmt.Println("\n敏感数据脱敏:")
	private := "用户邮箱: alice@example.com, 手机: 13987654321"
	fmt.Printf("原文: %s\n", private)
	fmt.Printf("脱敏后: %s\n", tp.MaskSensitiveData(private))
}
func main() {
textProcessingDemo()
}
## 🎯 Practical Application Examples
### Log Parser
package main
import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)
// LogEntry is one parsed log line.
type LogEntry struct {
	Timestamp time.Time
	Level     string
	IP        string
	Message   string
	Status    int
}

// LogParser parses access-log and error-log lines and summarizes the parsed
// entries.
type LogParser struct {
	accessLogRe *regexp.Regexp
	errorLogRe  *regexp.Regexp
	ipRe        *regexp.Regexp
}

// NewLogParser returns a LogParser with its patterns compiled.
func NewLogParser() *LogParser {
	return &LogParser{
		// Simplified access-log format: IP [time] "request" status
		accessLogRe: regexp.MustCompile(`^(\S+) \[([^\]]+)\] "([^"]*)" (\d+)`),
		// Error-log format: [time] LEVEL: message
		errorLogRe: regexp.MustCompile(`^\[([^\]]+)\] (\w+): (.+)`),
		// Dotted-quad IP address (octet ranges are not checked)
		ipRe: regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`),
	}
}

// ParseAccessLog parses a line of the form
//
//	IP [02/Jan/2006:15:04:05] "request" status
//
// into a LogEntry with Level "ACCESS". It returns an error when the line
// does not have that shape, or when the timestamp or status code cannot be
// parsed; no entry is returned alongside an error.
func (lp *LogParser) ParseAccessLog(line string) (*LogEntry, error) {
	matches := lp.accessLogRe.FindStringSubmatch(line)
	if matches == nil {
		return nil, fmt.Errorf("无法解析访问日志")
	}

	timestamp, err := time.Parse("02/Jan/2006:15:04:05", matches[2])
	if err != nil {
		return nil, fmt.Errorf("无法解析访问日志: %w", err)
	}
	// The pattern guarantees digits only, so Atoi can fail only on overflow.
	status, err := strconv.Atoi(matches[4])
	if err != nil {
		return nil, fmt.Errorf("无法解析访问日志: %w", err)
	}

	return &LogEntry{
		Timestamp: timestamp,
		Level:     "ACCESS",
		IP:        matches[1],
		Message:   matches[3],
		Status:    status,
	}, nil
}

// ParseErrorLog parses a line of the form
//
//	[2006-01-02 15:04:05] LEVEL: message
//
// into a LogEntry. IP is the first IP-like token found in the message, or
// empty when there is none, and Status is always 500. It returns an error
// when the line does not have that shape or the timestamp cannot be parsed.
func (lp *LogParser) ParseErrorLog(line string) (*LogEntry, error) {
	matches := lp.errorLogRe.FindStringSubmatch(line)
	if matches == nil {
		return nil, fmt.Errorf("无法解析错误日志")
	}

	timestamp, err := time.Parse("2006-01-02 15:04:05", matches[1])
	if err != nil {
		return nil, fmt.Errorf("无法解析错误日志: %w", err)
	}

	return &LogEntry{
		Timestamp: timestamp,
		Level:     matches[2],
		// FindString returns "" when the message contains no IP.
		IP:      lp.ipRe.FindString(matches[3]),
		Message: matches[3],
		Status:  500,
	}, nil
}

// AnalyzeLogs summarizes entries. The returned map holds:
//
//	"total"           int             number of entries
//	"levels"          map[string]int  entry count per Level
//	"status_codes"    map[string]int  entry count per Status, keyed by its decimal string
//	"unique_ips"      map[string]bool set of non-empty IPs seen
//	"unique_ip_count" int             size of that set
//	"errors"          int             entries with Status >= 400 or Level "ERROR" in any letter case
func (lp *LogParser) AnalyzeLogs(entries []LogEntry) map[string]interface{} {
	levels := make(map[string]int)
	statusCodes := make(map[string]int)
	uniqueIPs := make(map[string]bool)
	errorCount := 0

	for _, entry := range entries {
		levels[entry.Level]++
		statusCodes[strconv.Itoa(entry.Status)]++
		if entry.IP != "" {
			uniqueIPs[entry.IP] = true
		}
		if entry.Status >= 400 || strings.EqualFold(entry.Level, "ERROR") {
			errorCount++
		}
	}

	return map[string]interface{}{
		"total":           len(entries),
		"levels":          levels,
		"status_codes":    statusCodes,
		"unique_ips":      uniqueIPs,
		"errors":          errorCount,
		"unique_ip_count": len(uniqueIPs),
	}
}
// logParsingDemo parses sample access and error log lines, prints each
// parsed entry, and then prints summary statistics for all of them.
func logParsingDemo() {
	fmt.Println("=== 日志解析演示 ===")
	lp := NewLogParser()

	// Sample log data.
	accessLines := []string{
		`192.168.1.100 [25/Dec/2023:10:00:00] "GET /index.html" 200`,
		`192.168.1.101 [25/Dec/2023:10:01:00] "POST /api/login" 200`,
		`192.168.1.102 [25/Dec/2023:10:02:00] "GET /admin" 403`,
		`192.168.1.100 [25/Dec/2023:10:03:00] "GET /notfound" 404`,
	}
	errorLines := []string{
		`[2023-12-25 10:05:00] ERROR: Database connection failed from 192.168.1.103`,
		`[2023-12-25 10:06:00] WARN: High memory usage detected`,
		`[2023-12-25 10:07:00] ERROR: Auth failed for 192.168.1.104`,
	}

	var parsed []LogEntry

	// Access logs.
	fmt.Println("解析访问日志:")
	for _, raw := range accessLines {
		entry, err := lp.ParseAccessLog(raw)
		if err != nil {
			continue // lines that fail to parse are skipped without output
		}
		parsed = append(parsed, *entry)
		fmt.Printf("✅ %s [%d] %s from %s\n",
			entry.Level, entry.Status, entry.Message, entry.IP)
	}

	// Error logs.
	fmt.Println("\n解析错误日志:")
	for _, raw := range errorLines {
		entry, err := lp.ParseErrorLog(raw)
		if err != nil {
			continue // lines that fail to parse are skipped without output
		}
		parsed = append(parsed, *entry)
		fmt.Printf("✅ %s: %s\n", entry.Level, entry.Message)
	}

	// Summary statistics.
	fmt.Println("\n=== 统计分析 ===")
	report := lp.AnalyzeLogs(parsed)
	fmt.Printf("总日志数: %v\n", report["total"])
	fmt.Printf("错误数: %v\n", report["errors"])
	fmt.Printf("唯一IP数: %v\n", report["unique_ip_count"])

	// Both breakdowns range over maps, so their line order is unspecified
	// and can differ between runs.
	fmt.Println("\n级别分布:")
	for level, count := range report["levels"].(map[string]int) {
		fmt.Printf(" %s: %d\n", level, count)
	}

	fmt.Println("\n状态码分布:")
	for code, count := range report["status_codes"].(map[string]int) {
		fmt.Printf(" %s: %d\n", code, count)
	}
}
func main() {
logParsingDemo()
}
## 🎓 Summary
In this chapter, we learned about Go regular expressions:
- ✅ Basic matching: pattern compilation, string matching, find operations
- ✅ Capture groups: capture groups, named groups, pattern replacement
- ✅ Data validation: common validation for email, phone, password, etc.
- ✅ Text processing: HTML cleaning, data extraction, sensitive data masking
- ✅ Practical applications: log parser implementation
Regular expressions are a powerful tool for processing text data, widely used in data validation, text parsing, log analysis, and more.
Next, we will learn about Go Type Assertion to understand the dynamic checking mechanism for interface types.
::: tip Regular Expression Tips
- Prefer compiled regular expressions for better performance
- Use capture groups reasonably and avoid unnecessary complexity
- Watch regex performance and avoid excessive backtracking
- For complex validation, consider combining multiple simple patterns
:::