Skip to content

Perl 正则表达式

基本匹配

简单匹配

perl
# Sample string used by both checks below.
my $text = "Hello World";

# =~ binds the string to the pattern and is true when the pattern matches.
if ($text =~ /World/) {
    print "Found 'World'\n";
}

# !~ is the negated form: true when the pattern does NOT match.
if ($text !~ /Python/) {
    print "Did not find 'Python'\n";
}

不区分大小写

perl
my $text = "Hello World";

# The /i modifier makes the match case-insensitive, so /world/ matches "World".
if ($text =~ /world/i) {
    print "Found (case insensitive)\n";
}

匹配变量

perl
my $text = "Hello World";
my $pattern = "World";

# The variable is interpolated into the regex, so its contents are treated as
# a pattern (metacharacters keep their special meaning). Write \Q$pattern\E
# instead when the value must be matched literally.
if ($text =~ /$pattern/) {
    print "Found pattern: $pattern\n";
}

字符类

基本字符类

perl
# \d: true when the string contains at least one digit.
if ("123" =~ /\d/) {
    print "Contains digits\n";
}

# [a-zA-Z]: true when the string contains at least one ASCII letter.
if ("abc" =~ /[a-zA-Z]/) {
    print "Contains letters\n";
}

# \s: true when the string contains at least one whitespace character.
if ("hello world" =~ /\s/) {
    print "Contains whitespace\n";
}

预定义字符类

perl
\d    # 匹配数字 [0-9]
\D    # 匹配非数字 [^0-9]
\w    # 匹配单词字符 [a-zA-Z0-9_]
\W    # 匹配非单词字符
\s    # 匹配空白字符 [ \t\n\r\f]（自 Perl 5.18 起还包括垂直制表符 \x0B）
\S    # 匹配非空白字符

# Examples
# \d+ matches a run of one or more digits ("123" here).
if ("hello 123" =~ /\d+/) {
    print "Found digits\n";
}

# The address is single-quoted on purpose: inside a double-quoted string Perl
# would try to interpolate an array named @example, which silently removes
# that text (and is a compile-time error under "use strict"). The at-sign in
# the pattern is escaped for the same reason.
if ('test@example.com' =~ /\w+\@\w+\.\w+/) {
    print "Valid email format\n";
}

自定义字符类

perl
# Custom class: any single one of the listed vowels.
if ("hello" =~ /[aeiou]/) {
    print "Contains vowel\n";
}

# One or more hexadecimal digits. The pattern is unanchored, so it only
# requires that some run of hex digits occurs in the string.
if ("A1F" =~ /[0-9A-Fa-f]+/) {
    print "Hexadecimal\n";
}

# Negated class: [^...] matches any single character that is NOT listed.
if ("abc" =~ /[^aeiou]/) {
    print "Contains non-vowel\n";
}

量词

基本量词

perl
*      # 0 次或多次
+      # 1 次或多次
?      # 0 次或 1 次
{n}    # 恰好 n 次
{n,}   # n 次或更多
{n,m}  # n 到 m 次

量词示例

perl
# * : zero or more repetitions
"test" =~ /a*/;     # matches the empty string (zero "a"s) at position 0
"aaabbb" =~ /a*/;   # matches "aaa" (three repetitions)

# + : one or more repetitions
"test" =~ /a+/;     # no match
"aaabbb" =~ /a+/;   # matches "aaa" (three repetitions)

# ? : zero or one repetition
"color" =~ /colou?r/;   # matches
"colour" =~ /colou?r/;   # matches
"colouur" =~ /colou?r/;  # no match (two "u"s in a row)

# {n} and {n,m} : counted repetitions
"123" =~ /\d{3}/;     # matches the three digits
"12" =~ /\d{3,5}/;    # no match (fewer than three digits)
"12345" =~ /\d{3,5}/;  # matches all five digits

贪婪和非贪婪

perl
my $text = "<b>hello</b> <b>world</b>";

# Greedy (default): .* takes as much as it can, so the match runs from the
# first <b> to the last </b>.
if ($text =~ /<b>.*<\/b>/) {
    print "Greedy: $&\n";  # <b>hello</b> <b>world</b>
}

# Non-greedy: .*? stops at the first </b> that lets the match succeed.
if ($text =~ /<b>.*?<\/b>/) {
    print "Non-greedy: $&\n";  # <b>hello</b>
}

锚点

基本锚点

perl
^     # 字符串开头
$     # 字符串结尾
\b    # 单词边界
\B    # 非单词边界

锚点示例

perl
# ^ anchors the match at the start of the string.
if ("hello world" =~ /^hello/) {
    print "Starts with 'hello'\n";
}

# $ anchors the match at the end of the string.
if ("hello world" =~ /world$/) {
    print "Ends with 'world'\n";
}

# \b requires a word boundary on each side, so only the whole word matches.
if ("hello world" =~ /\bworld\b/) {
    print "Whole word 'world'\n";
}

# "worldwide" has no word boundary after "world", so the pattern fails.
# The test uses !~ so that the message is actually printed; with =~ the
# branch could never run.
if ("worldwide" !~ /\bworld\b/) {
    print "Does not match\n";
}

捕获组

基本捕获组

perl
my $text = "John Doe 30";

# Three capture groups: first word, second word, then a run of digits.
# After a successful match they are available as $1, $2 and $3.
if ($text =~ /(\w+)\s+(\w+)\s+(\d+)/) {
    # Copy the captures into named variables right away; a later successful
    # match would overwrite $1..$3.
    my $first_name = $1;
    my $last_name = $2;
    my $age = $3;
    
    print "Name: $first_name $last_name\n";
    print "Age: $age\n";
}

命名捕获组

perl
my $text = "2024-01-15";

# Named groups (?<name>...) are read back through the %+ hash after a
# successful match, which avoids counting parentheses.
if ($text =~ /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/) {
    print "Year: $+{year}\n";
    print "Month: $+{month}\n";
    print "Day: $+{day}\n";
}

非捕获组

perl
# Non-capturing group (?:...): groups "abc" for the {2} quantifier without
# setting $1.
if ("abcabc" =~ /(?:abc){2}/) {
    print "Matched without capturing\n";
}

# A plain group does capture; when it is quantified, $1 holds the text of the
# last repetition.
if ("abcabc" =~ /(abc){2}/) {
    print "Captured: $1\n";  # abc
}

替换

基本替换

perl
my $text = "Hello World";

# Without /g only the first match is replaced.
$text =~ s/World/Perl/;
print "$text\n";  # Hello Perl

# With /g every match is replaced. "Hello Perl" contains a single "o", so
# only that one character changes.
$text =~ s/o/x/g;
print "$text\n";  # Hellx Perl

使用捕获组替换

perl
my $text = "John Doe";

# Swap the two words: $1 and $2 in the replacement refer to the capture
# groups of the pattern.
$text =~ s/(\w+)\s+(\w+)/$2 $1/;
print $text;  # Doe John

替换修饰符

perl
# The sample ends in a number so that the /e example has digits to work on.
my $text = "Hello World 21";

# i - case-insensitive match
$text =~ s/world/perl/i;       # Hello perl 21

# g - replace every match, not just the first
$text =~ s/o/x/g;              # Hellx perl 21

# e - evaluate the replacement as Perl code. The digits must be wrapped in a
# capture group; without the parentheses $1 is not set by this match.
$text =~ s/(\d+)/$1 * 2/eg;    # Hellx perl 42

模式修饰符

常用修饰符

perl
/i    # 不区分大小写
/m    # 多行模式
/s    # 单行模式(. 匹配换行符)
/x    # 允许空白和注释
/g    # 全局匹配
/c    # 匹配失败后不重置位置
/o    # 只编译一次

多行模式

perl
my $text = "Line1\nLine2\nLine3";

# /m lets ^ match at the start of every line, not only at the start of the
# string. /g returns all matches only in list context; in scalar or void
# context the expression finds just one match per evaluation.
my @line_starts = $text =~ /^\w+/gm;  # ("Line1", "Line2", "Line3")

单行模式

perl
my $text = "Hello\nWorld";

# With /s the dot also matches a newline, so .* can span both lines.
$text =~ /Hello.*World/s;  # matches

扩展模式

perl
# /x lets the pattern be laid out over several lines with comments; literal
# whitespace inside the pattern is ignored.
my $email = qr/
    \w+            # local part
    \@             # literal at-sign, escaped so it is never read as an array
    [\w.-]+        # domain labels
    \.             # dot before the top-level domain
    [a-zA-Z]{2,}   # top-level domain
/x;

# Single quotes keep the address literal; a double-quoted string would
# interpolate an array named @example and fail to compile under "use strict".
if ('test@example.com' =~ $email) {
    print "Valid email\n";
}

分割、过滤和映射

split

perl
my $text = "apple,banana,orange";

# Split on every comma.
my @fruits = split /,/, $text;
print "@fruits\n";  # apple banana orange

# A third argument limits the number of fields; the last field keeps the
# rest of the string unsplit.
my @parts = split /,/, $text, 2;
print "@parts\n";  # apple banana,orange

# The separator is a regex: here any single comma, space or hyphen.
my $data = "apple banana-orange";
my @items = split /[, -]/, $data;
print "@items\n";  # apple banana orange

grep

perl
my @numbers = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

# grep keeps the elements for which the block is true; here the even numbers.
my @evens = grep { $_ % 2 == 0 } @numbers;
print "@evens\n";  # 2 4 6 8 10

# A bare regex inside the block is matched against $_.
my @strings = qw(apple banana orange grape);
my @with_a = grep { /a/ } @strings;
print "@with_a\n";  # apple banana orange grape

map

perl
my @numbers = (1, 2, 3, 4, 5);

# map evaluates the block for each element and collects the results.
my @squared = map { $_ * $_ } @numbers;
print "@squared\n";  # 1 4 9 16 25

# Extract the domain part of each address. The match result is checked
# before $1 is used: an element without an at-sign contributes nothing
# instead of a stale or undefined $1.
my @emails = qw(test@example.com user@example.org);
my @domains = map { /\@([\w.-]+)/ ? $1 : () } @emails;
print "@domains\n";  # example.com example.org

高级特性

前瞻和后顾

perl
# Positive lookahead: "hello" must be followed by "123"; the digits are
# checked but are not part of the match.
if ("hello123" =~ /hello(?=123)/) {
    print "Followed by 123\n";
}

# Negative lookahead: "hello" must NOT be followed by "123".
if ("hello world" =~ /hello(?!123)/) {
    print "Not followed by 123\n";
}

# Positive lookbehind: "hello" must be preceded by "123".
# NOTE(review): the original remark says lookbehind needs a fixed length;
# newer Perl releases relax this for bounded lengths - confirm for the
# targeted version.
if ("123hello" =~ /(?<=123)hello/) {
    print "Preceded by 123\n";
}

# Negative lookbehind: "hello" must NOT be preceded by "123".
if ("abchello" =~ /(?<!123)hello/) {
    print "Not preceded by 123\n";
}

条件模式

perl
my $text = "abc";

# (?(1)c|d) is a conditional: if group 1 (the optional "a") took part in the
# match, "c" must follow; otherwise "d" must follow.
if ($text =~ /(a)?b?(?(1)c|d)/) {
    print "Conditional match\n";
}

递归模式

perl
# Match balanced, possibly nested parentheses.
# The (?(DEFINE)...) section declares the named sub-pattern "nested" without
# matching anything itself; (?&nested) calls it, and the sub-pattern calls
# itself for every inner pair. Every group, including the DEFINE group, must
# be closed or the pattern does not compile.
my $nested = qr/(?&nested)(?(DEFINE)(?<nested>\((?:[^()]|(?&nested))*\)))/;

if ("(a(b)c)" =~ /$nested/) {
    print "Nested parentheses matched\n";
}

实践示例

示例 1:验证电子邮件

perl
#!/usr/bin/perl
use strict;
use warnings;

# Check whether a string has the shape "local-part@domain.tld".
#
# Parameters:
#   $email - the string to check; undef is treated as invalid.
# Returns:
#   the result of the pattern match (true on success, false otherwise).
sub is_valid_email {
    my ($email) = @_;

    # An undefined value can never be a valid address; returning here also
    # avoids an "uninitialized value" warning from the match below.
    return unless defined $email;

    # \A and \z anchor to the real string boundaries. A trailing "$" would
    # also accept an address that is followed by a newline.
    my $email_regex = qr/
        \A                    # start of string
        [a-zA-Z0-9._%+-]+     # local part
        \@                    # literal at-sign
        [a-zA-Z0-9.-]+        # domain labels
        \.                    # dot before the top-level domain
        [a-zA-Z]{2,}          # top-level domain
        \z                    # end of string
    /x;

    return $email =~ $email_regex;
}

# Sample addresses. They are single-quoted so the at-sign stays literal: in
# double quotes Perl would interpolate arrays such as @example, which is a
# compile-time error under "use strict".
my @emails = (
    'test@example.com',
    'user.name@domain.org',
    'invalid@email',
    'test@.com',
);

# Report the verdict for every sample address.
foreach my $email (@emails) {
    my $valid = is_valid_email($email) ? "Valid" : "Invalid";
    print "$email: $valid\n";
}

示例 2:提取 URL

perl
#!/usr/bin/perl
use strict;
use warnings;

# Collect every http or https URL that occurs in a piece of text.
#
# Parameters:
#   $text - the string to scan.
# Returns:
#   the list of matched URL strings, in order of appearance.
sub extract_urls {
    my ($text) = @_;

    # qr does not accept the g modifier; repetition belongs on the match
    # below. The optional parts use non-capturing groups because a list-context
    # match with capture groups would return the group contents (port, path,
    # query) rather than the complete URLs.
    my $url_regex = qr{
        https?://              # scheme
        [a-zA-Z0-9.-]+         # host name
        (?: : [0-9]+ )?        # optional port
        (?: / [^\s?]* )?       # optional path, up to a question mark
        (?: \? \S* )?          # optional query string
    }x;

    my @urls = $text =~ /$url_regex/g;
    return @urls;
}

# Sample text that contains two URLs; print each one found on its own line.
my $text = "Visit https://example.com/path?query=1 or http://test.org:8080";
my @urls = extract_urls($text);

print "Found URLs:\n";
foreach my $url (@urls) {
    print "$url\n";
}

示例 3:文本清理

perl
#!/usr/bin/perl
use strict;
use warnings;

# Normalise a piece of free text.
#
# Parameters:
#   $text - the string to clean.
# Returns:
#   a copy with every character other than word characters, whitespace,
#   commas and hyphens removed, runs of whitespace collapsed to one space,
#   and no leading or trailing whitespace.
sub clean_text {
    my ($text) = @_;

    # Strip the unwanted characters first. Doing this after the whitespace
    # clean-up would leave double or trailing spaces where a removed
    # character used to stand between two blanks.
    $text =~ s/[^\w\s,-]//g;

    # Collapse every run of whitespace into a single space.
    $text =~ s/\s+/ /g;

    # Trim what is left at both ends.
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    return $text;
}

# Run the cleaner on a deliberately untidy sample and show both versions.
my $messy = "  Hello   World!!!  How  are you??  ";
my $clean = clean_text($messy);

print "Original: $messy\n", "Clean: $clean\n";

示例 4:日志解析

perl
#!/usr/bin/perl
use strict;
use warnings;

# Pattern for lines shaped like "[timestamp] [LEVEL] message".
# Under /x literal whitespace in the pattern is ignored, so the blanks that
# separate the fields in the log must be matched explicitly with \s*.
my $log_regex = qr/
    ^ \[ (?<timestamp> .*? ) \]    # timestamp inside the first brackets
    \s* \[ (?<level> \w+ ) \]      # log level inside the second brackets
    \s* (?<message> .* ) $         # rest of the line is the message
/x;

# Read the sample records from the DATA section and print them as aligned
# columns; lines that do not fit the pattern are skipped.
while (my $line = <DATA>) {
    if ($line =~ $log_regex) {
        printf "%-20s %-10s %s\n", $+{timestamp}, $+{level}, $+{message};
    }
}

__DATA__
[2024-01-15 10:30:00] [INFO] Application started
[2024-01-15 10:30:05] [ERROR] Failed to connect
[2024-01-15 10:30:10] [WARN] Low disk space

小结

本章节学习了 Perl 的正则表达式:

  1. ✅ 基本匹配
  2. ✅ 字符类
  3. ✅ 量词
  4. ✅ 锚点
  5. ✅ 捕获组
  6. ✅ 替换
  7. ✅ 模式修饰符
  8. ✅ 分割、过滤和映射
  9. ✅ 高级特性

接下来,我们将学习 Perl 发送邮件