Perl 正则表达式
基本匹配
简单匹配
perl
my $text = "Hello World";
if ($text =~ /World/) {
print "Found 'World'\n";
}
if ($text !~ /Python/) {
print "Did not find 'Python'\n";
}不区分大小写
perl
my $text = "Hello World";
# 使用 i 修饰符
if ($text =~ /world/i) {
print "Found (case insensitive)\n";
}匹配变量
perl
my $text = "Hello World";
my $pattern = "World";
if ($text =~ /$pattern/) {
print "Found pattern: $pattern\n";
}字符类
基本字符类
perl
# 匹配数字
if ("123" =~ /\d/) {
print "Contains digits\n";
}
# 匹配字母
if ("abc" =~ /[a-zA-Z]/) {
print "Contains letters\n";
}
# 匹配空白字符
if ("hello world" =~ /\s/) {
print "Contains whitespace\n";
}预定义字符类
perl
\d # 匹配数字 [0-9]
\D # 匹配非数字 [^0-9]
\w # 匹配单词字符 [a-zA-Z0-9_]
\W # 匹配非单词字符
\s # 匹配空白字符 [ \t\n\r\f]
\S # 匹配非空白字符
# 示例
if ("hello 123" =~ /\d+/) {
print "Found digits\n";
}
if ("test@example.com" =~ /\w+@\w+\.\w+/) {
print "Valid email format\n";
}自定义字符类
perl
# 匹配元音
if ("hello" =~ /[aeiou]/) {
print "Contains vowel\n";
}
# 匹配十六进制数字
if ("A1F" =~ /[0-9A-Fa-f]+/) {
print "Hexadecimal\n";
}
# 否定字符类
if ("abc" =~ /[^aeiou]/) {
print "Contains non-vowel\n";
}量词
基本量词
perl
* # 0 次或多次
+ # 1 次或多次
? # 0 次或 1 次
{n} # 恰好 n 次
{n,} # n 次或更多
{n,m} # n 到 m 次量词示例
perl
# * 匹配
"test" =~ /a*/; # 匹配 0 次
"aaabbb" =~ /a*/; # 匹配 3 次
# + 匹配
"test" =~ /a+/; # 不匹配
"aaabbb" =~ /a+/; # 匹配 3 次
# ? 匹配
"color" =~ /colou?r/; # 匹配
"colour" =~ /colou?r/; # 匹配
"colouur" =~ /colou?r/; # 不匹配
# {n,m} 匹配
"123" =~ /\d{3}/; # 匹配 3 位数字
"12" =~ /\d{3,5}/; # 不匹配
"12345" =~ /\d{3,5}/; # 匹配 5 位数字贪婪和非贪婪
perl
my $text = "<b>hello</b> <b>world</b>";
# 贪婪匹配(默认)
if ($text =~ /<b>.*<\/b>/) {
print "Greedy: $&\n"; # <b>hello</b> <b>world</b>
}
# 非贪婪匹配
if ($text =~ /<b>.*?<\/b>/) {
print "Non-greedy: $&\n"; # <b>hello</b>
}锚点
基本锚点
perl
^ # 字符串开头
$ # 字符串结尾
\b # 单词边界
\B # 非单词边界锚点示例
perl
# 字符串开头
if ("hello world" =~ /^hello/) {
print "Starts with 'hello'\n";
}
# 字符串结尾
if ("hello world" =~ /world$/) {
print "Ends with 'world'\n";
}
# 单词边界
if ("hello world" =~ /\bworld\b/) {
print "Whole word 'world'\n";
}
if ("worldwide" !~ /\bworld\b/) {
print "Does not match\n";
}捕获组
基本捕获组
perl
my $text = "John Doe 30";
if ($text =~ /(\w+)\s+(\w+)\s+(\d+)/) {
my $first_name = $1;
my $last_name = $2;
my $age = $3;
print "Name: $first_name $last_name\n";
print "Age: $age\n";
}命名捕获组
perl
my $text = "2024-01-15";
if ($text =~ /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/) {
print "Year: $+{year}\n";
print "Month: $+{month}\n";
print "Day: $+{day}\n";
}非捕获组
perl
# 非捕获组 (?:...)
if ("abcabc" =~ /(?:abc){2}/) {
print "Matched without capturing\n";
}
# 普通捕获组会捕获
if ("abcabc" =~ /(abc){2}/) {
print "Captured: $1\n"; # abc
}替换
基本替换
perl
my $text = "Hello World";
# 替换第一个匹配
$text =~ s/World/Perl/;
print $text; # Hello Perl
# 全局替换
$text =~ s/o/x/g;
print $text; # Hellx Perl
使用捕获组替换
perl
my $text = "John Doe";
# 交换姓名
$text =~ s/(\w+)\s+(\w+)/$2 $1/;
print $text; # Doe John替换修饰符
perl
my $text = "Hello World";
# i - 不区分大小写
$text =~ s/world/perl/i;
# g - 全局替换
$text =~ s/o/x/g;
# e - 将替换部分作为 Perl 表达式求值(需要用捕获组才能使用 $1)
$text =~ s/(\d+)/$1 * 2/eg;
模式修饰符
常用修饰符
perl
/i # 不区分大小写
/m # 多行模式
/s # 单行模式(. 匹配换行符)
/x # 允许空白和注释
/g # 全局匹配
/c # 匹配失败后不重置位置
/o # 只编译一次多行模式
perl
my $text = "Line1\nLine2\nLine3";
# 匹配每一行的开头
my @line_starts = $text =~ /^\w+/gm; # 列表上下文下返回 Line1, Line2, Line3
单行模式
perl
my $text = "Hello\nWorld";
# . 匹配换行符
$text =~ /Hello.*World/s; # 匹配扩展模式
perl
my $email = qr/
\w+ # 用户名
@ # @ 符号
[\w.-]+ # 域名
\. # 点
[a-zA-Z]{2,} # 顶级域名
/x;
if ("test@example.com" =~ $email) {
print "Valid email\n";
}分割和连接
split
perl
my $text = "apple,banana,orange";
# 分割字符串
my @fruits = split /,/, $text;
print "@fruits\n"; # apple banana orange
# 限制分割次数
my @parts = split /,/, $text, 2;
print "@parts\n"; # apple banana,orange
# 使用正则表达式分割
my $data = "apple banana-orange";
my @items = split /[, -]/, $data;
print "@items\n"; # apple banana orangegrep
perl
my @numbers = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
# 过滤偶数
my @evens = grep { $_ % 2 == 0 } @numbers;
print "@evens\n"; # 2 4 6 8 10
# 使用正则表达式过滤
my @strings = qw(apple banana orange grape);
my @with_a = grep { /a/ } @strings;
print "@with_a\n"; # apple banana orange grapemap
perl
my @numbers = (1, 2, 3, 4, 5);
# 转换数据
my @squared = map { $_ * $_ } @numbers;
print "@squared\n"; # 1 4 9 16 25
# 使用正则表达式提取
my @emails = qw(test@example.com user@example.org);
my @domains = map { /@([\w.-]+)/; $1 } @emails;
print "@domains\n"; # example.com example.org高级特性
前瞻和后顾
perl
# 正向前瞻
if ("hello123" =~ /hello(?=123)/) {
print "Followed by 123\n";
}
# 负向前瞻
if ("hello world" =~ /hello(?!123)/) {
print "Not followed by 123\n";
}
# 正向后顾(需要固定长度)
if ("123hello" =~ /(?<=123)hello/) {
print "Preceded by 123\n";
}
# 负向后顾
if ("abchello" =~ /(?<!123)hello/) {
print "Not preceded by 123\n";
}条件模式
perl
my $text = "abc";
# 条件匹配
if ($text =~ /(a)?b?(?(1)c|d)/) {
print "Conditional match\n";
}递归模式
perl
# 匹配嵌套括号
my $nested = qr/\((?:[^()]|(?&nested))*\)(?(DEFINE)(?<nested>\((?:[^()]|(?&nested))*\)))/;
if ("(a(b)c)" =~ /$nested/) {
print "Nested parentheses matched\n";
}实践示例
示例 1:验证电子邮件
perl
#!/usr/bin/perl
use strict;
use warnings;
# Check whether a string has the shape of an e-mail address:
# local part, "@", a domain, a dot, and an alphabetic top-level domain.
#
# Parameters:
#   $email - the candidate address.
# Returns:
#   The result of the pattern match, always as a single scalar value
#   (true on match, false otherwise), regardless of calling context.
sub is_valid_email {
    my ($email) = @_;
    my $email_regex = qr/
        \A                   # start of string
        [a-zA-Z0-9._%+-]+    # local part
        @                    # at sign
        [a-zA-Z0-9.-]+       # domain
        \.                   # dot before the top-level domain
        [a-zA-Z]{2,}         # top-level domain
        \z                   # very end of string, no trailing newline allowed
    /x;
    # scalar() keeps a failed match from turning into an empty list when
    # the caller uses the result in list context.
    return scalar($email =~ $email_regex);
}
my @emails = (
"test@example.com",
"user.name@domain.org",
"invalid@email",
"test@.com"
);
foreach my $email (@emails) {
my $valid = is_valid_email($email) ? "Valid" : "Invalid";
print "$email: $valid\n";
}示例 2:提取 URL
perl
#!/usr/bin/perl
use strict;
use warnings;
# Find every http/https URL in a piece of text.
#
# Parameters:
#   $text - the text to scan.
# Returns:
#   The list of complete URLs in order of appearance (empty list when
#   none are found).
sub extract_urls {
    my ($text) = @_;
    # All groups are non-capturing: with no captures, a list-context /g
    # match returns the full text of each match rather than the pieces.
    # The /g flag belongs on the match below; qr does not accept it.
    my $url_regex = qr{
        https?://            # scheme
        [a-zA-Z0-9.-]+       # host
        (?: : [0-9]+ )?      # optional port
        (?: / [^\s?]* )?     # optional path, stopping before any query
        (?: \? \S* )?        # optional query string
    }x;
    my @urls = $text =~ /$url_regex/g;
    return @urls;
}
my $text = "Visit https://example.com/path?query=1 or http://test.org:8080";
my @urls = extract_urls($text);
print "Found URLs:\n";
print "$_\n" for @urls;示例 3:文本清理
perl
#!/usr/bin/perl
use strict;
use warnings;
# Normalise a piece of text: drop every character that is not a word
# character, whitespace, comma or hyphen, squeeze whitespace runs into a
# single space, and trim both ends.
#
# Parameters:
#   $text - the text to clean.
# Returns:
#   The cleaned copy; the caller's variable is not modified.
sub clean_text {
    my ($text) = @_;
    # Strip the unwanted characters first: removing them can leave new
    # gaps, which the whitespace passes below then tidy up.
    $text =~ s/[^\w\s,-]//g;
    # Collapse each run of whitespace into one space.
    $text =~ s/\s+/ /g;
    # Trim what is left at either end.
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    return $text;
}
my $messy = " Hello World!!! How are you?? ";
my $clean = clean_text($messy);
print "Original: $messy\n";
print "Clean: $clean\n";示例 4:日志解析
perl
#!/usr/bin/perl
use strict;
use warnings;
# Pattern for one log line of the form "[timestamp] [LEVEL] message".
# Under /x, literal whitespace inside the pattern is ignored, so the blanks
# that separate the fields have to be matched explicitly with \s*.
my $log_regex = qr/
    \A \[ (.*?) \]      # timestamp
    \s* \[ (\w+) \]     # log level
    \s* (.*) $          # message, up to the end of the line
/x;

# Read the DATA section line by line and print each line that parses as
# an aligned row; lines that do not match the pattern are skipped.
while (my $line = <DATA>) {
    # A list assignment in boolean context is true only when the match
    # succeeded and returned its three captures.
    if (my ($timestamp, $level, $message) = $line =~ $log_regex) {
        printf "%-20s %-10s %s\n", $timestamp, $level, $message;
    }
}
__DATA__
[2024-01-15 10:30:00] [INFO] Application started
[2024-01-15 10:30:05] [ERROR] Failed to connect
[2024-01-15 10:30:10] [WARN] Low disk space小结
本章节学习了 Perl 的正则表达式:
- ✅ 基本匹配
- ✅ 字符类
- ✅ 量词
- ✅ 锚点
- ✅ 捕获组
- ✅ 替换
- ✅ 模式修饰符
- ✅ 分割和连接
- ✅ 高级特性
接下来,我们将学习 Perl 发送邮件。