# 常規表達式(一) -- Regular Expressions, Part 1
# Regular Expressions, Part 1
# (1) Basic pattern matching with "=~" and "!~"
# "=~" evaluates to true when the string matches the pattern
# and to false when it does not.
"Hello World" =~ /World/;
$string = "Hello World!";
# Print only when the match succeeds.
print "It matches\n" if $string =~ /World/;
# Print only when the match fails ("!~" is the negated form of "=~").
print "It doesn't match\n" if $string !~ /World/;
$_ = "Hello World";
# With no explicit target, the pattern is matched against $_.
print "It matches\n" if /World/;
# Matching is case-sensitive, so this fails.
"Hello World" =~ /world/;
# A space in the pattern is treated as an ordinary character; this matches.
"Hello World" =~ /o W/;
# Fails: the string has no space after "World".
"Hello World" =~ /World /;
# (2) Custom delimiters
# Equivalent to "Hello World" =~ /World/;
"Hello World" =~ m!World!;
"Hello World" =~ m{World}; # Same match, written with brace delimiters
# Matches; because '"' is the delimiter here, '/' is just an ordinary character.
"/usr/bin/perl" =~ m"/perl";
# (3) Metacharacters and character classes
# ^ : anchors the match at the beginning of the line
"Hello World" =~ /^Hello/; # matches
# Fails because "World" is not at the beginning of the line.
"Hello World" =~ /^World/;
# $ : anchors the match at the end of the line
"Hello World" =~ /World$/; # matches
# Fails because "Hello" is not at the end of the line.
"Hello World" =~ /Hello$/;
# . : matches any single character except a newline (\n)
"Hello World" =~ /Wo.ld/; # matches
# * : matches the preceding item zero or more times (as many as possible)
# + : matches the preceding item one or more times (as many as possible)
# ? : matches the preceding item zero or one time (as many as possible)
"Hello World" =~ /Hel*o/; # l* means zero or more 'l'; matches
"Hello World" =~ /Hel+o/; # l+ means one or more 'l'; matches
# .* means any character, zero or more times, so this matches.
# Because the quantifier takes as much as possible, the text
# Perl matches is 'That is a cat, not a hat', not just 'That'.
"That is a cat, not a hat." =~ /T.*at/;
# Appending '?' makes the quantifier match as little as possible;
# the text Perl matches is then 'That'.
"That is a cat, not a hat." =~ /T.*?at/;
# [] : matches any one of the characters listed inside the brackets
# Requires the first character to be A, B, or C; fails for "Hat".
"Hat" =~ /^[ABC]/;
# Requires the first character to be in the range A through D; matches.
"Cat" =~ /^[A-D]/;
# [^] : the opposite of [] -- matches any one character NOT listed
# ^[^ABC] requires the first character to be something other than
# A, B, or C; matches.
"Hat" =~ /^[^ABC]/;
# Requires the last character to be a non-letter; matches on the '.'.
"Hello World." =~ /[^A-Za-z]$/;
# | : alternation -- matches any one of the alternatives
# Looks for "cat" or "dog"; matches.
"Mary has a cat." =~ /cat|dog/;
# {} : specifies how many times the preceding item must occur
# l{1,3} means one to three 'l'; matches.
"Helllo World" =~ /Hel{1,3}o/;
# l{2,} means two or more 'l'; matches.
"Helllo World" =~ /Hel{2,}o/;
# l{2} means exactly two 'l'; fails because the string has three.
"Helllo World" =~ /Hel{2}o/;
# \b : matches at a word boundary
# \B : matches anywhere that is not a word boundary
# Looks for "Hello" as a whole word; matches.
"Hello World" =~ /\bHello\b/;
# Looks for "Hello" as a whole word; fails because "Hello" runs
# straight into "world" with no boundary after it.
"Helloworld" =~ /\bHello\b/;
# Matches: there is a boundary before "Hello" and none after it.
"Helloworld" =~ /\bHello\B/;
# Looks for "word" as a whole word; matches (the quotes are non-word
# characters, so they form boundaries).
'This is a "word".' =~ /\bword\b/;
# \w : word character      [a-zA-Z0-9_]
# \W : non-word character  [^a-zA-Z0-9_]
# \s : whitespace          [ \t\n\r\f\v]
# \S : non-whitespace      [^ \t\n\r\f\v]
# \d : digit               [0-9]
# \D : non-digit           [^0-9]
"Hello World" =~ /^\w+\W\w+$/; # matches
# Reference : perlre(1)
# 常規表達式(二) -- Regular Expressions, Part 2
# Regular Expressions, Part 2
# (1) Substitution with s///
$_ = "Hello World.\n";
s/World/Bill/; # replaces "World" with "Bill"
print; # Hello Bill.
$_ = "Hello World.\n";
$word = "World";
s/$word/Bill/; # variables can be interpolated into the pattern
print;
# (2) Capturing with ()
# Each parenthesized group stores the text it matched in $1, $2, ...
$_ = "Every Dog Has It's Day.";
# Capture the first and second words at the start of the string.
# $1 and $2 are only meaningful after a successful match, so the
# print is guarded by the match itself.
if (/^(\w+)\W+(\w+)/) {
    print "The first 2 words are: $1 and $2\n";
}
$_ = "Every Dog Has It's Day.";
s/(\w+)/<$1>/g; # wrap every word in angle brackets
# $_ carries no trailing newline in this section, so one is added
# explicitly to keep each result on its own output line.
print "$_\n"; # <Every> <Dog> <Has> <It>'<s> <Day>.
$_ = "barbarian";
# \1 refers back to what group 1 matched inside the same pattern, so
# (\w+)\1 finds an immediately repeated run ("bar" "bar") and the
# replacement keeps a single copy of it.
s/(\w+)\1/$1/;
print "$_\n"; # barian
# (3) Modifiers
# g : Match globally, i.e., find all occurrences.
$_ = "Hello World.\n";
s/l/<L>/; # replaces only the first 'l'
print; # He<L>lo World.
$_ = "Hello World.\n";
s/l/<L>/g; # replaces every 'l'
print; # He<L><L>o Wor<L>d.
# i : Do case-insensitive pattern matching.
# Case differences are ignored, so this matches.
"Hello World" =~ /hello world/i;
# o : Compile pattern only once.
# NOTE(review): perlop describes /o as largely obsolete and recommends
# qr// for precompiling patterns -- confirm before copying this idiom.
$word = "something";
# $something and $another are not assigned anywhere in this listing;
# the loop is an illustrative skeleton.
while($something){
# something ...
s/$word/$another/o; # with 'o' the pattern is compiled only once, intended as a speed-up
# something ...
}
$str = "abcdefg\n";
# Pitfall: with /o the pattern is fixed at its first value ('c'), so
# every pass of the loop wraps 'c' again instead of 'd', 'e', 'f'.
$str =~ s/($_)/<$1>/o for('c' .. 'f');
print $str; # ab<<<<c>>>>defg -- probably not what was intended
# m : Treat string as multiple lines.
# That is, change "^" and "$" from matching the
# start or end of the string to matching the start
# or end of any line anywhere within the string.
$_ = "abc\ndef\nghi\n";
s/^(.)/\u$1/mg; # uppercase the first character of every line
print; # as intended: "Abc\nDef\nGhi\n"
$_ = "abc\ndef\nghi\n";
s/^(.)/\u$1/g; # without m, ^ matches only at the start of the string
print; # result becomes "Abc\ndef\nghi\n"
$_ = "abc\ndef\nghi\n";
s/(.)$/\u$1/mg; # likewise for $: uppercase the last character of every line
print; # result is "abC\ndeF\nghI\n"
# s : Treat string as single line. That is,
# change "." to match any character whatsoever,
# even a newline, which normally it would not match.
$_ = "abc\ndef\nghi\n";
# With s, '.' also matches "\n", so .* can span the lines between
# 'a' and 'i'; this matches.
print "Matched" if /a.*i/s;
# Without s, '.' stops at the first "\n", so this fails.
print "Matched" if /a.*i/;