# Regular expressions, part 1
#
# Each statement below is a standalone demonstration; the accompanying
# comment states whether the match succeeds.

# (1) Basic pattern matching with "=~" and "!~"
# A match evaluates to true on success and to false on failure.
"Hello World" =~ /World/;

$string = "Hello World!";
# Print when the match succeeds.
print "It matches\n" if $string =~ /World/;
# Print when the match fails.
print "It doesn't match\n" if $string !~ /World/;

$_ = "Hello World";
# With no explicit target the pattern is matched against $_.
print "It matches\n" if /World/;

# The case differs, so the match fails.
"Hello World" =~ /world/;
# Whitespace is an ordinary character: the match succeeds.
"Hello World" =~ /o W/;
# Fails: there is no space after "World" in the target.
"Hello World" =~ /World /;

# (2) Custom delimiters
# Both forms are equivalent to "Hello World" =~ /World/;
"Hello World" =~ m!World!;
"Hello World" =~ m{World};    # the same
# Succeeds; with '"' as the delimiter, '/' is an ordinary character.
"/usr/bin/perl" =~ m"/perl";

# (3) Metacharacters and character classes

# ^ : anchors the match at the start of the line.
"Hello World" =~ /^Hello/;    # succeeds
# Fails because "World" is not at the start.
"Hello World" =~ /^World/;

# $ : anchors the match at the end of the line.
"Hello World" =~ /World$/;    # succeeds
# Fails because "Hello" is not at the end.
"Hello World" =~ /Hello$/;

# . : matches any single character except a newline (\n).
"Hello World" =~ /Wo.ld/;     # succeeds

# * : matches the preceding item zero or more times (as many as possible)
# + : matches the preceding item one or more times  (as many as possible)
# ? : matches the preceding item zero or one time   (as many as possible)
"Hello World" =~ /Hel*o/;     # l* means 'l' zero or more times: succeeds
"Hello World" =~ /Hel+o/;     # l+ means 'l' one or more times: succeeds

# .* means "any character, zero or more times"; the match succeeds, but
# because quantifiers are greedy the text matched is
# 'That is a cat, not a hat', not 'That'.
"That is a cat, not a hat." =~ /T.*at/;
# Appending '?' makes the quantifier match as little as possible,
# so the text matched is now 'That'.
"That is a cat, not a hat." =~ /T.*?at/;

# [] : matches any one of the characters listed inside the brackets.
# Starts with A, B or C? Fails.
"Hat" =~ /^[ABC]/;
# Starts with A, B, C or D? Succeeds.
"Cat" =~ /^[A-D]/;

# [^] : the opposite of []; matches any one character NOT listed.
# ^[^ABC] means "does not start with A, B or C": succeeds.
"Hat" =~ /^[^ABC]/;
# Ends with a non-letter: succeeds.
"Hello World." =~ /[^A-Za-z]$/;

# | : alternation; matches any one of the alternatives.
# Matches "cat" or "dog": succeeds.
"Mary has a cat." =~ /cat|dog/;

# {} : specifies how many times the preceding item must occur.
# l{1,3} means 'l' one to three times: succeeds.
"Helllo World" =~ /Hel{1,3}o/;
# l{2,} means 'l' two or more times: succeeds.
"Helllo World" =~ /Hel{2,}o/;
# l{2} means 'l' exactly twice: fails (the target has three).
"Helllo World" =~ /Hel{2}o/;

# \b : matches at a word boundary
# \B : matches anywhere that is not a word boundary
# Matches "Hello" as a whole word: succeeds.
"Hello World" =~ /\bHello\b/;
# "Hello" is not a whole word here: fails.
"Helloworld" =~ /\bHello\b/;
# Succeeds: "Hello" starts at a boundary and is followed by a word character.
"Helloworld" =~ /\bHello\B/;
# Matches "word" as a whole word (quotes are non-word characters): succeeds.
'This is a "word".' =~ /\bword\b/;

# Character-class shortcuts (ASCII equivalents shown):
# \w : word character       [a-zA-Z0-9_]
# \W : non-word character   [^a-zA-Z0-9_]
# \s : whitespace           [ \t\n\r\f] (plus vertical tab since Perl 5.18)
# \S : non-whitespace
# \d : digit                [0-9]
# \D : non-digit            [^0-9]
"Hello World" =~ /^\w+\W\w+$/;    # succeeds

# Reference: perlre(1)
# Regular expressions, part 2

# (1) Substitution with s///
$_ = "Hello World.\n";
s/World/Bill/;      # replaces "World" with "Bill"
print;              # Hello Bill.

$_ = "Hello World.\n";
$word = "World";
s/$word/Bill/;      # variables are interpolated into the pattern
print;              # Hello Bill.

# (2) Capturing with ()
$_ = "Every Dog Has It's Day.";
# Capture the first and second words of the line into $1 and $2.
# The capture variables are only read when the match succeeded.
if (/^(\w+)\W+(\w+)/) {
    print "The first 2 words are: $1 and $2";
}

$_ = "Every Dog Has It's Day.";
s/(\w+)/<$1>/g;
print;              # <Every> <Dog> <Has> <It>'<s> <Day>.

$_ = "barbarian";
s/(\w+)\1/$1/;      # find an immediately repeated run and keep one copy
print;              # barian

# (3) Modifiers

# g: match globally, i.e. find all occurrences.
$_ = "Hello World.\n";
s/l/<L>/;           # replaces only the first 'l'
print;              # He<L>lo World.

$_ = "Hello World.\n";
s/l/<L>/g;          # replaces every 'l'
print;              # He<L><L>o Wor<L>d.

# i: do case-insensitive pattern matching.
# Case differences are ignored, so the match succeeds.
"Hello World" =~ /hello world/i;

# o: compile the pattern only once.
$word = "something";
# Illustrative loop only: $something and $another are not assigned anywhere
# in this snippet, so the body does not run here.
while ($something) {
    # something ...
    # With /o the interpolated pattern is compiled a single time.
    # NOTE(review): perlop describes /o as largely obsolete -- confirm the
    # speed-up claim before relying on it.
    s/$word/$another/o;
    # something ...
}

$str = "abcdefg\n";
# Pitfall: /o freezes the pattern at its first interpolated value ('c'),
# so every iteration wraps the same 'c' again instead of c, d, e and f.
$str =~ s/($_)/<$1>/o for ('c' .. 'f');
print $str;         # ab<<<<c>>>>defg -- probably not what was wanted

# m: treat the string as multiple lines.
# That is, change "^" and "$" from matching the start or end of the string
# to matching the start or end of any line anywhere within the string.
$_ = "abc\ndef\nghi\n";
s/^(.)/\u$1/mg;     # upper-case the first character of every line
print;              # correct result: "Abc\nDef\nGhi\n"

$_ = "abc\ndef\nghi\n";
s/^(.)/\u$1/g;      # without /m only the start of the string matches
print;              # result becomes "Abc\ndef\nghi\n"

$_ = "abc\ndef\nghi\n";
s/(.)$/\u$1/mg;     # likewise for the end of every line
print;              # result is "abC\ndeF\nghI\n"

# s: treat the string as a single line.
# That is, change "." to match any character whatsoever, even a newline,
# which it normally would not match.
$_ = "abc\ndef\nghi\n";
# With /s, '.' can match "\n", so the match succeeds.
print "Matched" if /a.*i/s;
# Without /s the match fails.
print "Matched" if /a.*i/;
# 繼續閱讀:Perl 程式設計教學