正则表达式是从信息中搜索特定的模式的一把瑞士军刀。它们是一个巨大的工具库,其中的一些功能经常被忽视或未被充分利用。今天我将向你们展示一些正则表达式的高级用法。
举个例子,这是一个我们可能用来检测美国电话号码的正则表达式:
1
|
r
'^(1[-\s.])?(\()?\d{3}(?(2)\))[-\s.]?\d{3}[-\s.]?\d{4}$'
|
'^'
'(1[-\s.])?'
# optional '1-', '1.' or '1 ' (a leading 1 followed by '-', '.' or whitespace)
'(\()?'
# optional opening parenthesis
'\d{3}'
# the area code
'(?(2)\))'
# if there was an opening parenthesis, close it
'[-\s.]?'
# followed by '-' or '.' or space
# first 3 digits
# followed by '-' or '.' or space
'\d{4}$'
# last 4 digits
|
@H_404_33@
import
re
numbers
=
[
"123 555 6789"
,
"1-(123)-555-6789"
"(123-555-6789"
"(123).555.6789"
"123 55 6789"
]
for
number
in
numbers:
pattern
re.match(r
'^'
'\d{3}'
# the area code
'(?(2)\))'
'[-\s.]?'
# followed by '-' or '.' or space
# first 3 digits
# followed by '-' or '.' or space
'\d{4}$\s*'
# last 4 digits
if
pattern:
print
'{0} is valid'
.
format
(number)
else
:
print
'{0} is not valid'
(number)
|
@H_404_33@
123
555
6789
is
valid
1
-
(
123
)
-
555
6789
valid
123
is
not
valid
).
555.6789
valid
55
valid
|
@H_404_33@
'(1[-\s.])?'
'(\()?'
'\d{3}'
'(?(2)\))'
'[-\s.]?'
'\d{4}$'
(number)
|
@H_404_33@
at_beginning
max_repeat
0
1
subpattern
1
literal
49
in
45
category category_space
46
2147483648
in
category category_space
1
2
40
2147483648
in
category category_space
3
3
in
category category_digit
2147483648
in
category category_space
subpattern
None
groupref_exists
2
41
None
2147483648
in
category category_space
1
in
45
category category_space
46
2147483648
in
category category_space
3
in
category category_digit
2147483648
in
category category_space
1
in
45
category category_space
46
2147483648
in
category category_space
4
4
in
category category_digit
at at_end
2147483648
in
category category_space
valid
valid
valid
valid
valid
|
@H_404_33@
html
=
'Hello <a href="http://pypix.com" title="pypix">Pypix</a>'
m
re.findall(
'<a.*>.*<\/a>'
m:
print
m
|
@H_404_33@
'Hello <a href="http://pypix.com" title="pypix">Pypix</a>'
\
'Hello <a href="http://example.com" title"example">Example</a>'
m:
m
|
@H_404_33@
'<a href="http://pypix.com" title="pypix">Pypix</a>Hello <a href="http://example.com" title"example">Example</a>'
]
|
@H_404_33@
'<a href="http://example.com" title"example">Example</a>'
]
|
@H_404_33@
strings
[
"hello foo"
# returns False
"hello foobar"
]
# returns True
string
strings:
re.search(r
'foo(?=bar)'
pattern:
'True'
:
'False'
|
@H_404_33@
# returns True
"hello foobar"
# returns False
"hello foobaz"
]
# returns True
strings:
'foo(?!bar)'
pattern:
'True'
:
'False'
|
@H_404_33@
"<pypix>"
# returns true
"<foo"
# returns false
"bar>"
# returns false
"hello"
]
# returns true
strings:
'^(<)?[a-z]+(?(1)>)$'
pattern:
'True'
:
'False'
|
@H_404_33@
'(?P<hi>H.*)(?P<fstar>f.*)(?P<bstar>b.*)'
# prints b* => bar
"h* => {0}"
'hi'
# prints h* => Hello
|
@H_404_33@
template
"Hello [first_name] [last_name],\
Thank you
purchasing [product_name]
from
[store_name]. \
The total cost of your purchase was [product_price] plus [ship_price]
shipping. \
You can expect your product to arrive
[ship_days_min] to [ship_days_max] business days. \
Sincerely,\
[store_manager_name]"
# assume dic has all the replacement data
# such as dic['first_name'] dic['product_price'] etc...
dic
{
"first_name"
:
"John"
"last_name"
"Doe"
"product_name"
"iphone"
"store_name"
"Walkers"
"product_price"
:
"$500"
"ship_price"
"$10"
|
@H_404_33@