1)url 判断是否合法: /^(http:\/\/|https:\/\/)?((?:[A-Za-z0-9]+-[A-Za-z0-9]+|[A-Za-z0-9]+)\.)+([A-Za-z0-9]+)[\/\?\:]?.*$/
如果想不用\ 去转义 /,.,等特殊字符,可以 /\Q$var\E/
2)什么时候需要转义:1. 比如在"" 里面还要使用",则\"; 2. 一些特殊字符
3)数据库乱码问题:脚本中查询的表和插入的表要使用相同的编码(如 utf8),连接后先执行 $db_url->do("set names utf8");在 SecureCRT 中显示时把终端编码设置为 UTF-8;在 mysql 客户端查询显示前也先执行 set names utf8;
mysql> select max(id) idmax from tmmp;
+-------+
| idmax |
+-------+
| NULL |
+-------+
6)perl 对类型还是要注意:如 $url 为字符串,判断 $url == 0 很可能为真(非数字字符串在数值比较时按 0 处理);字符串比较应使用 eq / ne
perl DBI 中的 my $ref = $sth->fetchrow_hashref(); $ref->{xxx} 返回的都是字符串?
7) perl 的print $log "xxx" ; syswrite $log,"xxxx"; printf 的第一个参数是格式串,数据中含有 % 时会被当作格式符解析,所以直接 printf 数据会打印出错;最好使用 syswrite(或 print、printf("%s", $data))来打印数据
8) > or >> 如果文件不存在都会创建,只是truncate or append的区别
13) select substring_index('xxx.xxx.xx.22','.',-1) // 得到22
14) perl中直接的散列赋值(%b = %a)是浅层的值拷贝;只有赋值散列引用($b = $a 或 $b = \%a),或散列中的值本身是引用时,才是引用拷贝而非值拷贝,此时对一方的修改会影响另一方
15) perl打印shell脚本的结果信息之前需要先chomp结果,否则打印出来的信息不对
16) 取出url的后缀,如html,PHP等
# Capture the file extension of the URL path (e.g. "html", "php").
# The scheme group is non-capturing, so the extension is $1. The query string
# and fragment are excluded so that dots inside them are not mistaken for the
# extension (e.g. "x.php?v=1.2" yields "php").
if ($url =~ m{^(?:http://|https://)[^/?#]+/[^?#]*\.([^/.?#]+)})
{
    my $suffix = $1;
}
17) 从url中取出域名
# Extract the host part of an absolute URL ("scheme://[user[:pass]@]host[:port]/...").
# $domain is undef when $url does not have that form. Optional userinfo is
# skipped, and the host ends at the first ':', '/', '?' or '#'.
my ($domain) = $url =~ m{^[^:/?#]+://(?:[^/?#@]*@)?([^/:?#]+)};
18)svn 报错 "Can't convert string from 'UTF-8' to native encoding" 时,先执行: export LC_ALL=en_US.UTF-8
Perl Code
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# Parse the header section of a raw HTTP request.
#
# Argument:
#   $header - request text whose lines are separated by "\r\n".
#
# Returns a flat key/value list (assign it to a hash) containing:
#   Method, Cgi, Protocol - the space-separated fields of a GET/POST request line
#   <Header-Name>         - one entry per "Name: value" line, with the
#                           whitespace around the value removed
#   Port                  - port taken from the Host header, "80" if it has none
sub HttpHeadrParse
{
    my $header = shift;
    my %hash;

    foreach my $line (split(/\r\n/, $header)) {
        # A blank line ends the header section; whatever follows is the body.
        last if $line eq '';

        # Request line. Anchored so that header values which merely contain
        # "GET"/"POST" are not mistaken for it.
        if ($line =~ /^(?:GET|POST) /) {
            my @fields = split(/ /, $line);
            $hash{"Method"}   = $fields[0];
            $hash{"Cgi"}      = $fields[1];
            $hash{"Protocol"} = $fields[2];
            next;
        }

        # Split on the first colon only, so values that themselves contain ':'
        # (URLs, timestamps) are kept whole.
        my ($name, $value) = split(/:/, $line, 2);
        if (defined $value) {
            $value =~ s/^\s+//;
            $value =~ s/\s+$//;
        }

        if ($name eq "Host") {
            # "Host: example.com:8080" -> Host = example.com, Port = 8080
            my ($host, $port) = split(/:/, (defined $value ? $value : ''), 2);
            $hash{"Host"} = $host;
            $hash{"Port"} = (defined $port && $port ne "") ? $port : "80";
        }
        else {
            # A line without any colon is stored with an undefined value.
            $hash{$name} = $value;
        }
    }

    return %hash;
}
20)去除左右空格
2
3
4
5
6
7
sub trim
{
    # Return a copy of the first argument with the whitespace at both ends
    # removed; the caller's variable is left untouched.
    my ($text) = @_;
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
# NOTE(review): this bare block repeats the body of trim() without a
# "sub trim" header. It looks like a duplicated paste of the trim listing;
# confirm and remove.
{
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
21)根据url,get,post 做md5 去重
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Input: a string of name=value pairs and the delimiter separating the pairs.
# Returns the unique parameter names, sorted and joined with the delimiter.
#
#   $param - e.g. "b=2&a=1"; undef or '' yields ''.
#   $delim - literal separator such as '&' or ';' (it is not treated as a regex).
#
# Random-number names (any name matching /[.\d]{10,50}|1\d{9,9}$/) are
# dropped. A token with no "name=" part is recorded under the empty name.
sub get_name{
    my ($param, $delim) = @_;
    return '' if !defined($param) || $param eq '';

    my %seen;
    # \Q...\E: split on the delimiter literally, even if it is a regex
    # metacharacter such as '|' or '.'.
    for my $pair (split(/\Q$delim\E/, $param)) {
        my $name = '';
        if ($pair =~ /([^=]+)=/) {
            $name = $1;
        }
        next if $name =~ /[.\d]{10,50}|1\d{9,9}$/;    # drop random-number names
        $seen{$name} = 1;
    }

    return join($delim, sort keys %seen);
}
# Compute the de-duplication key ("uniquemd5") of a request.
#
#   $url        - request URL; an explicit port 80 is removed before hashing
#   $params     - ';'-separated name=value parameters
#   $query_get  - '&'-separated GET query string
#   $query_post - '&'-separated POST data
#
# Only the parameter names (as returned by get_name) enter the digest, not the
# values. Returns the hex MD5 of "url,param-names,get-names,post-names".
sub uniquemd5{
    my ($url, $params, $query_get, $query_post) = @_;

    # Loaded here so the sub works even if the file never says "use Digest::MD5".
    require Digest::MD5;

    my $q_get_name  = get_name($query_get,  '&');
    my $q_post_name = get_name($query_post, '&');
    my $p_name      = get_name($params,     ';');

    my $uniquemd5_str = $url;
    if ($url =~ m{(?:http|https)://[^:/;?#]+(?::([0-9]+))?}) {
        my $port = $1;    # undef when the URL carries no explicit port
        # NOTE(review): ":80" is stripped for https URLs as well, as before.
        if (defined $port && 80 == $port) {
            $uniquemd5_str =~ s/:[0-9]+//;
        }
    }

    $uniquemd5_str .= ",$p_name,$q_get_name,$q_post_name";
    return Digest::MD5->new->add($uniquemd5_str)->hexdigest;
}
# NOTE(review): get_name is defined twice in this file with the same body;
# one of the two definitions should be removed.
#
# Input: a string of name=value pairs and the delimiter separating the pairs.
# Returns the unique parameter names, sorted and joined with the delimiter.
#
#   $param - e.g. "b=2&a=1"; undef or '' yields ''.
#   $delim - literal separator such as '&' or ';' (it is not treated as a regex).
#
# Random-number names (any name matching /[.\d]{10,50}|1\d{9,9}$/) are
# dropped. A token with no "name=" part is recorded under the empty name.
sub get_name{
    my ($param, $delim) = @_;
    return '' if !defined($param) || $param eq '';

    my %seen;
    # \Q...\E: split on the delimiter literally, even if it is a regex
    # metacharacter such as '|' or '.'.
    for my $pair (split(/\Q$delim\E/, $param)) {
        my $name = '';
        if ($pair =~ /([^=]+)=/) {
            $name = $1;
        }
        next if $name =~ /[.\d]{10,50}|1\d{9,9}$/;    # drop random-number names
        $seen{$name} = 1;
    }

    return join($delim, sort keys %seen);
}
# NOTE(review): uniquemd5 is defined twice in this file with the same body;
# one of the two definitions should be removed.
#
# Compute the de-duplication key ("uniquemd5") of a request.
#
#   $url        - request URL; an explicit port 80 is removed before hashing
#   $params     - ';'-separated name=value parameters
#   $query_get  - '&'-separated GET query string
#   $query_post - '&'-separated POST data
#
# Only the parameter names (as returned by get_name) enter the digest, not the
# values. Returns the hex MD5 of "url,param-names,get-names,post-names".
sub uniquemd5{
    my ($url, $params, $query_get, $query_post) = @_;

    # Loaded here so the sub works even if the file never says "use Digest::MD5".
    require Digest::MD5;

    my $q_get_name  = get_name($query_get,  '&');
    my $q_post_name = get_name($query_post, '&');
    my $p_name      = get_name($params,     ';');

    my $uniquemd5_str = $url;
    if ($url =~ m{(?:http|https)://[^:/;?#]+(?::([0-9]+))?}) {
        my $port = $1;    # undef when the URL carries no explicit port
        # NOTE(review): ":80" is stripped for https URLs as well, as before.
        if (defined $port && 80 == $port) {
            $uniquemd5_str =~ s/:[0-9]+//;
        }
    }

    $uniquemd5_str .= ",$p_name,$q_get_name,$q_post_name";
    return Digest::MD5->new->add($uniquemd5_str)->hexdigest;
}