我们可以通过 HTTP_USER_AGENT 来判断来访者是否是蜘蛛:搜索引擎的蜘蛛都有自己独特的标志,下面列举了其中一部分。
/**
 * Determine whether the current visitor is a search-engine spider.
 *
 * Performs a case-insensitive substring search of the request's User-Agent
 * header against a short list of known crawler signatures. A request that
 * carries no User-Agent header is reported as "not a crawler".
 *
 * @return bool True when the User-Agent contains one of the signatures,
 *              false otherwise (including when the header is absent).
 * @arrange 512 Notes: 512Pic.com
 **/
function is_crawler() {
    // The header is optional: guard the lookup so a missing header does not
    // raise an undefined-index notice or hand null to a string function.
    if (!isset($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    $userAgent = $_SERVER['HTTP_USER_AGENT'];

    $spiders = array(
        'Googlebot',    // Google crawler
        'Baiduspider',  // Baidu crawler
        'Yahoo! Slurp', // Yahoo crawler
        'YodaoBot',     // Youdao crawler
        'msnbot',       // Bing crawler
        // Add more crawler keywords here.
    );

    foreach ($spiders as $spider) {
        // stripos() compares case-insensitively, so neither side needs lowercasing.
        if (stripos($userAgent, $spider) !== false) {
            return true;
        }
    }

    return false;
}
/*** 来自编程之家 jb51.cc(jb51.cc) ***/
下面的PHP代码附带了更多的蜘蛛标识
/**
 * Determine whether the current visitor is a search-engine spider, using an
 * extended list of crawler signatures.
 *
 * Each signature is compared as a case-insensitive substring of the request's
 * User-Agent header. The function produces no output and always returns a
 * boolean.
 *
 * @return bool True when the User-Agent contains one of the signatures;
 *              false when nothing matches or the header is missing/empty.
 * @arrange 512 Notes: 512Pic.com
 **/
function isCrawler() {
    // empty() also covers an absent header, so no undefined-index notice is
    // raised for clients that omit User-Agent.
    if (empty($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    // Do not echo the header: it is client-controlled and would be written
    // into the response unescaped.
    $agent = strtolower($_SERVER['HTTP_USER_AGENT']);

    // Matching is plain substring search, so descriptive entries such as
    // "Java (Often spam bot)" only hit when the header contains that exact text.
    $spiderSite = array(
        "TencentTraveler", "Baiduspider+", "BaiduGame", "Googlebot", "msnbot",
        "Sosospider+", "Sogou web spider", "ia_archiver", "Yahoo! Slurp",
        "YoudaoBot", "Yahoo Slurp", "MSNBot", "Java (Often spam bot)",
        "BaiDuSpider", "Voila", "Yandex bot", "BSpider", "twiceler",
        "Sogou Spider", "Speedy Spider", "Google AdSense", "Heritrix",
        "Python-urllib", "Alexa (IA Archiver)", "Ask", "Exabot", "Custo",
        "OutfoxBot/YodaoBot", "yacy", "SurveyBot", "legs", "lwp-trivial",
        "Nutch", "StackRambler", "The web archive (IA Archiver)", "Perl tool",
        "MJ12bot", "Netcraft", "MSIECrawler", "WGet tools", "larbin",
        "Fish search",
    );

    foreach ($spiderSite as $val) {
        if (strpos($agent, strtolower($val)) !== false) {
            return true;
        }
    }

    // No signature matched: return an explicit false rather than falling off
    // the end of the function (which would yield null).
    return false;
}
// Demo: print one of two greetings depending on whether isCrawler() reports
// the current visitor as a spider.
echo isCrawler() ? "你好蜘蛛精!" : "你不是蜘蛛精啊!";
/*** 来自编程之家 jb51.cc(jb51.cc) ***/