/**
 * Fetch the response body of a URL with cURL.
 *
 * Redirects are followed, response headers are excluded from the result and
 * the requested URL is also sent as the Referer header.
 *
 * NOTE(review): CURLOPT_SSL_VERIFYPEER is disabled, so the server certificate
 * is not validated and the transfer is open to man-in-the-middle attacks.
 * It is kept disabled to preserve the existing behaviour; enable it once a CA
 * bundle is known to be available on the host.
 *
 * @param string $url URL to download.
 * @return string|false Response body, or false when the transfer fails.
 */
function getHTTPS($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    // The source fused the next two calls into one invalid four-argument
    // call; the Referer value was lost, so the requested URL is used.
    curl_setopt($ch, CURLOPT_REFERER, $url);
    // Without this option curl_exec() prints the body and returns true.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
/**
 * Read the whole content of a local file or a remote URL into a string.
 *
 * Both local paths and URLs are opened through fopen(), so remote reads
 * depend on the stream wrappers enabled in this PHP installation.
 *
 * @param string $filepath Local path or URL; surrounding whitespace is trimmed.
 * @return string The content, or an empty string when the source cannot be
 *                opened or read.
 */
function ReadFiletext($filepath)
{
    $filepath = trim($filepath);

    $htmlfp = @fopen($filepath, "r");
    if ($htmlfp === false) {
        // Nothing to read; never hand a failed handle to the read/close calls.
        return '';
    }

    // Remote and local sources are both drained to EOF. This avoids fread()
    // with a zero length on empty files and the stat cache used by filesize().
    $string = stream_get_contents($htmlfp);
    fclose($htmlfp);

    return $string === false ? '' : $string;
}
将上面的 ReadFiletext 函数改成：
/**
 * Read the whole content of a local file or a remote URL into a string.
 *
 * URLs whose scheme is "https://" are downloaded through getHTTPS(); every
 * other path or URL is opened through fopen().
 *
 * @param string $filepath Local path or URL; surrounding whitespace is trimmed.
 * @return string The content, or an empty string when the source cannot be
 *                opened or read.
 */
function ReadFiletext($filepath)
{
    $filepath = trim($filepath);

    // HTTPS: hand over to getHTTPS() before opening any stream, so no file
    // handle is left open and the URL is not requested twice.
    if (strncasecmp($filepath, "https://", 8) === 0) {
        $body = getHTTPS($filepath);
        return is_string($body) ? $body : '';
    }

    $htmlfp = @fopen($filepath, "r");
    if ($htmlfp === false) {
        // Nothing to read; never hand a failed handle to the read/close calls.
        return '';
    }

    // Other remote URLs and local files are both drained to EOF. This avoids
    // fread() with a zero length on empty files and the stat cache used by
    // filesize().
    $string = stream_get_contents($htmlfp);
    fclose($htmlfp);

    return $string === false ? '' : $string;
}
至此，即可采集以 https 开头的网页链接。