<?php
/**
 * Minimal same-site link crawler.
 *
 * Fetches the ROOT page, extracts its anchor links, then keeps fetching every
 * discovered page and prints each newly discovered URL on its own line.
 *
 * NOTE(review): this file was reconstructed from a damaged copy in which many
 * tokens were lost. Places where the original intent had to be inferred are
 * marked with NOTE(review) below — please confirm them.
 */

define('PRE_DOMAIN', 'www');
define('DOMAIN', 'sina.com.cn');
define('PROTOCOL', 'https');
define('ROOT', PROTOCOL . '://' . PRE_DOMAIN . '.' . DOMAIN . '/');

// Drive the crawl: the generator yields URLs lazily, so output starts
// immediately instead of after the whole site has been walked.
foreach (spider() as $key => $value) {
    echo $value . "\r\n";
}

/**
 * Crawl the site starting at ROOT and lazily yield newly discovered URLs.
 *
 * Links found on the ROOT page seed the work list and are marked as seen, but
 * are not themselves yielded; only URLs first discovered on subsequently
 * fetched pages are yielded. The work list is consumed with array_pop(), so
 * pages are visited in last-in-first-out order.
 *
 * @return Generator yields each newly discovered URL (string) exactly once
 */
function spider()
{
    $headers = [
        'user-agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    ];

    $seen = [];   // URL => true, used as a set for O(1) membership checks
    $queue = [];  // URLs still waiting to be fetched

    foreach (parseURL(get(ROOT, $headers)) as $url) {
        // Skip duplicates so the same seed page is not fetched repeatedly.
        if (!isset($seen[$url])) {
            $seen[$url] = true;
            $queue[] = $url;
        }
    }

    while (!empty($queue)) {
        $current = array_pop($queue);

        foreach (parseURL(get($current, $headers)) as $found) {
            if (isset($seen[$found])) {
                continue;
            }
            $seen[$found] = true;
            yield $found;
            $queue[] = $found;
        }
    }
}

/**
 * Fetch a URL with cURL and return its body.
 *
 * @param string     $url    URL to request
 * @param array|null $header optional list of raw HTTP header lines
 *
 * @return string the response body; an empty string when the transfer fails
 *                or when a 200 response is not served as text/html
 */
function get($url, $header = null)
{
    $curl = curl_init();
    if ($curl === false) {
        return '';
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    // NOTE(review): the option originally set to FALSE here could not be
    // recovered from the damaged source. CURLOPT_HEADER is used so only the
    // body is returned; TLS verification is left at libcurl's defaults.
    curl_setopt($curl, CURLOPT_HEADER, false);
    if (!empty($header)) {
        curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    }
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

    $output = curl_exec($curl);
    $info = curl_getinfo($curl);
    // Release the handle before any return path so it is never leaked.
    curl_close($curl);

    if ($output === false) {
        return '';
    }

    // content_type may be null when the server sends no Content-Type header.
    $contentType = isset($info['content_type']) ? (string) $info['content_type'] : '';
    if (
        !empty($info)
        && $info['http_code'] == 200
        && stripos($contentType, 'text/html') === false
    ) {
        // Successful but non-HTML response (image, script, ...): nothing to parse.
        return '';
    }

    return $output;
}

/**
 * Extract the crawlable links from an HTML document.
 *
 * - Protocol-relative links ("//host/path") are prefixed with "https:".
 * - Absolute http(s) links are kept only when their host is DOMAIN or one of
 *   its subdomains.
 * - Links using any other scheme (javascript:, mailto:, ...) and pure
 *   fragments ("#...") are dropped.
 * - Every remaining link is treated as a path and appended to ROOT; it is not
 *   resolved against the page it was found on.
 *
 * NOTE(review): the off-site filter and the exact trimming applied to relative
 * links are reconstructions of damaged code — confirm against the intended
 * behaviour.
 *
 * @param string $content HTML source
 *
 * @return array list of absolute URLs, in document order (may contain duplicates)
 */
function parseURL($content)
{
    if (!is_string($content) || $content === '') {
        return [];
    }

    // [^>]*? keeps the match inside a single tag, so several anchors on the
    // same line are all captured (a greedy ".*" would capture only one).
    $pattern = '/<a\b[^>]*?\bhref\s*=\s*["\']([^"\'>]*)["\']/i';
    if (!preg_match_all($pattern, $content, $matchs) || empty($matchs[1])) {
        return [];
    }

    $domainSuffix = '.' . DOMAIN;
    $links = [];

    foreach ($matchs[1] as $value) {
        // Attribute values are HTML-encoded (e.g. "&amp;" in query strings).
        $value = trim(html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
        if ($value === '' || $value[0] === '#') {
            continue;
        }

        if (strpos($value, '//') === 0) {
            // Protocol-relative link: give it an explicit scheme.
            $value = 'https:' . $value;
        }

        if (preg_match('#^https?://#i', $value)) {
            $host = strtolower((string) parse_url($value, PHP_URL_HOST));
            $onSite = $host === DOMAIN
                || substr($host, -strlen($domainSuffix)) === $domainSuffix;
            if ($onSite) {
                $links[] = $value;
            }
            continue;
        }

        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $value)) {
            // Some other URI scheme — not something we can fetch as a page.
            continue;
        }

        // ROOT already ends with "/", so drop leading slashes from the path.
        $links[] = ROOT . ltrim($value, '/');
    }

    return $links;
}