php curl模拟百度蜘蛛
更新时间:2023-05-25 21:55
代码如下:
/**
 * Fetch a URL with cURL while presenting the request as Baiduspider.
 *
 * The request carries a Baiduspider User-Agent plus X-FORWARDED-FOR and
 * CLIENT-IP headers set to a random address in 220.181.108.0/24. These
 * headers are only hints to the remote server; the real source IP of the
 * connection is not changed.
 *
 * On failure this function does not return: it prints the error message and
 * terminates the script.
 *
 * @param string   $filepath  URL to request (passed to CURLOPT_URL).
 * @param int|bool $ecms      Truthy to include the response headers in the returned string.
 * @param int|bool $post      Truthy to send the request as POST.
 * @param int|bool $savcookie Truthy to save received cookies to the cookie file.
 * @param int|bool $cookie    Truthy to send cookies read from the cookie file.
 *
 * @return mixed Value returned by curl_exec() on success: the response as a
 *               string, prefixed with the headers when $ecms is truthy.
 */
function bd_crul($filepath, $ecms = 0, $post = 0, $savcookie = 0, $cookie = 0)
{
    $ch = curl_init();

    // Random address in the 220.181.108.x range used for the spoofed headers.
    $ip = '220.181.108.' . rand(1, 255);

    // File used both to store (COOKIEJAR) and to read back (COOKIEFILE) cookies.
    // ECMS_PATH is a constant expected to be defined by the host application.
    $cookiefile = ECMS_PATH . "e/data/tmp/cjcookie.txt";

    // Seconds allowed for establishing the connection.
    $timeout = 15;

    curl_setopt($ch, CURLOPT_URL, $filepath);

    // 0 = no limit on the total transfer time; only the connect phase is
    // bounded (see CURLOPT_CONNECTTIMEOUT below).
    curl_setopt($ch, CURLOPT_TIMEOUT, 0);

    // Forged client-IP headers.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        'X-FORWARDED-FOR:' . $ip,
        'CLIENT-IP:' . $ip,
    ));

    if ($savcookie) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiefile);
    }

    if ($cookie) {
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiefile);
    }

    // Forged Baiduspider User-Agent.
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, $ecms);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

    // NOTE(review): peer certificate verification is disabled, so HTTPS
    // responses are not authenticated. Kept as in the original behavior.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_POST, $post);

    $content = curl_exec($ch);

    if ($content === false) {
        // Read the error details BEFORE closing the handle; once the handle
        // is closed they are no longer available on older PHP versions.
        $no = curl_errno($ch);

        // 28 is libcurl's "operation timed out" error code.
        $error = ($no === 28) ? '访问目标地址超时' : curl_error($ch);

        curl_close($ch);

        // Print the error message and stop the script.
        exit($error);
    }

    curl_close($ch);

    return $content;
}
上一篇:php抽奖概率算法(适合大转盘等概率) 下一篇:匿名函数和闭包案例分析