php 利用php抓取批量关键词百度推广广告中网址保存在txt文件中
利用 PHP 批量抓取关键词对应的百度推广广告网址,并保存到 txt 文件中。结合服务器的定时任务,可以定时查看关键词广告的竞争程度。[代码片段(85行)]
结合服务器的定时任务可以定时查找关键词广告的竞争程度
<?php
// Batch-scrape the display URLs of Baidu paid-search ads for a list of keywords
// and append them to semallurl.txt. Intended to be run from a scheduled task.

$fp = fopen("semallurl.txt", "a+");
if ($fp === false) {
    // Without a writable log every later fwrite() would fail, so stop here.
    echo "Unable to open semallurl.txt for appending<br>";
    exit(1);
}

$kws1 = "上海酒店,北京酒店,广州酒店,天津酒店,广州酒店";
// The list repeats a keyword; de-duplicate so each one is fetched and logged once.
$kws = array_unique(explode(",", $kws1));

foreach ($kws as $kw) {
    $pageURL = "http://www.baidu.com/s?word=" . urlencode($kw);
    $contents = fetch($pageURL); /* download the result page */

    if (!is_string($contents) || $contents === "") {
        // Record the failure explicitly so it is not mistaken for "no ads found".
        echo "------------Keywords ads for" . $kw . " could not be fetched<br>";
        fwrite($fp, "----------" . $kw . " fetch failed------------------------- \r\n");
        continue;
    }

    /* Remove JavaScript. The "s" modifier lets "." span line breaks so that
       multi-line <script> blocks are stripped too; "i" covers <SCRIPT>. */
    $withoutScripts = preg_replace('/<script[^>]*?>.*?<\\/script>/is', "", $contents);
    if (is_string($withoutScripts)) {
        // preg_replace() returns null on a PCRE error; keep the raw page in that case.
        $contents = $withoutScripts;
    }

    // The page is split on the content-column marker: the first chunk after the
    // marker is treated as the right column, the second as the left column.
    // A chunk that is missing becomes an empty column instead of an undefined offset.
    $columns = explode('<div id="content_', $contents);
    $contents_right = '<div id="content_' . (isset($columns[1]) ? $columns[1] : ""); /* right part of the results */
    $contents_left = '<div id="content_' . (isset($columns[2]) ? $columns[2] : ""); /* left part of the results */

    // Collect the HTML fragment of every ad in each of the three ad areas.
    preg_match_all('/(<div id=\\"[0-9]*\\" class=\\"ec_pp_f ec_pp_top.*?)<a href=\\"http:\\/\\/e\\.baidu\\.com\\//', $contents_left, $ads_left_white);
    preg_match_all('/(<table class=\\"EC_mr15 EC_ppim_top ec_pp_f.*?<\\/table>)/', $contents_left, $ads_left_green);
    preg_match_all('(<div id=\\"bdfs[^>]*class=\\"EC_im EC_fr EC_PP EC_idea1017 \\">.*?<a class=\\"EC_BL EC_desc\\".*?<\\/a>)', $contents_right, $ads_right);

    echo "------------Keywords ads for" . $kw . "start ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads start------------------------- \r\n");

    echo "left ads with green background is<br>";
    writeAdUrls($fp, $ads_left_green, '/<span>.*?<\\/span>/');

    echo "<p>-------------<br>";
    echo "left ads with white background is<br>";
    writeAdUrls($fp, $ads_left_white, '/<span class=\\"ec_url\\">.*?<\\/span>/');

    echo "<p>-------------<br>";
    echo "right ads with is<br>";
    writeAdUrls($fp, $ads_right, '/(<font size=\\"-1\\" class=\\"EC_url\\">.*?<\\/font>)/');

    echo "---------------Keywords ads for" . $kw . "END ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads End------------------------- \r\n");
}

// Timestamp the run so successive scheduled executions can be told apart in the log.
fwrite($fp, date("Y-m-d H:i:s") . " PHP代码自动运行!\r\n");
fclose($fp);

/**
 * Pull the URL element out of each matched ad fragment, append its text to the
 * log file and echo the raw element to the page.
 *
 * @param resource $fp         Open, writable handle of the log file.
 * @param mixed    $matches    Match array filled by preg_match_all(); index 0 holds the ad fragments.
 * @param string   $urlPattern Regex that captures the element holding the ad's display URL.
 * @return void
 */
function writeAdUrls($fp, $matches, $urlPattern)
{
    if (empty($matches[0])) {
        return;
    }
    foreach ($matches[0] as $adHtml) {
        // Skip fragments without a URL element rather than reading an undefined match.
        if (preg_match($urlPattern, $adHtml, $urlMatch) !== 1) {
            continue;
        }
        fwrite($fp, strip_tags($urlMatch[0]) . "\r\n");
        echo $urlMatch[0] . "<br>";
    }
}
/**
 * Download a URL with cURL and return the response body.
 *
 * @param string $Date URL to request (the parameter name is kept as-is for existing callers).
 * @return string|false The response body, or false when the cURL handle cannot
 *                      be created or the transfer fails or times out.
 */
function fetch($Date)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    $connectTimeout = 5; // seconds allowed for establishing the connection
    $totalTimeout = 30;  // seconds allowed for the whole transfer

    curl_setopt($ch, CURLOPT_URL, (string) $Date);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $connectTimeout);
    // The connect timeout alone does not bound a stalled download; cap the whole
    // request so a scheduled run cannot hang indefinitely.
    curl_setopt($ch, CURLOPT_TIMEOUT, $totalTimeout);

    $contents = curl_exec($ch);
    curl_close($ch);

    return $contents;
}
?>
//该片段来自于http://outofmemory.cn
- 上一篇:php json_encode 乱码
- 下一篇:php 计算 生肖
精彩图集
精彩文章






