We are using PHP to find relevant blogs based on keywords.
// Collect the campaign fields, both keyword lists and the stored page fields
// into the single array that parse_blog_data() takes as its argument.
$blog_data = array();
$blog_data['campaign_id'] = $campaign['campaign_id'];
$blog_data['campaign_name'] = $campaign['campaign_name'];
$blog_data['mandatory_keywords'] = $mandatory_keywords;
$blog_data['extra_keywords'] = $extra_keywords;
$blog_data['page_url'] = $get_blog['page_url'];
$blog_data['page_id'] = $get_blog['id'];
$blog_data['html'] = $get_blog['page_text'];

// Run the keyword parser over the page and keep its result.
$blog_parse_result = parse_blog_data($blog_data);
/**
 * Collect the text of every p/h1/h2/h3 element that mentions a keyword.
 *
 * The mandatory and extra keyword lists are merged, and each selected element
 * is kept as soon as its tag-stripped text contains any one of the keywords
 * (case-sensitive substring match). Images that are direct children of a
 * matching element are collected as well.
 *
 * @param array $data Reads the keys 'page_url', 'mandatory_keywords',
 *                    'extra_keywords' and 'html'.
 *
 * @return array|null Null when the merged keyword list is empty; otherwise an
 *                    array with 'status' ("OK" if any text matched, else
 *                    "NOT_OK"), 'text' (matching elements joined by blank
 *                    lines, passed through utf8_decode) and 'images' (list of
 *                    src values).
 */
function parse_blog_data($data)
{
    show("Parse text for: " . $data['page_url']);

    $keywords = array_merge($data['mandatory_keywords'], $data['extra_keywords']);
    if (empty($keywords)) {
        // Nothing to search for: callers get null rather than a result array.
        return;
    }

    require_once("blog_parser/phpQuery.php");
    // pq() below is called without an explicit document, so it relies on this
    // call having loaded one.
    phpQuery::newDocumentHTML($data['html']);

    $text = "";
    $images = array();

    foreach (pq('p,h1,h2,h3') as $p) {
        $p_text = pq($p)->html();
        if (empty($p_text)) {
            continue;
        }

        // Strip the markup once per element instead of once per keyword.
        $plain_text = strip_tags($p_text);

        foreach ($keywords as $key) {
            if (empty($key)) {
                continue;
            }

            // strpos() returns 0 when the keyword starts the text, so the
            // result must be compared against false explicitly.
            if (strpos($plain_text, $key) === false) {
                continue;
            }

            // Retrieve text, separating matched elements with a blank line.
            $text .= !empty($text) ? "\n\n" . $plain_text : $plain_text;

            // Retrieve blog images; only keep sources getimagesize() can read.
            // Failures are deliberately silenced: an unreadable image is skipped.
            foreach (pq($p)->children("img") as $img) {
                $src = pq($img)->attr("src");
                if (!empty($src) && @getimagesize($src)) {
                    $images[] = $src;
                }
            }

            // One matching keyword is enough; move on to the next element.
            continue 2;
        }
    }

    return array(
        'status' => (!empty($text) ? "OK" : "NOT_OK"),
        'text' => utf8_decode($text),
        'images' => $images
    );
}
// Collect the campaign fields, both keyword lists and the stored page fields
// into the single array that parse_blog_data() takes as its argument.
$blog_data = array();
$blog_data['campaign_id'] = $campaign['campaign_id'];
$blog_data['campaign_name'] = $campaign['campaign_name'];
$blog_data['mandatory_keywords'] = $mandatory_keywords;
$blog_data['extra_keywords'] = $extra_keywords;
$blog_data['page_url'] = $get_blog['page_url'];
$blog_data['page_id'] = $get_blog['id'];
$blog_data['html'] = $get_blog['page_text'];

// Run the keyword parser over the page and keep its result.
$blog_parse_result = parse_blog_data($blog_data);
/**
 * Collect the text of every p/h1/h2/h3 element that mentions a keyword.
 *
 * The mandatory and extra keyword lists are merged, and each selected element
 * is kept as soon as its tag-stripped text contains any one of the keywords
 * (case-sensitive substring match). Images that are direct children of a
 * matching element are collected as well.
 *
 * @param array $data Reads the keys 'page_url', 'mandatory_keywords',
 *                    'extra_keywords' and 'html'.
 *
 * @return array|null Null when the merged keyword list is empty; otherwise an
 *                    array with 'status' ("OK" if any text matched, else
 *                    "NOT_OK"), 'text' (matching elements joined by blank
 *                    lines, passed through utf8_decode) and 'images' (list of
 *                    src values).
 */
function parse_blog_data($data)
{
    show("Parse text for: " . $data['page_url']);

    $keywords = array_merge($data['mandatory_keywords'], $data['extra_keywords']);
    if (empty($keywords)) {
        // Nothing to search for: callers get null rather than a result array.
        return;
    }

    require_once("blog_parser/phpQuery.php");
    // pq() below is called without an explicit document, so it relies on this
    // call having loaded one.
    phpQuery::newDocumentHTML($data['html']);

    $text = "";
    $images = array();

    foreach (pq('p,h1,h2,h3') as $p) {
        $p_text = pq($p)->html();
        if (empty($p_text)) {
            continue;
        }

        // Strip the markup once per element instead of once per keyword.
        $plain_text = strip_tags($p_text);

        foreach ($keywords as $key) {
            if (empty($key)) {
                continue;
            }

            // strpos() returns 0 when the keyword starts the text, so the
            // result must be compared against false explicitly.
            if (strpos($plain_text, $key) === false) {
                continue;
            }

            // Retrieve text, separating matched elements with a blank line.
            $text .= !empty($text) ? "\n\n" . $plain_text : $plain_text;

            // Retrieve blog images; only keep sources getimagesize() can read.
            // Failures are deliberately silenced: an unreadable image is skipped.
            foreach (pq($p)->children("img") as $img) {
                $src = pq($img)->attr("src");
                if (!empty($src) && @getimagesize($src)) {
                    $images[] = $src;
                }
            }

            // One matching keyword is enough; move on to the next element.
            continue 2;
        }
    }

    return array(
        'status' => (!empty($text) ? "OK" : "NOT_OK"),
        'text' => utf8_decode($text),
        'images' => $images
    );
}
No comments:
Post a Comment