<?php
include_once('simple_html_dom.php');
/**
 * Fetch the body of a URL with cURL.
 *
 * @param string $url Address to request.
 * @return string|false Response body on success; false when the cURL handle
 *                      could not be created or the transfer failed.
 */
function get_url_contents($url){
    $crl = curl_init();
    if ($crl === false) {
        // Same failure value curl_exec() would hand back, so callers need one check only.
        return false;
    }
    $timeout = 5;
    curl_setopt($crl, CURLOPT_URL, $url);
    // Return the body as a string instead of echoing it.
    curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($crl, CURLOPT_CONNECTTIMEOUT, $timeout);
    // CONNECTTIMEOUT only bounds the connect phase; without a total limit a
    // server that accepts the connection and then stalls blocks the script forever.
    curl_setopt($crl, CURLOPT_TIMEOUT, $timeout * 6);
    $ret = curl_exec($crl);
    curl_close($crl);
    return $ret;
}
// Seed page whose outgoing links are collected.
$url = 'http://books.rediff.com/categories';
$outhtml = get_url_contents($url);

// Only parse when the fetch actually produced a string (get_url_contents()
// yields false on failure).
$html = is_string($outhtml) ? str_get_html($outhtml) : false;

$urlarray = array();
// NOTE(review): str_get_html() appears to return false for empty or oversized
// input -- confirm against the bundled simple_html_dom.php. Either way, calling
// find() on a non-object is a fatal error, so skip extraction in that case.
if (is_object($html)) {
    foreach ($html->find('a') as $link) {
        // Keep only absolute links that start with the http:// scheme.
        if (strpos($link->href, 'http://') === 0) {
            $urlarray[] = $link->href;
        }
    }
}
print_r($urlarray);
?>
This is a simple web crawler in which I have extracted all the URLs on the page. I cannot understand how to apply BFS/DFS in this crawler.
Please help!