<?php
include_once('simple_html_dom.php');
/**
 * Fetch the body of a URL with cURL.
 *
 * @param string $url     Address to request.
 * @param int    $timeout Seconds allowed for establishing the connection
 *                        (defaults to 5, the value previously hard-coded).
 * @return string|false   The response body, or false when the cURL handle
 *                        could not be created or the transfer failed.
 */
function get_url_contents($url, $timeout = 5){
    $crl = curl_init();
    if ($crl === false) {
        // No handle was created; report failure the same way curl_exec() does.
        return false;
    }
    curl_setopt($crl, CURLOPT_URL, $url);
    // Make curl_exec() return the body instead of printing it.
    curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
    // Limits only the connection phase, not the whole transfer.
    curl_setopt($crl, CURLOPT_CONNECTTIMEOUT, $timeout);
    $ret = curl_exec($crl);
    curl_close($crl);
    return $ret;
}
// Page whose anchors are listed.
$url = 'http://books.rediff.com/categories/fiction-genres/2180204';
$outhtml = get_url_contents($url);

// Only parse a non-empty body; get_url_contents() returns false when the fetch fails.
$html = (is_string($outhtml) && $outhtml !== '') ? str_get_html($outhtml) : false;

if (!is_object($html)) {
    // Report the failure instead of calling find() on a non-object.
    trigger_error('Could not fetch or parse ' . $url, E_USER_WARNING);
} else {
    foreach ($html->find('a') as $link) {
        $href = $link->href;
        if (!is_string($href)) {
            // Anchor has no usable href value (presumably absent or valueless); nothing to link to.
            continue;
        }
        // Quote and escape the value so characters such as quotes or spaces
        // (e.g. in "javascript:doSearch('MT');") cannot break the markup.
        // double_encode is off so entities already present in the source are kept as-is.
        $safe = htmlspecialchars($href, ENT_QUOTES, 'UTF-8', false);
        echo '<a href="' . $safe . '">' . $safe . '</a><br>';
    }
}
?>
This gives all the links present on the given URL.
I want to remove all the duplicate entries, as well as the JavaScript links that show up after crawling, such as "javascript:doSearch('MT');" and "javascript:window,history.go(-1);" ...
Please help!
Thanks ...