Hi,
I have been searching here and on Google for the past few days, but I haven't been able to find an answer.
I want to have a script that will download one page of a website with all of its content, i.e. images, CSS, JS, etc.
I have been able to save the HTML (text) like this:
function get_data($url)
{
    // Fetch the raw response body with cURL
    $ch = curl_init();
    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}

$returned_content = get_data('http://example.com/page.htm');
$my_file = 'file.htm';
$handle = fopen($my_file, 'w') or die('Cannot open file: ' . $my_file);
fwrite($handle, $returned_content);
fclose($handle);
This will save a file called 'file.htm' with all the HTML but no images, css, js etc...
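I'm assuming the next step is to pull the asset URLs out of the HTML I already have. Something like this with DOMDocument is what I had in mind (untested, and the tag/attribute choices are just my guess at what needs grabbing):

libxml_use_internal_errors(true); // real pages are rarely valid markup, so silence the warnings
$doc = new DOMDocument();
$doc->loadHTML($returned_content);

$assets = array();
foreach ($doc->getElementsByTagName('img') as $tag) {
    $assets[] = $tag->getAttribute('src'); // images
}
foreach ($doc->getElementsByTagName('script') as $tag) {
    if ($tag->getAttribute('src')) {
        $assets[] = $tag->getAttribute('src'); // external JS only
    }
}
foreach ($doc->getElementsByTagName('link') as $tag) {
    if ($tag->getAttribute('rel') == 'stylesheet') {
        $assets[] = $tag->getAttribute('href'); // CSS
    }
}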
I have also been able to do this:
$img[] = 'http://example.com/image.jpg';

foreach ($img as $i) {
    save_image($i);
    if (getimagesize(basename($i))) {
        echo 'Image ' . basename($i) . ' Downloaded OK';
    } else {
        echo 'Image ' . basename($i) . ' Download Failed';
    }
}

function save_image($img, $fullpath = 'basename')
{
    // Default to saving under the file's own name in the current directory
    if ($fullpath == 'basename') {
        $fullpath = basename($img);
    }

    $ch = curl_init($img);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1); // no effect on PHP 5.1.3+, but harmless

    $rawdata = curl_exec($ch);
    curl_close($ch);

    // Overwrite any existing copy of the file
    $fp = fopen($fullpath, 'w');
    fwrite($fp, $rawdata);
    fclose($fp);
}
This will save that specific image, but I haven't found anything that will save the entire page together with all the content behind it (images, CSS, JS).
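My rough idea is to glue the two parts together: take the $assets list from the DOMDocument loop above, download each one with save_image(), and then point the saved HTML at the local copies. Completely untested, and the URL handling is naive (it only copes with simple relative paths on the same host, which I've hard-coded):

foreach ($assets as $original) {
    // Make the URL absolute if it isn't already (very naive, ignores ../ and //-style URLs)
    $absolute = $original;
    if (strpos($absolute, 'http') !== 0) {
        $absolute = 'http://example.com/' . ltrim($absolute, '/');
    }

    // save_image() just writes the raw bytes, so it should work for CSS/JS too
    save_image($absolute);

    // Rewrite the page so it points at the local copy of the file
    $returned_content = str_replace($original, basename($original), $returned_content);
}

// Save the rewritten HTML over the top of the earlier copy
file_put_contents('file.htm', $returned_content);

Not sure if that's even the right approach, or if there's something ready-made that does all of this.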
Thanks for your help in advance!