My code is suppose to crawl web pages, index the links, then crawl those web pages and on and on again!
But it won't work?
I get no errors what is wrong?
I think it gets into the foreach but doesn't make it to the $DCheck if statement!
<?php
if(empty($_SESSION['page']))
{
$original_file = file_get_contents("http://www.yahoo.com/");
}
else
{
$original_file = file_get_contents($_SESSION['page']);
}
$stripped_file = strip_tags($original_file, "<a>");
preg_match_all("/<a(?:[^>]*)href=\"([^\"]*)\"(?:[^>]*)>(?:[^<]*)<\/a>/is", $stripped_file, $matches);
//DEBUGGING
//$matches[0] now contains the complete A tags; ex: <a href="link">text</a>
//$matches[1] now contains only the HREFs in the A tags; ex: link
foreach( $matches[1] as $key => $value)
{
echo "1";
echo "2";
$Check = mysql_query("SELECT * FROM pages WHERE URL='$value'");
$DCheck = mysql_num_rows($Check);
if($DCheck != 0)
{
mysql_query("INSERT INTO pages (url)
VALUES ('$value')");
$_SESSION['page'] = $matches[1];
die($DCheck);
}
}
?>