Hi,
I have this script, it attempts to place JSON data into a MySQL database.
The problem is it duplicates records, this does not happen when the “news_added” field is not NULL. That’s the only thing I can see different.
I would like the script to update a record, replacing the old one, instead of the db growing in size i.e. one URL is one record.
Can anyone suggest a solution, I have changed field names and URLs for privacy reasons.
Thanks in advance
<?php
include('mysql_conn.php');
include('config.php');
$date = date('Y-m-d');
$fulldt = $data=gmstrftime("%d-%m-%Y, %X ",time());
$a=0;
$querybuild="";$queryvalues="";$check="";
$logs="Last scrape on: ".$fulldt."
----------------------------------------------------
";
foreach ($urls as $key=>$url) {
$content = file_get_contents($url);
$json =json_decode($content);
foreach ($json as $fld=>$value) {
if (in_array($fld,$fields[$key])) {
if ($fld=='news_added') { $check = " where `inserted_date`='$date' AND `news_added`='$value' ";}
if ($fld=='code') { $check .= " AND `code`='$value' ";}
if ($fld=='id') { $check .= " AND `id`='$value' ";}
$querybuild.='`'.$fld.'`,';
$val = pastr($value,'text');
$queryvalues.=$val.',';
}
}
$qcheck = ("select * from `scrapes` ".$check." limit 1");
$rezcheck = mysql_query($qcheck) or die (mysql_error());
if (mysql_num_rows($rezcheck)==0) {
$querybuild = substr($querybuild,0,-1);
$queryvalues = substr($queryvalues,0,-1);
$finq = ("insert into `scrapes` (`inserted_date`,".$querybuild.") VALUES ('$date',".$queryvalues.")");
$rezins = mysql_query($finq) or die(mysql_error());
$logs .="URL => ".$url." Result: Inserted!
";
} else {
$logs .="URL => ".$url." Result: Row exists!
";
}
$querybuild="";$queryvalues="";$check="";
}
#writing the log file
$filem = "scraper_logs.txt";
$fhm = fopen($filem, 'w') or die("can't open file");
fwrite($fhm, $logs);
fclose($fhm);
?>
** config.php**
<?php
// script configurations
$urls = array(
0=>'http://example.com/json_api/1/',
1=>'http://example.com/json_api/2/',
);
$fields = array(
0=> array('field_1','code','field_3','news_updated','cost','news_added','web_name','code','id','first_name','event_1','web_type_id','cost_1','field_4','cost_id','name_middle','name_other','status','cost_9','in','news','cost_12','news_5','event_5','by_author','2_id','inserted_date'),
1=> array('field_1','code','field_3','news_updated','cost','news_added','web_name','code_1','id','first_name','event_1','web_type_id','cost_1','field_4','cost_id','name_middle','name_other','status','cost_9','in','news','cost_12','news_5','event_5','by_author','2_id','inserted_date'),
);
### general functions do not touch
if (!function_exists("pastr")) {
function pastr($theValue, $theType, $theDefinedValue = "", $theNotDefinedValue = "")
{
$theValue = get_magic_quotes_gpc() ? stripslashes($theValue) : $theValue;
$theValue = function_exists("mysql_real_escape_string") ? mysql_real_escape_string($theValue) : mysql_escape_string($theValue);
switch ($theType) {
case "text":
$theValue = ($theValue != "") ? "'" . $theValue . "'" : "NULL";
break;
case "long":
case "int":
$theValue = ($theValue != "") ? intval($theValue) : "NULL";
break;
case "double":
$theValue = ($theValue != "") ? "'" . doubleval($theValue) . "'" : "NULL";
break;
case "date":
$theValue = ($theValue != "") ? "'" . $theValue . "'" : "NULL";
break;
case "defined":
$theValue = ($theValue != "") ? $theDefinedValue : $theNotDefinedValue;
break;
}
return $theValue;
}
}
?>
** mysql_conn.php**
<?php
$_host = "localhost";
$_db= "db_name";
$_user = "root";
$_pass = "password";
$db_conn = mysql_pconnect($_host, $_user, $_pass) or trigger_error(mysql_error(),E_USER_ERROR);
mysql_select_db($_db, $db_conn);
?>