Hi guys
I've written a very simple program that reads in some files, does some cleanup on the data and then creates a sql statement to insert the data into a table. Now I have 125 of these fies of varying sizes and on some (not all) of the files (they are csv files) there are a large number of lines that are essentially empty entries (i.e. a bunch of empty commas). The result of my code is that it seems to hold onto the last correct strings (not the empty ones) and then tries to execute the sql statement. The statement shouldn't update the table as I'm telling it to use INSERT IGNORE but it's a waste of time going over thousands of lines of data that I should just ignore. So the question is how do I safely stop a BurrefedReader? It's located in the method readData in the Class DataIn below (alternative approaches would also be a help);
The main Class
package rosdata;
import java.io.File;
import java.io.BufferedReader;
import java.io.IOException;
import java.sql.SQLException;
public class ROSdata
{
/**
* @param args the command line arguments
*/
private static File aFile;
public static void main(String[] args) throws SQLException, IOException
{
aFile = new File("2013_11_res_sales_wedht.csv");
DataIn di = new DataIn(aFile);
}
}
And the DataIn Class
package rosdata;
import java.io.*;
import org.apache.commons.lang3.text.WordUtils;
import java.sql.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class DataIn
{
private File aFile;
private String currentLine;
private String applicationID;
private String productType;
private String titleNumber;
private String applicationDate;
private String county;
private String landClass;
private String deedCodeName;
private String consideration;
private String houseType;
private String dateOfEntry;
private String subBuilding;
private String buildingName;
private String buildingNumber;
private String street;
private String district;
private String postTown;
private String postcodeGIS;
private String xCoord;
private String yCoord;
private String tmp;
private List<String> list;
private String[] parts;
private String fileSource;
private Pattern p = Pattern.compile("^,+");
private String str = null;
public DataIn(File aFile) throws SQLException, IOException
{
this.aFile = aFile;
// readData(aFile);
final File folder = new File("C:\\Documents and Settings\\jamesmonteith\\My Documents\\NetBeansProjects\\ROSdata\\data");
listFilesForFolder(folder);
}
public void listFilesForFolder(final File folder) throws SQLException, IOException {
for (final File fileEntry : folder.listFiles()) {
if (fileEntry.isDirectory())
{
listFilesForFolder(fileEntry);
} else
{
System.out.println(fileEntry.getName());
readData(fileEntry);
}
}
}
private void readData(File aFile) throws SQLException, IOException
{
System.out.println("-------- MySQL JDBC Connection Testing ------------");
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
System.out.println("Where is your MySQL JDBC Driver?");
e.printStackTrace();
return;
}
Connection connection = null;
Connection connect = null;
connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/address_data", "root", "Ell10t05");
Statement statement = null;
Statement stm = null;
try {
System.out.println("MySQL JDBC Driver Registered!");
boolean b = false;
String useDB = "USE address_data";
statement = connection.createStatement();
ResultSet rs = statement.executeQuery(useDB);
BufferedReader br = null;
br = new BufferedReader(new FileReader(aFile));
list = new ArrayList<String>();
while ((currentLine = br.readLine()) != null)
{
Matcher m = p.matcher(currentLine);
while(!m.find())
{
System.out.println(currentLine);
}
list = Arrays.asList(currentLine.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"));
for (int i = 0; i < list.size() ; i++)
{
applicationID = list.get(0).trim();
productType = list.get(1).trim();
titleNumber = list.get(2).trim();
applicationDate = list.get(3).trim();
county = list.get(4).trim();
landClass = list.get(5).trim();
deedCodeName = list.get(6).trim();
consideration = list.get(7).trim().replaceAll(",", "").replaceAll("[^\\d.]", "");
houseType = WordUtils.capitalizeFully(list.get(8).trim());
dateOfEntry = list.get(9).trim();
subBuilding = list.get(10).trim().replaceAll("\"", "");
buildingName = WordUtils.capitalizeFully(list.get(11)).replaceAll("\"","").trim();
buildingNumber = list.get(12).replaceAll("\"","").trim();
street = WordUtils.capitalizeFully(list.get(13)).trim().replaceAll(",", "~").replaceAll("\"", "");
district = WordUtils.capitalizeFully(list.get(14).replaceAll("\\.", "")).replaceAll("\"","").trim();
postTown = WordUtils.capitalizeFully(list.get(15)).replaceAll("\"","").trim();
postcodeGIS ="";
xCoord = "";
yCoord="";
tmp = "";
fileSource = aFile.getName();
if( i >=16)
{
postcodeGIS =list.get(16).replaceAll("\\s", "").replaceAll("\"", "").trim();
if( i > 16 )
{
xCoord = list.get(17).trim().replaceAll(",", "").replaceAll("\"", "");
if(i > 17)
{
yCoord = list.get(18).trim().replaceAll(",", "").replaceAll("\"", "");
tmp = list.get(18);
}
}
}
if(i < 17 || yCoord.isEmpty())
{
xCoord = "0.00";
yCoord = xCoord;
}
if(dateOfEntry.equals("Unknown") || dateOfEntry.isEmpty())
{
dateOfEntry = "0000/00/00";
}
if(consideration.isEmpty())
{
consideration="0";
}
}
if(!applicationID.contains("Application"))
{
str = "insert ignore into ros_data(`application_id`, `product_type`, `title_number`, `application_date`, `county`, `land_class`,`deed_code_name`, `consideration`, `house_type`,`date_of_entry`, `sub_building`, `building_name`, `building_number`, `street`, `district`, `post_town`, `postcode_gis`, `x_coord`, `y_coord`, `filesource`) VALUES ("
+ "\"" + applicationID + "\"" + ","
+ "\"" + productType + "\"" + ","
+ "\"" + titleNumber + "\"" + ","
+ applicationDate + ","
+ "\"" + county + "\"" + ","
+ "\"" + landClass + "\"" + ","
+ "\"" + deedCodeName + "\"" + ","
+ consideration + ","
+ "\"" + houseType + "\"" + ","
+ dateOfEntry + ","
+ "\"" + subBuilding + "\"" + ","
+ "\"" + buildingName + "\"" + ","
+ "\"" + buildingNumber + "\"" + ","
+ "\"" + street.replaceAll("~", ",") + "\"" + ","
+ "\"" + district + "\"" + ","
+ "\"" + postTown + "\"" + ","
+ "\"" + postcodeGIS + "\"" + ","
+ "\"" + xCoord + "\"" + ","
+ "\"" + yCoord + "\"" + ","
+ "\"" + fileSource + "\""+ ")" + ";";
// System.out.println(str);
// stm = connection.createStatement();
// stm.executeUpdate(str);
}
}
} catch (SQLException e) {
System.out.println("Connection Failed! Check output console");
System.out.println(str);
System.out.println(buildingNumber);
System.out.println(street);
System.out.println(district);
System.out.println(postTown);
e.printStackTrace();
return;
}
if (connection != null) {
System.out.println("You made it, take control your database now!");
} else {
System.out.println("Failed to make connection!");
}
}
}
As you can see I was toying with the idea of using a regex to terminate the reader but couldn't think of a good way to do this.
Any help would be appreciated.
Many Thanks